#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Origin: translate-toolkit-1.5.1/translate/storage/txt.py
# (blob 15ad7e1b2b6b88644596ea2a8d1553ae33e3c0e7)
# 
# Copyright 2007 Zuza Software Foundation
# 
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""This module implements the functionality for handling plain text files and
similar wiki-style files.

Supported formats are:
  - Plain text
  - dokuwiki
  - MediaWiki
"""

from translate.storage import base
import re

# Each flavour is a list of (rule name, prefix regex, suffix regex) tuples.
#  - The prefix regex is matched against the start of a line; its first group
#    is the leading markup and its second group is the remaining text.
#  - The suffix regex is searched for in that remaining text to find trailing
#    markup/whitespace.
# All patterns are raw strings so that "\s" reaches the regex engine verbatim
# instead of relying on Python passing an unknown string escape through.
dokuwiki = [
    ("Dokuwiki heading",
     re.compile(r"( ?={2,6}[\s]*)(.+)"),
     re.compile(r"([\s]*={2,6}[\s]*)$")),
    ("Dokuwiki bullet",
     re.compile(r"([\s]{2,}\*[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
    ("Dokuwiki numbered item",
     re.compile(r"([\s]{2,}-[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
]

mediawiki = [
    ("MediaWiki heading",
     re.compile(r"(={2,5}[\s]*)(.+)"),
     re.compile(r"([\s]*={2,5}[\s]*)$")),
    ("MediaWiki bullet",
     re.compile(r"(\*+[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
    ("MediaWiki numbered item",
     re.compile(r"(#+[\s]*)(.+)"),
     re.compile(r"[\s]+$")),
]

# Maps a flavour name to its rule list; None and "plain" have no markup rules.
flavours = {
    "dokuwiki": dokuwiki,
    "mediawiki": mediawiki,
    None: [],
    "plain": [],
}

class TxtUnit(base.TranslationUnit):
    """A single block of text taken from a text file.

    The unit is monolingual: ``target`` simply reads and writes ``source``.
    ``pretext`` and ``posttext`` hold text that is placed around the source
    when the unit is converted to a string.
    """

    def __init__(self, source="", encoding="utf-8"):
        """Create a unit for source, using encoding for str/unicode conversion."""
        # setsource() reads self.encoding, so it has to exist before anything
        # assigns to self.source.
        self.encoding = encoding
        super(TxtUnit, self).__init__(source)
        # NOTE(review): source is assigned again after the base constructor;
        # presumably the base class may overwrite it - confirm against base.
        self.source = source
        self.location = []
        self.pretext = ""
        self.posttext = ""

    def __str__(self):
        """Join pretext, source and posttext; unicode is encoded with self.encoding."""
        pieces = (self.pretext, self.source, self.posttext)
        text = u"".join(pieces)
        if not isinstance(text, unicode):
            return text
        return text.encode(self.encoding)

    # Note that source and target are equivalent for monolingual units
    def setsource(self, source):
        """Store source, first decoding ``str`` values with self.encoding."""
        if not isinstance(source, str):
            self._source = source
        else:
            self._source = source.decode(self.encoding)

    def getsource(self):
        """Return the stored source text."""
        return self._source

    source = property(getsource, setsource)

    def settarget(self, target):
        """Set the target, which for this unit type means setting the source."""
        self.source = target

    def gettarget(self):
        """Return the target, which for this unit type is the source."""
        return self.source

    target = property(gettarget, settarget)

    def addlocation(self, location):
        """Append location to this unit's list of locations."""
        self.location.append(location)

    def getlocations(self):
        """Return the list of locations recorded for this unit."""
        return self.location

class TxtFile(base.TranslationStore):
    """This class represents a text file, made up of txtunits.

    The text is split into blocks separated by blank lines.  When a wiki
    flavour is selected, every line matching one of the flavour's rules
    (heading, bullet, numbered item) becomes a unit of its own, with the
    surrounding markup kept in the unit's ``pretext`` / ``posttext``.
    """
    UnitClass = TxtUnit

    def __init__(self, inputfile=None, flavour=None, encoding="utf-8"):
        """Construct the store and parse inputfile when one is given.

        inputfile -- object providing readlines() (and optionally a ``name``
                     attribute used in unit locations), or None for an empty
                     store
        flavour   -- key into the module-level ``flavours`` table; unknown
                     values fall back to no markup rules
        encoding  -- encoding used when converting the store to a string
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.filename = getattr(inputfile, 'name', '')
        self.flavour = flavours.get(flavour, [])
        # Assigned before parsing so the attribute already exists while the
        # units are being built.
        self.encoding = encoding
        if inputfile is not None:
            txtsrc = inputfile.readlines()
            self.parse(txtsrc)

    def _addblock(self, block, startline, pretext="", posttext=""):
        """Add one unit built from the lines in block and return it.

        startline is the zero-based index of the block's first line; the
        recorded location is one-based.
        """
        unit = self.addsourceunit("\n".join(block))
        unit.addlocation("%s:%d" % (self.filename, startline + 1))
        unit.pretext = pretext
        unit.posttext = posttext
        return unit

    def parse(self, lines):
        """Read in text lines and create txtunits from the blocks of text.

        lines -- a list of lines, or a single string which is split on "\\n"

        Consecutive non-blank lines form one unit.  A line matching a flavour
        rule always forms its own unit: any paragraph collected so far is
        emitted first, so it does not pick up the markup of the matched line,
        and the new unit's location is the matched line itself.
        """
        if not isinstance(lines, list):
            lines = lines.split("\n")
        block = []
        startline = 0
        for linenum, rawline in enumerate(lines):
            line = rawline.rstrip("\n").rstrip("\r")
            for _rule, prere, postre in self.flavour:
                match = prere.match(line)
                if not match:
                    continue
                pretext, source = match.groups()
                posttext = ""
                postmatch = postre.search(source)
                if postmatch:
                    # Split trailing markup/whitespace off the text.
                    posttext = postmatch.group()
                    source = source[:postmatch.start()]
                if block:
                    # Close the pending plain paragraph before the marked-up
                    # line, instead of merging the two into one unit.
                    self._addblock(block, startline)
                    block = []
                self._addblock([source], linenum, pretext, posttext)
                break
            else:
                # No rule matched: plain text line or blank separator.
                if line.strip():
                    if not block:
                        startline = linenum
                    block.append(line)
                elif block:
                    self._addblock(block, startline)
                    block = []
        if block:
            self._addblock(block, startline)

    def __str__(self):
        """Return the output of getoutput(), encoded if it is unicode."""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Convert the units back to blocks separated by a blank line."""
        blocks = [str(unit) for unit in self.units]
        return "\n\n".join(blocks)