Processing/IO_Manager.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552

# Copyright (C) IBM Corporation 2008

import gtk
from BeautifulSoup import Tag
from NewtifulSoup import NewtifulStoneSoup as BeautifulStoneSoup
import os, platform, urllib
from Article_Builder import Article_Builder
from Processing.Article.Article import Article
from MediaWiki_Helper import MediaWiki_Helper, PageNotFoundError
from MediaWiki_Parser import MediaWiki_Parser
import shutil
import re

class theme_not_found_error(Exception):
    """
        Raised when a theme map cannot be located (see IO_Manager.load_map)
        or when an article is saved without a title/theme
        (see IO_Manager.save_article).

        @param value: Description of what was not found; kept in 'parameter'.
    """
    def __init__(self, value):
        # Initialise the base class too, so that args, repr() and pickling
        # carry the message instead of being empty.
        Exception.__init__(self, value)
        self.parameter = value

    def __str__(self):
        return repr(self.parameter)

class page_not_found_error(Exception):
    """
        Raised when a page is not listed in a theme map or its file is
        missing on disk (see IO_Manager.load_raw_page).

        @param value: Description of what was not found; kept in 'parameter'.
    """
    def __init__(self, value):
        # Initialise the base class too, so that args, repr() and pickling
        # carry the message instead of being empty.
        Exception.__init__(self, value)
        self.parameter = value

    def __str__(self):
        return repr(self.parameter)
    
class theme_exists_error(Exception):
    """
        Raised when a theme is added to the library but an entry with the
        same name is already present (see IO_Manager.add_theme_to_library).

        @param value: Description of the clash; kept in 'parameter'.
    """
    def __init__(self, value):
        # Initialise the base class too, so that args, repr() and pickling
        # carry the message instead of being empty.
        Exception.__init__(self, value)
        self.parameter = value

    def __str__(self):
        return repr(self.parameter)
    
    
"""
This class sits between the GUI and the back end (handling 
mediawiki communication and raw pages/theme/article modifications)
"""    
class IO_Manager:
    
    def __init__(self):
        running_on = platform.system()
        if running_on == "Windows":
            # On Windows, used to save to a data subfolder of the folder that contains this code
            #self.workingDir = os.path.join(__file__.rsplit("\\", 1)[0], "Data")
            # but better to write to user home with os.getenv("USERPROFILE") or os.path.expanduser("~")
            self.workingDir = os.path.join(os.getenv("USERPROFILE"), ".slicedata")
        elif running_on == "Linux":
            if "olpc" in platform.platform().lower():
                from sugar.activity import activity
                # On Sugar, save to the data subfolder of the app directory
                self.workingDir =  os.path.join(activity.get_activity_root(), "data")
                print "Activity root is: %s" % str(activity.get_activity_root())
                print "Data folder is: %s" % self.workingDir
            else:
                # On Linux, save to a .slicedata subdir of the user's homedir
                self.workingDir = os.path.join(os.getenv("HOME"), ".slicedata")
        self.proxies = {}
        proxy_file = os.path.join(os.path.split(os.path.split(__file__)[0])[0], 'proxy.cfg')
        if os.access(proxy_file, os.F_OK):
            proxy_file_handle = open(proxy_file, "r")
            for line in proxy_file_handle.readlines():
                parts = line.split(':', 1)
                #print "setting " + parts[0] + " proxy to " + parts[1]
                self.proxies[parts[0].strip()] = parts[1].strip()
            proxy_file_handle.close()
        if self.proxies == {}:
            self.proxies = None
    
    def clean_title(self, title):
        """
            Produces the lowercase token used for file and folder names:
            every character that is not a regex "word" character is replaced
            by an underscore, then the result is lowercased.
            The input and the result are both echoed to stdout.

            @param title: Title to sanitise.
            @return: The sanitised title.
        """
        print("Cleaning: " + title)
        non_word = re.compile(r'\W')
        cleaned = non_word.sub("_", title).lower()
        print("Output: " + cleaned)
        return cleaned
    
    def install_library(self):
        """
            On an OLPC machine (Linux whose platform string contains "olpc")
            installs the demo articles shipped in the 'demolibrary' folder
            next to this file, creating their themes when needed and
            fetching their images. Does nothing on any other platform.
        """
        on_olpc = platform.system() == "Linux" and "olpc" in platform.platform().lower()
        if not on_olpc:
            return
        demo_dir = os.path.join(os.path.split(__file__)[0], "demolibrary")
        # (page title, file inside demolibrary, theme to install into)
        demo_articles = (
            ('Lion (from en.wikipedia.org)', "lion-wikipedia.dita", 'Wikipedia Articles'),
            ('Tiger (from en.wikipedia.org)', "tiger-wikipedia.dita", 'Wikipedia Articles'),
            ('Giraffe (from en.wikipedia.org)', "giraffe-wikipedia.dita", 'Wikipedia Articles'),
            ('Giraffe', "giraffe-blank.dita", 'My Articles'),
            ('Zebra (from en.wikipedia.org)', "zebra-wikipedia.dita", 'Wikipedia Articles'),
        )
        for page_title, file_name, theme_name in demo_articles:
            source_path = os.path.join(demo_dir, file_name)
            # themes are re-read every time because earlier iterations may have added one
            if theme_name not in self.get_themes():
                print("install library: creating theme %s" % theme_name)
                self.add_theme_to_library(theme_name)
            print("install library: opening %s" % source_path)
            demo_file = open(source_path, "r")
            contents = demo_file.read()
            demo_file.close()
            if contents:
                print("install library: content read sucessfully")
            print("install library: saving page %s" % page_title)
            self.save_page(page_title, contents, theme_name, get_images=True)
            print("install library: save successful")
                                 
    def __add_page_to_library(self, title, path, theme="My Articles"):
        """
            Registers a page in a theme map, replacing any entry that already
            has the same title. If the theme has no map yet, the theme is
            created first.

            @param title: The title of the article to add to library.
            @param path: The path of the article file; stored relative to the working directory.
            @param theme: Which theme to store the article in. (Optional, defaults to 'My Articles').
            @raise theme_exists_error: Propagated from add_theme_to_library if the
                   theme is listed in the library although its map could not be loaded.
        """
        #change to relative path
        path = path.replace(os.path.join(self.workingDir, ""), "", 1)
        try:
            theme_map = self.load_map(theme)
        except theme_not_found_error:
            # Only a missing theme map is recoverable here; any other failure
            # (I/O error, corrupt map) is left to surface unchanged instead of
            # being mistaken for "theme missing".
            self.add_theme_to_library(theme)
            theme_map = self.load_map(theme)
        existing_entry = theme_map.find("topicref", attrs={"navtitle" : title})
        if existing_entry is not None:
            existing_entry.extract()
        theme_map.map.append(Tag(theme_map, "topicref", [("href", path), ("navtitle", title)]))
        self.save_map(theme, theme_map)
        
    def add_theme_to_library(self, theme):
        """
            Registers a new theme: adds an entry for it to the "Library" map
            and writes an empty map file for the theme itself. The library map
            is created on demand if it does not exist yet.

            @param theme: Name of the theme to add.
            @raise theme_exists_error: If the library already has an entry with this name.
        """
        try:
            library = self.load_map("Library")
        except theme_not_found_error:
            # No library yet: create an empty one and start over.
            self.__create_map("Library")
            self.add_theme_to_library(theme)
            return
        if library.find(attrs={"navtitle" : theme}) is not None:
            raise theme_exists_error("Theme already exists")
        entry_attributes = [("format", "ditamap"), ("href", "%s.ditamap" % self.clean_title(theme)), ("navtitle", theme)]
        library.map.append(Tag(library, "topicref", entry_attributes))
        # write the theme's own map before the library that points to it
        self.__create_map(theme)
        self.save_map("Library", library)
                
    def __create_map(self, map_name):
        """
            Writes a new, empty DITA map whose title attribute is the given name.

            @param map_name: Name of the map/theme; also determines the file name (see save_map).
        """
        from xml.sax.saxutils import escape
        # The name ends up inside a double-quoted XML attribute, so &, <, >
        # and " must be escaped or the generated document is malformed.
        safe_title = escape(map_name, {'"': "&quot;"})
        # Same pieces and separators as the original single-literal template.
        separator = " " * 16
        template = separator.join([
            '<?xml version="1.0" encoding="utf-8"?>',
            '<!DOCTYPE map PUBLIC "-//IBM//DTD DITA IBM Map//EN" "ibm-map.dtd">',
            '<map title="%s">',
            '</map>',
        ])
        self.save_map(map_name, BeautifulStoneSoup(template % safe_title))
    
    def download_wiki_article(self, title, theme, wiki=None, statuslabel = None):
        """
            Fetches an article through MediaWiki_Helper, converts it with
            MediaWiki_Parser and stores the result, images included, under
            the title "<title> (from <wiki>)".

            @param title: Title of article to get
            @param theme: Theme to save to
            @param wiki: (optional) wiki host to query; "en.wikipedia.org" when omitted
            @param statuslabel: (optional) object whose set_label() receives progress messages
        """
        show_progress = statuslabel is not None
        if show_progress:
            statuslabel.set_label("%s download in progress..." % title)
        if wiki is None:
            wiki = "en.wikipedia.org"

        article, url = MediaWiki_Helper().getArticleAsHTMLByTitle(title, wiki)
        if show_progress:
            statuslabel.set_label("Processing %s..." % title)

        contents = MediaWiki_Parser(article, title, url).parse()
        #TODO: change line below when taking from other sources:
        saved_title = title + " (from %s)" % wiki
        # positional flags: overwrite=False, get_images=True
        self.save_page(saved_title, contents, theme, False, True, statuslabel)
        if show_progress:
            statuslabel.set_label("Done.")
    
    def get_pages_in_theme(self, theme):
        """
            Returns a list of all pages in the specified theme.
            @param theme: Theme to query
            @return: List of dictionaries containing page 'path' and 'title'.
        """
        try:
            map = self.load_map(theme)
        except Exception:
            return []
        output = []
        for page in map.map.findAll("topicref"):
            output.append(page['navtitle'])
        output.sort()
        return output
            
    def get_themes(self):
        """
            Returns a list of all themes stored in the library.
            @return: List of theme names.
        """
        try:
            map = self.load_map("Library")
            output = []
            for theme in map.findAll("topicref"):
                output.append(theme["navtitle"])
            output.sort()
            return output
        except Exception:
            return []
    
    def get_unique_article_ID(self):
        """
            Creates and maintains a file to record the last unique article ID issued.
            when a new ID is requested, returns last id + 1 and upates file.
            @returns: Unique numeric ID
        """
        if not os.access(os.path.join(self.workingDir, "idfile"), os.F_OK):
            # if no ID file, take a guess at where to start numbering.
            id = 1
            # Worst case scenario is that every file is an article, so count all files
            for item in os.walk(self.workingDir):
                id += len(item[2])
            # Multiply by 1000 to prevent any problems caused by deleting files
            id = id * 1000
            print "ID FILE NOT FOUND, setting ID to " + str(id)
            id_file = open(os.path.join(self.workingDir, "idfile"), "w")
            id_file.write(str(id))
            id_file.close()
            return id
        else:
            id_file = open(os.path.join(self.workingDir, "idfile"), "r")
            id = long(id_file.read())
            id_file.close()
            id += 1
            id_file = open(os.path.join(self.workingDir, "idfile"), "w")
            id_file.write(str(id))
            id_file.close()
            return id          
        
    def image_handler(self, document, title, statuslabel=None):
        """
            Takes a DITA article and stores a local copy of every image it
            references (all <image> tags), rewriting each href to the local
            path relative to the working directory. Images that cannot be
            downloaded are removed from the document.

            hrefs containing "#DEMOLIBRARY#" are copied from the 'demolibrary'
            folder next to this file. Other hrefs that do not start with
            http:// or https:// are completed using the href of the
            document's <source> tag, when there is one.

            @param document: DITA to work on
            @param title: Title of article; determines the image folder
            @param statuslabel: (optional) object whose set_label() receives a progress message
            @return: The document with image tags adjusted to point to local paths
        """
        document = BeautifulStoneSoup(document)
        dir_path = os.path.join(self.workingDir, self.clean_title(title), "images")
        print(dir_path)
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        if statuslabel is not None:
            # Drop the " (from <wiki>)" suffix when present; titles without
            # it are shown whole (slicing at find()'s -1 lost a character).
            temptitle = title.split(" (from ", 1)[0]
            statuslabel.set_label("Downloading %s images..." % (temptitle, ))
        # image folder relative to the working directory, used for the new hrefs
        relative_dir = dir_path.replace(os.path.join(self.workingDir, ""), "", 1)
        for image in document.findAll("image"):
            path = image['href']
            if "#DEMOLIBRARY#" in path:
                path = path.replace("#DEMOLIBRARY#", os.path.join(os.path.split(__file__)[0], "demolibrary"))
                image_title = os.path.split(path)[1]
                shutil.copyfile(path, os.path.join(dir_path, image_title))
            else:
                image_title = path.rsplit("/", 1)[-1]
                # attempt to fix incomplete paths
                source = document.source
                is_absolute = path.startswith(("http://", "https://"))
                if (not is_absolute) and source is not None and source.has_key("href"):
                    base = source['href'].rsplit("/", 1)[0]
                    if path.startswith("/"):
                        path = base + path
                    else:
                        path = base + "/" + path
                print("Retrieving image: " + path)
                image_contents = self.__open_URL(path)
                if image_contents is None:
                    # download failed: drop the tag and leave no empty file behind
                    image.extract()
                    continue
                image_file = open(os.path.join(dir_path, image_title), 'wb')
                try:
                    image_file.write(image_contents)
                finally:
                    image_file.close()
            #change to relative paths:
            image['href'] = os.path.join(relative_dir, image_title)
        return document.prettify()
              
    def load_map(self, map_name):
        """
            Loads the specified theme map.
            @param map_name: Name of theme map to load
            @return: map contents as a Soup
            @raise theme_not_found_error: If theme map not found.
        """
        if not os.access(os.path.join(self.workingDir, "%s.ditamap" % self.clean_title(map_name)), os.F_OK):
            raise theme_not_found_error("Theme '" + map_name + "' not found")
        file = open(os.path.join(self.workingDir, "%s.ditamap" % self.clean_title(map_name)), "r")
        map = BeautifulStoneSoup(file.read())
        file.close()
        return map
    
    def load_raw_page(self, title, theme):
        """
            Returns contents of specified page.
            
            @param title: Title of page to open.
            @param theme: Theme of page to open.
            @return: Contents of page.
        """
        theme_map = self.load_map(theme)
        print title + theme
        page_location = theme_map.find("topicref", attrs={ "navtitle" : title })
        if page_location != None:
            page_location = page_location['href']
        else:
            raise page_not_found_error("No match for " + title + " in " + theme)
        
        #if not os.access(page_location, os.F_OK):
        if os.access(os.path.join(self.workingDir, page_location), os.F_OK):
            page_location = os.path.join(self.workingDir, page_location)
        else:
            raise page_not_found_error("Page not found at " + page_location)
        page = open(page_location, "r")
        output = page.read()
        page.close()
        return output
        
    def load_page(self, title, theme):
        """
            Loads a page's raw contents and hands them to
            Article_Builder().get_article_from_dita().

            @param title: Title of page to open.
            @param theme: Theme of page to open.
            @return: Whatever get_article_from_dita returns for the page.
        """
        raw_page = self.load_raw_page(title, theme)
        return Article_Builder().get_article_from_dita(raw_page)
        
    def copy_page(self, title, fromtheme, totheme):
        """
            Copys a page from one theme to another. If no title specified, all pages in theme are moved.
            @param page_title: Title of page to move. (Optional, defaults to None)
            @param from_theme: Source theme.
            @param to_theme: Destination theme.  
        """
        print "COPY PAGE %s FROM %s TO %s" % (title, fromtheme, totheme)
        article = self.load_raw_page(title, fromtheme)
        self.save_page(title, article, totheme, overwrite=False)
        
    def load_article(self, title, theme):
        """
            Builds an Article from a stored page and tags it with the title
            and theme it was loaded from.

            @param title: Title of page to open.
            @param theme: Theme of page to open.
            @return: Article with article_title and article_theme set.
        """
        loaded = Article(self.load_page(title, theme))
        loaded.article_title = title
        loaded.article_theme = theme
        return loaded
    
    def move_page(self, from_theme, to_theme, page_title = None):
        """
            Moves a page from one theme to another. If no title specified, all pages in theme are moved.
            Nothing happens (and nothing is raised) when either theme map is
            missing, when the page is not in the source theme, or when both
            names refer to the same map file.

            @param from_theme: Source theme.
            @param to_theme: Destination theme.
            @param page_title: Title of page to move. (Optional, defaults to None)
        """
        # Both maps are saved one after the other; if they were the same file
        # the second save would overwrite the first and lose the moved pages.
        if self.clean_title(from_theme) == self.clean_title(to_theme):
            return
        try:
            from_map = self.load_map(from_theme)
            to_map = self.load_map(to_theme)
        except theme_not_found_error:
            return
        if page_title is None:
            candidates = from_map.findAll("topicref")
        else:
            candidates = [from_map.find("topicref", attrs={"navtitle" : page_title})]
        pages = [page for page in candidates if page is not None]
        if not pages:
            return
        for page in pages:
            # detach from the source map explicitly, then attach to the destination
            page.extract()
            to_map.map.append(page)
        # Destination first: an interruption leaves a duplicate rather than a loss.
        self.save_map(to_theme, to_map)
        self.save_map(from_theme, from_map)
    
    def __open_URL(self, url):
        """
            retrieves content from specified url
        """
        urllib._urlopener = self.New_URL_Opener()
        try:
            print "opening " + url
            print "proxies: " + str(self.proxies)
            doc = urllib.urlopen(url, proxies=self.proxies)
            output = doc.read()
            doc.close()
            print "url opened succesfully"
            return output
        except IOError, e:
            print e
    
    def page_exists(self, title, theme):
        """
            boolean check if an article exists
        """
        try:
            map = self.load_map(theme)
            if map.find("topicref", attrs={"navtitle" : title}) != None:
                return True
            else:
                return False
        except Exception:
            return False
        
    def theme_exists(self, theme):
        """
            boolean check if a theme exists
        """
        themes = self.get_themes()
        if theme in themes:
            return True
        else:
            return False
    
    def remove_page(self, page, theme):
        """
            Removes specified page from the specified theme: deletes the page
            file and drops its entry from the theme map. Requests for the
            "Downloaded Articles" theme are ignored, and a page that is not
            listed in the theme is treated as already removed.

            @param page: Page to remove
            @param theme: Containing theme
            @raise theme_not_found_error: Propagated from load_map if the theme map is missing.
        """
        if theme == "Downloaded Articles":
            return
        theme_map = self.load_map(theme)
        entry = theme_map.find("topicref", attrs={"navtitle" : page})
        if entry is None:
            # nothing registered under that title
            return
        try:
            # hrefs are stored relative to the working directory, so resolve
            # against it (an absolute href is left untouched by os.path.join)
            os.remove(os.path.join(self.workingDir, entry['href']))
        except (OSError, KeyError):
            # file already gone or entry without href: still drop the entry
            pass
        entry.extract()
        self.save_map(theme, theme_map)
    
    def remove_theme(self, theme):
        """
            Removes specified theme: deletes the theme's map file and drops
            its entry from the "Library" map. Pages of the theme are not
            moved and their files are not deleted.
            Removing a theme that is not in the library (or when no library
            exists) does nothing.

            @param theme: Theme to remove
        """
        try:
            library = self.load_map("Library")
        except theme_not_found_error:
            # No library at all, so the theme cannot exist: pretend it worked.
            return
        entry = library.find("topicref", attrs={"navtitle" : theme})
        if entry is None:
            # Trying to remove a theme that doesn't exist, so pretend it worked.
            return
        try:
            os.remove(os.path.join(self.workingDir, entry['href']))
        except OSError:
            # Map file already missing: the library entry must still go,
            # otherwise the stale theme could never be removed.
            pass
        entry.extract()
        self.save_map("Library", library)
    
    def rename_page(self, theme, old_title, new_title):
        """
            renames specified page in specified theme
        """
        try:
            map = self.load_map(theme)
            page = map.find("topicref", attrs={"navtitle" : old_title})
            if page != None:
                page['navtitle'] = new_title
                self.save_map(theme, map)
        except Exception:
            pass
    
    def rename_theme(self, old_name, new_name):
        """
            Renames specified theme. Nothing happens if the library has no
            theme called old_name or already has one called new_name.

            @param old_name: Current name of the theme.
            @param new_name: Name to give the theme.
            @raise theme_not_found_error: Propagated from load_map if the library
                   map or the old theme's map file is missing.
        """
        library = self.load_map("Library")
        entry = library.find("topicref", attrs={"navtitle" : old_name})
        if entry is None or library.find("topicref", attrs={"navtitle" : new_name}) is not None:
            return
        # Load the existing map before anything is written: creating the new
        # theme first would overwrite it when both names share a file name,
        # and would leave a stray empty theme behind if this load fails.
        theme = self.load_map(old_name)
        # NOTE(review): __create_map writes a 'title' attribute while this sets
        # 'name' - kept as in the original; confirm which one is intended.
        theme.map['name'] = new_name
        if self.clean_title(old_name) == self.clean_title(new_name):
            # Same file on disk (names differ only in case/punctuation):
            # relabel the library entry in place instead of add + remove,
            # which would delete the file that was just written.
            entry['navtitle'] = new_name
            self.save_map(new_name, theme)
            self.save_map("Library", library)
            return
        self.add_theme_to_library(new_name)
        self.save_map(new_name, theme)
        self.remove_theme(old_name)
        
    def save_article(self, article, overwrite = True):
        """
            Saves an article object: converts it to DITA with Article_Builder
            and passes it to save_page under the article's own title and theme.

            @param article: Object providing article_title and article_theme.
            @param overwrite: Passed through to save_page.
            @raise theme_not_found_error: If the article's title or theme is None.
        """
        title, theme = article.article_title, article.article_theme
        if title is None or theme is None:
            raise theme_not_found_error("Theme or title not specified")
        dita = Article_Builder().get_dita_from_article(article)
        self.save_page(title, dita, theme, overwrite)
        
    def save_map(self, map_name, map_data):
        """
            Saves the specified map.
            @param map_name: Name of map
            @param map_data: Contents of map
        """
        if not os.path.exists(self.workingDir):
                os.makedirs(self.workingDir, 0777)
        map = open(os.path.join(self.workingDir, "%s.ditamap" % self.clean_title(map_name)), "w")
        map.write(map_data.prettify())
        map.close()
        
    def save_page(self, title, contents, theme="Downloaded Articles", overwrite=True, get_images=False, statuslabel=None):
        """
            Saves the specified page contents as specified title (in optional specified theme).
            The file goes to <workingDir>/<sanitised title>/<name>.dita, a
            resourceid with a fresh unique ID is inserted after the first
            <prolog> tag, and the page is registered in the theme map.

            @param title: Title to save as.
            @param contents: Contents to save.
            @param theme: Theme to save in.
            @param overwrite: Boolean to specify overwrite if file already exists;
                   when False an unused file name is chosen instead.
            @param get_images: When true, images referenced by the contents are
                   fetched first (see image_handler).
            @param statuslabel: Passed on to image_handler.
            @return: Full path of the saved file.
        """
        # Sanitise once and reuse; clean_title echoes to stdout on every call.
        clean_name = self.clean_title(title)
        new_title = clean_name + "-" + self.clean_title(theme)
        if get_images:
            contents = self.image_handler(contents, title, statuslabel)
        directory = os.path.join(self.workingDir, clean_name)
        if not os.path.exists(directory):
            os.makedirs(directory)
        if overwrite == False:
            unique = 2
            while os.access(os.path.join(directory, "%s.dita" % new_title), os.F_OK):
                new_title = clean_name + str(unique)
                unique += 1
        page_path = os.path.join(directory, "%s.dita" % new_title)
        contents = contents.replace('<prolog>', '<prolog>\n<resourceid id="%d" />' % self.get_unique_article_ID(), 1)
        page_file = open(page_path, "w")
        try:
            page_file.write(contents)
        finally:
            # release the handle even if the write fails
            page_file.close()
        self.__add_page_to_library(title, page_path, theme)
        print("Page saved to - " + page_path)
        return page_path
    
    def validate_image_list(self, image_list):
        """
            provides a mechanism for validating image lists and expanding relative paths
            @param image_list: list of images to validate
            @return: list of images with corrected paths, and broken images removed
        """
        for i in xrange(len(image_list)):
            if not os.access(image_list[i][0], os.F_OK):
                if os.access(os.path.join(self.workingDir, image_list[i][0]), os.F_OK):
                    image_list[i] = (os.path.join(self.workingDir, image_list[i][0]), image_list[i][1])
                else:
                    image = None
        #removing during for loop was unreliable
        while None in image_list:
            image_list.remove(None)
        return image_list

    class New_URL_Opener(urllib.FancyURLopener):
        """
            URL opener that overrides FancyURLopener's 'version' attribute
            with a Firefox-style identification string.
        """
        # NOTE(review): presumably needed because some servers reject the
        # stock urllib identification - confirm.
        version = "Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11"