Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/Processing/IO_Manager.py
diff options
context:
space:
mode:
Diffstat (limited to 'Processing/IO_Manager.py')
-rw-r--r--Processing/IO_Manager.py552
1 files changed, 552 insertions, 0 deletions
diff --git a/Processing/IO_Manager.py b/Processing/IO_Manager.py
new file mode 100644
index 0000000..7cffced
--- /dev/null
+++ b/Processing/IO_Manager.py
@@ -0,0 +1,552 @@
+# Copyright (C) IBM Corporation 2008
+
+import gtk
+from BeautifulSoup import Tag
+from NewtifulSoup import NewtifulStoneSoup as BeautifulStoneSoup
+import os, platform, urllib
+from Article_Builder import Article_Builder
+from Processing.Article.Article import Article
+from MediaWiki_Helper import MediaWiki_Helper, PageNotFoundError
+from MediaWiki_Parser import MediaWiki_Parser
+import shutil
+import re
+
+class theme_not_found_error(Exception):
+ def __init__(self, value):
+ self.parameter = value
+ def __str__(self):
+ return repr(self.parameter)
+
+class page_not_found_error(Exception):
+ def __init__(self, value):
+ self.parameter = value
+ def __str__(self):
+ return repr(self.parameter)
+
+class theme_exists_error(Exception):
+ def __init__(self, value):
+ self.parameter = value
+ def __str__(self):
+ return repr(self.parameter)
+
+
+"""
+This class sits between the GUI and the back end (handling
+mediawiki communication and raw pages/theme/article modifications)
+"""
+class IO_Manager:
+
+ def __init__(self):
+ running_on = platform.system()
+ if running_on == "Windows":
+ # On Windows, used to save to a data subfolder of the folder that contains this code
+ #self.workingDir = os.path.join(__file__.rsplit("\\", 1)[0], "Data")
+ # but better to write to user home with os.getenv("USERPROFILE") or os.path.expanduser("~")
+ self.workingDir = os.path.join(os.getenv("USERPROFILE"), ".slicedata")
+ elif running_on == "Linux":
+ if "olpc" in platform.platform().lower():
+ from sugar.activity import activity
+ # On Sugar, save to the data subfolder of the app directory
+ self.workingDir = os.path.join(activity.get_activity_root(), "data")
+ print "Activity root is: %s" % str(activity.get_activity_root())
+ print "Data folder is: %s" % self.workingDir
+ else:
+ # On Linux, save to a .slicedata subdir of the user's homedir
+ self.workingDir = os.path.join(os.getenv("HOME"), ".slicedata")
+ self.proxies = {}
+ proxy_file = os.path.join(os.path.split(os.path.split(__file__)[0])[0], 'proxy.cfg')
+ if os.access(proxy_file, os.F_OK):
+ proxy_file_handle = open(proxy_file, "r")
+ for line in proxy_file_handle.readlines():
+ parts = line.split(':', 1)
+ #print "setting " + parts[0] + " proxy to " + parts[1]
+ self.proxies[parts[0].strip()] = parts[1].strip()
+ proxy_file_handle.close()
+ if self.proxies == {}:
+ self.proxies = None
+
+ def clean_title(self, title):
+ """
+ removes non-alphanumeric chars from titles and lowercases it
+ """
+ print "Cleaning: " + title
+ output = re.sub(re.compile('\W'), "_", title).lower()
+ print "Output: " + output
+ return output
+
+ def install_library(self):
+ if platform.system() == "Linux" and "olpc" in platform.platform().lower():
+ file_list = [('Lion (from en.wikipedia.org)', os.path.join(os.path.split(__file__)[0], "demolibrary", "lion-wikipedia.dita"), 'Wikipedia Articles'), ('Tiger (from en.wikipedia.org)', os.path.join(os.path.split(__file__)[0], "demolibrary", "tiger-wikipedia.dita"), 'Wikipedia Articles'), ('Giraffe (from en.wikipedia.org)', os.path.join(os.path.split(__file__)[0], "demolibrary", "giraffe-wikipedia.dita"), 'Wikipedia Articles'), ('Giraffe', os.path.join(os.path.split(__file__)[0], "demolibrary", "giraffe-blank.dita"), 'My Articles'), ('Zebra (from en.wikipedia.org)', os.path.join(os.path.split(__file__)[0], "demolibrary", "zebra-wikipedia.dita"), 'Wikipedia Articles')]
+ for file in file_list:
+ if file[2] not in self.get_themes():
+ print "install library: creating theme %s" % file[2]
+ self.add_theme_to_library(file[2])
+ print "install library: opening %s" % file[1]
+ open_file = open(file[1], "r")
+ contents = open_file.read()
+ open_file.close()
+ if contents:
+ print "install library: content read sucessfully"
+ print "install library: saving page %s" % file[0]
+ self.save_page(file[0], contents, file[2], get_images=True)
+ print "install library: save successful"
+
+ def __add_page_to_library(self, title, path, theme="My Articles"):
+ """
+ Adds a page to the library. If a theme is specified it is added to that theme, otherwise it is put into the 'No Assigned Theme' theme.
+
+ @param title: The title of the article to add to library.
+ @param path: The path of the article to add to library.
+ @param theme: Which theme to store the article in. (Optional, defaults to No Assigned Theme).
+ """
+ try:
+ #change to relative path
+ path = path.replace(os.path.join(self.workingDir, ""), "", 1)
+ map = self.load_map(theme)
+ existing_entry = map.find("topicref", attrs={"navtitle" : title})
+ if existing_entry != None:
+ existing_entry.extract()
+ map.map.append(Tag(map, "topicref", [("href", path), ("navtitle", title)]))
+ self.save_map(theme, map)
+ except Exception:
+ self.add_theme_to_library(theme)
+ self.__add_page_to_library(title, path, theme)
+
+ def add_theme_to_library(self, theme):
+ """
+ Adds themes to the library.
+ @param theme: Theme to add.
+ @raise theme_exists_error: If trying to add theme that already exists.
+ """
+ try:
+ map = self.load_map("Library")
+ # Ensure theme does not exist
+ if map.find(attrs={"navtitle" : theme}) == None:
+ # create a new entry in the library for the theme
+ map.map.append(Tag(map, "topicref", [("format", "ditamap"), ("href", "%s.ditamap" % self.clean_title(theme)), ("navtitle", theme)]))
+ # save the theme file
+ self.__create_map(theme)
+ else:
+ raise theme_exists_error("Theme already exists")
+ self.save_map("Library", map)
+ except theme_not_found_error:
+ # this error is caused by failing to open the library, so create the library and try again
+ self.__create_map("Library")
+ self.add_theme_to_library(theme)
+
+ def __create_map(self, map_name):
+ """
+ Creates a new map for the specified theme name.
+ @param map_name: name of map theme.
+ """
+ self.save_map(map_name, BeautifulStoneSoup(\
+ '<?xml version="1.0" encoding="utf-8"?>\
+ <!DOCTYPE map PUBLIC "-//IBM//DTD DITA IBM Map//EN" "ibm-map.dtd">\
+ <map title="%s">\
+ </map>' % map_name))
+
+ def download_wiki_article(self, title, theme, wiki=None, statuslabel = None):
+ """
+ manages downloading and saving of wiki articles.
+ @param title: Title of article to get
+ @param theme: Theme to save to
+ @param wiki: (optional) wiki to search - see MediaWiki helper for default behaviour
+ @param statuslabel: gtk status label to write to
+
+ """
+ if statuslabel != None:
+ statuslabel.set_label("%s download in progress..." % (title))
+ if wiki == None:
+ #article, url = MediaWiki_Helper().getArticleAsHTMLByTitle(title)
+ wiki = "en.wikipedia.org"
+
+ article, url = MediaWiki_Helper().getArticleAsHTMLByTitle(title, wiki)
+ if statuslabel != None:
+ statuslabel.set_label("Processing %s..." % (title))
+
+ parser = MediaWiki_Parser(article, title, url)
+ contents = parser.parse()
+ #TODO: change line below when taking from other sources:
+ self.save_page(title + " (from %s)" % wiki, contents, theme, False, True, statuslabel)
+ if statuslabel != None:
+ statuslabel.set_label("Done.")
+# unique=2
+# new_title = title.lower()
+# contents = self.image_handler(parser.parse(), title)
+# if not os.path.exists(os.path.join(self.workingDir, title.lower())):
+# os.makedirs(os.path.join(self.workingDir, title.lower()), 0777)
+# while os.access(os.path.join(self.workingDir, title.lower(), "%s.dita" % new_title), os.F_OK):
+# new_title = title.lower() + str(unique)
+# unique += 1
+# contents = contents.replace('<prolog>', '<prolog>\n<resourceid id="%d" />' % self.get_unique_article_ID(), 1)
+# file = open(os.path.join(self.workingDir, title.lower(), "%s.dita" % new_title), "w")
+# file.write(contents)
+# file.close()
+
+ def get_pages_in_theme(self, theme):
+ """
+ Returns a list of all pages in the specified theme.
+ @param theme: Theme to query
+ @return: List of dictionaries containing page 'path' and 'title'.
+ """
+ try:
+ map = self.load_map(theme)
+ except Exception:
+ return []
+ output = []
+ for page in map.map.findAll("topicref"):
+ output.append(page['navtitle'])
+ output.sort()
+ return output
+
+ def get_themes(self):
+ """
+ Returns a list of all themes stored in the library.
+ @return: List of theme names.
+ """
+ try:
+ map = self.load_map("Library")
+ output = []
+ for theme in map.findAll("topicref"):
+ output.append(theme["navtitle"])
+ output.sort()
+ return output
+ except Exception:
+ return []
+
+ def get_unique_article_ID(self):
+ """
+ Creates and maintains a file to record the last unique article ID issued.
+ when a new ID is requested, returns last id + 1 and upates file.
+ @returns: Unique numeric ID
+ """
+ if not os.access(os.path.join(self.workingDir, "idfile"), os.F_OK):
+ # if no ID file, take a guess at where to start numbering.
+ id = 1
+ # Worst case scenario is that every file is an article, so count all files
+ for item in os.walk(self.workingDir):
+ id += len(item[2])
+ # Multiply by 1000 to prevent any problems caused by deleting files
+ id = id * 1000
+ print "ID FILE NOT FOUND, setting ID to " + str(id)
+ id_file = open(os.path.join(self.workingDir, "idfile"), "w")
+ id_file.write(str(id))
+ id_file.close()
+ return id
+ else:
+ id_file = open(os.path.join(self.workingDir, "idfile"), "r")
+ id = long(id_file.read())
+ id_file.close()
+ id += 1
+ id_file = open(os.path.join(self.workingDir, "idfile"), "w")
+ id_file.write(str(id))
+ id_file.close()
+ return id
+
+ def image_handler(self, document, title, statuslabel=None):
+ """
+ Takes a DITA article and downloads images referenced in it (finding all <image> tags).
+ Attemps to fix incomplete paths using source url.
+ @param document: DITA to work on
+ @param title: Title of article
+ @return: The document with image tags adjusted to point to local paths
+ """
+ document = BeautifulStoneSoup(document)
+ dir_path = os.path.join(self.workingDir, self.clean_title(title), "images")
+ print dir_path
+ if not os.path.exists(dir_path):
+ os.makedirs(dir_path, 0777)
+ if statuslabel != None:
+ i = title.find(" (from ")
+ temptitle = title[0:i]
+ statuslabel.set_label("Downloading %s images..." % (temptitle, ))
+ for image in document.findAll("image"):
+ fail = False
+ path = image['href']
+ if "#DEMOLIBRARY#" in path:
+ path = path.replace("#DEMOLIBRARY#", os.path.join(os.path.split(__file__)[0], "demolibrary"))
+ image_title = os.path.split(path)[1]
+ shutil.copyfile(path, os.path.join(dir_path, image_title))
+ else:
+ image_title = path.rsplit("/", 1)[-1]
+ # attempt to fix incomplete paths
+ if (not path.startswith("http://")) and document.source != None and document.source.has_key("href"):
+ if path.startswith("/"):
+ path = document.source['href'].rsplit("/", 1)[0] + path
+ else:
+ path = document.source['href'].rsplit("/", 1)[0] + "/" + path
+ print "Retrieving image: " + path
+ file = open(os.path.join(dir_path, image_title), 'wb')
+ image_contents = self.__open_URL(path)
+ if image_contents == None:
+ fail = True
+ else:
+ file.write(image_contents)
+ file.close()
+ #change to relative paths:
+ if not fail:
+ image['href'] = os.path.join(dir_path.replace(os.path.join(self.workingDir, ""), "", 1), image_title)
+ else:
+ image.extract()
+ return document.prettify()
+
+ def load_map(self, map_name):
+ """
+ Loads the specified theme map.
+ @param map_name: Name of theme map to load
+ @return: map contents as a Soup
+ @raise theme_not_found_error: If theme map not found.
+ """
+ if not os.access(os.path.join(self.workingDir, "%s.ditamap" % self.clean_title(map_name)), os.F_OK):
+ raise theme_not_found_error("Theme '" + map_name + "' not found")
+ file = open(os.path.join(self.workingDir, "%s.ditamap" % self.clean_title(map_name)), "r")
+ map = BeautifulStoneSoup(file.read())
+ file.close()
+ return map
+
+ def load_raw_page(self, title, theme):
+ """
+ Returns contents of specified page.
+
+ @param title: Title of page to open.
+ @param theme: Theme of page to open.
+ @return: Contents of page.
+ """
+ theme_map = self.load_map(theme)
+ print title + theme
+ page_location = theme_map.find("topicref", attrs={ "navtitle" : title })
+ if page_location != None:
+ page_location = page_location['href']
+ else:
+ raise page_not_found_error("No match for " + title + " in " + theme)
+
+ #if not os.access(page_location, os.F_OK):
+ if os.access(os.path.join(self.workingDir, page_location), os.F_OK):
+ page_location = os.path.join(self.workingDir, page_location)
+ else:
+ raise page_not_found_error("Page not found at " + page_location)
+ page = open(page_location, "r")
+ output = page.read()
+ page.close()
+ return output
+
+ def load_page(self, title, theme):
+ return Article_Builder().get_article_from_dita(self.load_raw_page(title, theme))
+
+ def copy_page(self, title, fromtheme, totheme):
+ """
+ Copys a page from one theme to another. If no title specified, all pages in theme are moved.
+ @param page_title: Title of page to move. (Optional, defaults to None)
+ @param from_theme: Source theme.
+ @param to_theme: Destination theme.
+ """
+ print "COPY PAGE %s FROM %s TO %s" % (title, fromtheme, totheme)
+ article = self.load_raw_page(title, fromtheme)
+ self.save_page(title, article, totheme, overwrite=False)
+
+ def load_article(self, title, theme):
+ """
+ loads the specified article
+ """
+ article_data = self.load_page(title, theme)
+ article = Article(article_data)
+ article.article_title = title
+ article.article_theme = theme
+ return article
+
+ def move_page(self, from_theme, to_theme, page_title = None):
+ """
+ Moves a page from one theme to another. If no title specified, all pages in theme are moved.
+ @param page_title: Title of page to move. (Optional, defaults to None)
+ @param from_theme: Source theme.
+ @param to_theme: Destination theme.
+ """
+ try:
+ from_map = self.load_map(from_theme)
+ to_map = self.load_map(to_theme)
+ if page_title == None:
+ pages = from_map.findAll("topicref")
+ else:
+ pages = [from_map.find("topicref", attrs={"navtitle" : page_title})]
+ if pages == [None] or pages == []:
+ raise exception("not found")
+ for page in pages:
+ to_map.map.append(page)
+ self.save_map(to_theme, to_map)
+ for page in pages:
+ page.extract()
+ self.save_map(from_theme, from_map)
+ except Exception:
+ # Shouldn't ever happen
+ pass
+
+ def __open_URL(self, url):
+ """
+ retrieves content from specified url
+ """
+ urllib._urlopener = self.New_URL_Opener()
+ try:
+ print "opening " + url
+ print "proxies: " + str(self.proxies)
+ doc = urllib.urlopen(url, proxies=self.proxies)
+ output = doc.read()
+ doc.close()
+ print "url opened succesfully"
+ return output
+ except IOError, e:
+ print e
+
+ def page_exists(self, title, theme):
+ """
+ boolean check if an article exists
+ """
+ try:
+ map = self.load_map(theme)
+ if map.find("topicref", attrs={"navtitle" : title}) != None:
+ return True
+ else:
+ return False
+ except Exception:
+ return False
+
+ def theme_exists(self, theme):
+ """
+ boolean check if a theme exists
+ """
+ themes = self.get_themes()
+ if theme in themes:
+ return True
+ else:
+ return False
+
+ def remove_page(self, page, theme):
+ """
+ Removes specified page from the specified theme.
+ @param page: Page to remove
+ @param theme: Containing theme
+ """
+ if theme == "Downloaded Articles":
+ return
+ theme_map = self.load_map(theme)
+ entry = theme_map.find("topicref", attrs={"navtitle" : page})
+ try:
+ os.remove(entry['href'])
+ except Exception:
+ pass
+ entry.extract()
+ self.save_map(theme, theme_map)
+
+ def remove_theme(self, theme):
+ """
+ Removes specified theme, moving all articles in it to the 'No Assigned Theme' theme.
+ @param theme: Theme to remove
+ """
+ try:
+ #Just remove map from library at the moment
+ #self.move_pages(theme, "No Assigned Theme")
+ library = self.load_map("Library")
+ entry = library.find("topicref", attrs={"navtitle" : theme})
+ if entry != None:
+ os.remove(os.path.join(self.workingDir, entry['href']))
+ entry.extract()
+ self.save_map("Library", library)
+ except Exception:
+ # Trying to remove a theme that doesn't exist, so pretend it worked.
+ pass
+
+ def rename_page(self, theme, old_title, new_title):
+ """
+ renames specified page in specified theme
+ """
+ try:
+ map = self.load_map(theme)
+ page = map.find("topicref", attrs={"navtitle" : old_title})
+ if page != None:
+ page['navtitle'] = new_title
+ self.save_map(theme, map)
+ except Exception:
+ pass
+
+ def rename_theme(self, old_name, new_name):
+ """
+ renames specified theme
+ """
+ library = self.load_map("Library")
+ entry = library.find("topicref", attrs={"navtitle" : old_name})
+ if entry != None and library.find("topicref", attrs={"navtitle" : new_name}) == None:
+ self.add_theme_to_library(new_name)
+ theme = self.load_map(entry['navtitle'])
+ theme.map['name'] = new_name
+ self.save_map(new_name, theme)
+ self.remove_theme(old_name)
+
+ def save_article(self, article, overwrite = True):
+ """
+ wrapper method for save_page to allow saving article objects
+ """
+ title = article.article_title
+ theme = article.article_theme
+ if title != None and theme != None:
+ contents = Article_Builder().get_dita_from_article(article)
+ self.save_page(title, contents, theme, overwrite)
+ else:
+ raise theme_not_found_error("Theme or title not specified")
+
+ def save_map(self, map_name, map_data):
+ """
+ Saves the specified map.
+ @param map_name: Name of map
+ @param map_data: Contents of map
+ """
+ if not os.path.exists(self.workingDir):
+ os.makedirs(self.workingDir, 0777)
+ map = open(os.path.join(self.workingDir, "%s.ditamap" % self.clean_title(map_name)), "w")
+ map.write(map_data.prettify())
+ map.close()
+
+ def save_page(self, title, contents, theme="Downloaded Articles", overwrite=True, get_images=False, statuslabel=None):
+ """
+ Saves the specified page contents as specified title (in optional specified theme).
+ @param title: Title to save as.
+ @param contents: Contents to save.
+ @param theme: Theme to save in.
+ @param overwrite: Boolean to specify overwrite if file already exists.
+ """
+ unique=2
+ new_title = self.clean_title(title) + "-" + self.clean_title(theme)
+ if get_images:
+ contents = self.image_handler(contents, title, statuslabel)
+ directory = os.path.join(self.workingDir, self.clean_title(title))
+ if not os.path.exists(directory):
+ os.makedirs(directory, 0777)
+ if overwrite == False:
+ while os.access(os.path.join(directory, "%s.dita" % new_title), os.F_OK):
+ new_title = self.clean_title(title) + str(unique)
+ unique += 1
+ contents = contents.replace('<prolog>', '<prolog>\n<resourceid id="%d" />' % self.get_unique_article_ID(), 1)
+ file = open(os.path.join(directory, "%s.dita" % new_title), "w")
+ file.write(contents)
+ file.close()
+ self.__add_page_to_library(title, os.path.join(directory, "%s.dita" % new_title), theme)
+ print "Page saved to - " + os.path.join(directory, "%s.dita" % new_title)
+ return os.path.join(directory, "%s.dita" % new_title)
+
+ def validate_image_list(self, image_list):
+ """
+ provides a mechanism for validating image lists and expanding relative paths
+ @param image_list: list of images to validate
+ @return: list of images with corrected paths, and broken images removed
+ """
+ for i in xrange(len(image_list)):
+ if not os.access(image_list[i][0], os.F_OK):
+ if os.access(os.path.join(self.workingDir, image_list[i][0]), os.F_OK):
+ image_list[i] = (os.path.join(self.workingDir, image_list[i][0]), image_list[i][1])
+ else:
+ image = None
+ #removing during for loop was unreliable
+ while None in image_list:
+ image_list.remove(None)
+ return image_list
+
+ class New_URL_Opener(urllib.FancyURLopener):
+ version = "Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11" \ No newline at end of file