diff options
Diffstat (limited to 'JournalExport.py')
-rw-r--r-- | JournalExport.py | 86 |
1 files changed, 86 insertions, 0 deletions
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Aneesh Dogra <lionaneesh@gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
#
# Journal Export
from sugar3.datastore import datastore
from sugar3.activity import activity
import dataretriever
import platform
import StringIO
import os
from server import WPWikiDB
from server import HTMLOutputBuffer
from mwlib import parser, scanner, expander, rendermath, htmlwriter

# Platform tag handed to DataRetriever: lower-cased OS name followed by the
# first two characters of the architecture string (e.g. "linux32").
system_id = "%s%s" % (platform.system().lower(),
                      platform.architecture()[0][0:2])

# Scratch directory in which each article's HTML file is written before it
# is handed to the datastore.
instance_dir = os.path.join(activity.get_activity_root(), 'instance')


class JournalExport:
    """Export wiki articles to the Sugar Journal as HTML entries."""

    def __init__(self, confvars):
        """Set up the wiki database and data retriever.

        confvars -- mapping that must provide the keys 'path', 'lang',
                    'templateprefix' and 'templateblacklist'.
        """
        self.wikidb = WPWikiDB(confvars['path'], confvars['lang'],
                               confvars['templateprefix'],
                               confvars['templateblacklist'])
        self.dataretriever = dataretriever.DataRetriever(system_id,
                                                         confvars['path'])
        self.confvars = confvars

    def export_all(self, progressbar):
        """Render every article to HTML and store each one in the Journal.

        progressbar -- object exposing set_fraction(); after each article
                       it receives the completed share as a float in
                       (0.0, 1.0].
        """
        article_list = self.search('')  # get all articles
        total = len(article_list)
        for index, title in enumerate(article_list):
            htmloutput = self._render_html(title)

            # A title may contain the path separator (e.g. "AC/DC"); replace
            # it so the scratch file stays directly inside instance_dir.
            # Only the file name is altered, the Journal title is not.
            filename = os.path.join(instance_dir,
                                    title.replace(os.sep, '_') + '.html')
            print(filename)
            with open(filename, 'w') as fp:
                fp.write(htmloutput)

            try:
                journal_entry = datastore.create()
                journal_entry.metadata['title'] = title
                journal_entry.metadata['title_set_by_user'] = '1'
                journal_entry.metadata['mime_type'] = 'text/html'
                journal_entry.file_path = filename
                datastore.write(journal_entry)
            finally:
                # Remove the scratch file even if the datastore write fails.
                os.remove(filename)

            # float() forces true division: with Python 2 integer division
            # the fraction stayed at 0 until the very last article.
            progressbar.set_fraction(float(index + 1) / total)

    def _render_html(self, title):
        """Return the HTML rendering of the article named *title*."""
        article_text = self.wikidb.getRawArticle(title).encode('utf8')
        article_text = self.wikidb.expandArticle(article_text, title)
        tokens = scanner.tokenize(article_text, title)
        wiki_parsed = parser.Parser(tokens, title).parse()
        wiki_parsed.caption = title
        out = StringIO.StringIO()
        htmlwriter.HTMLWriter(out).write(wiki_parsed)
        return out.getvalue()

    def search(self, article_title):
        """Return the search results for *article_title*.

        The query is delegated to the data retriever owned by self.wikidb.
        """
        return self.wikidb.dataretriever.search(article_title)

    def getRawArticle(self, title):
        """Return the whitespace-stripped text of the article *title*.

        An empty title yields ''.

        NOTE(review): the original comment spoke of recursively following
        #redirects, but no redirect handling is implemented here.
        """
        if title == '':
            return ''

        article_text = \
            self.dataretriever.get_text_article(title).decode('utf-8')

        # Stripping leading & trailing whitespace fixes template expansion.
        return article_text.strip()