Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Aneesh Dogra <lionaneesh@gmail.com> 2012-12-25 18:32:03 (GMT)
committer Aneesh Dogra <lionaneesh@gmail.com> 2012-12-25 18:32:03 (GMT)
commit 4b5c3cf1afc2a447d8302e2d9bbdf592e50f2cc8 (patch)
tree 9be4b25ea6d1d6041b79d427b6cea25d5cddb4f4
parent cd0167548b70263901e25b3b0fc4e715a7c0944b (diff)
Add ability to import text/html files from journal, which can then be used
to serve as article resources.
-rw-r--r-- edit.py 26
-rw-r--r-- infoslicer/processing/Article_Builder.py 2
-rw-r--r-- infoslicer/processing/HTML_strip.py 86
3 files changed, 113 insertions, 1 deletions
diff --git a/edit.py b/edit.py
index e48509e..90715f1 100644
--- a/edit.py
+++ b/edit.py
@@ -15,6 +15,7 @@
from gi.repository import Gtk
from gi.repository import GObject
from gettext import gettext as _
+from sugar3.graphics.objectchooser import ObjectChooser
from sugar3.graphics.toolbutton import ToolButton
from sugar3.graphics.toggletoolbutton import ToggleToolButton
@@ -23,6 +24,8 @@ from infoslicer.widgets.Edit_Pane import Edit_Pane
from infoslicer.widgets.Format_Pane import Format_Pane
from infoslicer.widgets.Image_Pane import Image_Pane
import book
+from infoslicer.processing.HTML_strip import dehtml
+from infoslicer.processing.Article import Article
TABS = (Edit_Pane(),
Image_Pane(),
@@ -54,6 +57,7 @@ class ToolbarBuilder():
self.txt_toggle = ToggleToolButton('ascii')
self.img_toggle = ToggleToolButton('image')
+ self.journal_chooser = ToolButton('ascii')
self.txt_toggle.set_tooltip(_('Text'))
self.txt_toggle.connect('toggled', self._toggle_cb,
@@ -65,19 +69,41 @@ class ToolbarBuilder():
[self.txt_toggle, self.img_toggle])
toolbar.insert(self.img_toggle, -1)
+ self.journal_chooser.set_tooltip(_('Choose Journal Images'))
+ self.journal_chooser.connect('clicked', self._journal_chooser_cb)
+ toolbar.insert(self.journal_chooser, -1)
+
for tab in TABS:
for i in tab.toolitems:
toolbar.insert(i, -1)
self.txt_toggle.set_active(True)
+ def _journal_chooser_cb(self, widget):  # 'clicked' handler of journal_chooser: load a Journal text/html entry as the source article
+ chooser = ObjectChooser(what_filter='text/html')  # chooser is created with a text/html filter; NOTE(review): the button tooltip says 'Choose Journal Images'
+ result = chooser.run()
+ if result == Gtk.ResponseType.ACCEPT:  # anything other than ACCEPT is ignored
+ jobject = chooser.get_selected_object()
+ if jobject and jobject.file_path:  # skip selections that have no file path
+ title = str(jobject.metadata['title'])  # the entry's 'title' metadata becomes the article title
+ path = str(jobject.file_path)
+ fp = open(path, 'r')  # NOTE(review): not closed if read() raises; a with-block would be safer
+ text = fp.read()
+ fp.close()
+ article_data = dehtml(text, title)  # convert the HTML text into article data
+ print Article(article_data), book.wiki.article  # NOTE(review): looks like leftover debug output; builds an extra Article just to print it
+ TABS[0].set_source_article(Article(article_data))  # TABS[0] is the Edit_Pane instance
+
+
def sensitize_all(self):
self.txt_toggle.set_sensitive(True)
self.img_toggle.set_sensitive(True)
+ self.journal_chooser.set_sensitive(True)
def unsensitize_all(self):
self.txt_toggle.set_sensitive(False)
self.img_toggle.set_sensitive(False)
+ self.journal_chooser.set_sensitive(False)
def _toggle_cb(self, widget, toggles):
for tab in TABS:
diff --git a/infoslicer/processing/Article_Builder.py b/infoslicer/processing/Article_Builder.py
index b6bd750..80c76b3 100644
--- a/infoslicer/processing/Article_Builder.py
+++ b/infoslicer/processing/Article_Builder.py
@@ -150,7 +150,7 @@ def get_article_from_dita(image_path, dita):
else:
image_list.append((img['href'], caption, img['orig_href']))
- data = Article_Data(article_id, article_id, article_title, "theme", section_data_list, image_list)
+ data = Article_Data(article_id, article_id, article_title, "theme", section_data_list, image_list)
return data
diff --git a/infoslicer/processing/HTML_strip.py b/infoslicer/processing/HTML_strip.py
new file mode 100644
index 0000000..e41ce72
--- /dev/null
+++ b/infoslicer/processing/HTML_strip.py
@@ -0,0 +1,86 @@
+# Copyright (C) 2012 Aneesh Dogra <lionaneesh@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+from HTMLParser import HTMLParser
+from re import sub
+from infoslicer.processing.Article_Data import Sentence_Data, \
+ Paragraph_Data, \
+ Section_Data, \
+ Article_Data
+import string
+
+class HTML_Strip(HTMLParser):  # HTMLParser subclass that collects text and inserts <PARAGRAPH>/<SENTENCE>/<SECTION> markers for p/br/div start tags
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.__text = []  # text fragments and marker strings, in the order they were seen
+
+ def handle_data(self, data):  # text-node callback: normalise whitespace, keep printable characters only
+ text = data.strip()
+ if len(text) > 0:  # whitespace-only text nodes are dropped
+ text = sub('[\t\r\n]+', '', text)  # tabs and line breaks are removed outright, not replaced by a space
+ # collapse runs of spaces into a single space
+ text = sub('[ ]+', ' ', text)
+ text = filter(lambda x: x in string.printable, text)  # drop characters outside string.printable; NOTE(review): relies on Python 2 filter() returning a str
+ self.__text.append(text + '')  # NOTE(review): the + '' looks like a no-op
+
+ def handle_starttag(self, tag, attrs):  # start-tag callback: emit a structural marker for p, br and div; other tags are ignored
+ if tag == 'p':
+ self.__text.append('<PARAGRAPH>')
+ elif tag == 'br':
+ self.__text.append('<SENTENCE>')
+ if tag == 'div':  # NOTE(review): plain 'if' after the if/elif chain; tag holds one value, so an elif would behave the same
+ self.__text.append('<SECTION>')
+
+ def text(self):  # return everything collected so far as one string
+ return ''.join(self.__text).strip()  # fragments are joined without a separator, then stripped
+
+
+# Takes an HTML document and a title and returns an Article_Data object.
+def dehtml(text, title):  # build an Article_Data titled 'title' from the HTML string 'text'
+ try:
+ parser = HTML_Strip()
+ parser.feed(text)
+ parser.close()
+ text_stripped = parser.text()  # text with <SECTION>/<PARAGRAPH>/<SENTENCE> markers in place of div/p/br tags
+ except:  # NOTE(review): bare except; on any failure the raw, unstripped input is used instead
+ text_stripped = text
+
+ # Split the marker-delimited text into sections, paragraphs and
+ # sentences, wrapping each level in its *_Data object.
+ sections = text_stripped.split('<SECTION>')
+ section_objs = []
+ for section in sections:
+ s = section.strip()
+ if s:  # empty sections are skipped
+ paragraphs = text_stripped.split('<PARAGRAPH>')  # NOTE(review): splits the whole document, not 'section', so every section gets all paragraphs; likely meant section.split
+ p_objs = []
+ for para in paragraphs:
+ if para[:len('<SECTION>')] == '<SECTION>':  # remove a leading <SECTION> marker from the paragraph text
+ para = para[len('<SECTION>'):]
+ if para.endswith('<SECTION>'):  # remove a trailing <SECTION> marker as well
+ para = para[:-len('<SECTION>')]
+ p = para.strip()
+ if p:  # empty paragraphs are skipped
+ sentences = para.split('<SENTENCE>')
+ s_objs = []
+ for sentence in sentences:
+ s = sentence.strip()  # NOTE(review): reuses the name 's' from the section loop
+ if s:
+ s_objs += [Sentence_Data(text=s)]
+ s_objs += [Sentence_Data(text='\n')]  # newline sentence; NOTE(review): indentation is lost in this view, presumably appended once per paragraph - confirm
+ p_objs += [Paragraph_Data(sentences_data=s_objs)]
+ section_objs += [Section_Data(paragraphs_data=p_objs)]
+ return Article_Data(article_title=title, sections_data=section_objs)