diff options
author | Walter Bender <walter.bender@gmail.com> | 2013-03-27 12:45:09 (GMT) |
---|---|---|
committer | Walter Bender <walter.bender@gmail.com> | 2013-03-27 12:45:09 (GMT) |
commit | 5fe6198b47a0022c4731d09949a5bfe17656c2a4 (patch) | |
tree | f10a8d0e4fda90985186f22e497f09072605a802 | |
parent | 3a09ae6c3249a3151488d947e3d8052f551f633d (diff) |
Add journal text import; PL wikipedia; new artwork
-rw-r--r-- | NEWS | 8 | ||||
-rw-r--r-- | activity/activity.info | 2 | ||||
-rw-r--r-- | edit.py | 45 | ||||
-rw-r--r-- | icons/journal-image.svg | 76 | ||||
-rw-r--r-- | icons/load-image-from-journal.svg | 83 | ||||
-rw-r--r-- | icons/load-text-from-journal.svg | 97 | ||||
-rw-r--r-- | infoslicer/processing/HTML_strip.py | 89 | ||||
-rw-r--r-- | library.py | 11 |
8 files changed, 399 insertions, 12 deletions
@@ -1,3 +1,11 @@ +17 + +ENHANCEMENTS: +* Load text from Sugar Journal (w/ Aneesh Dogra) +* Load images from Sugar Journal (w/ Aneesh Dogra) +* Add Polish wikipedia to library (w/ ) +* New translations + 15 ENHANCEMENT: diff --git a/activity/activity.info b/activity/activity.info index 5ba8956..fbd30c2 100644 --- a/activity/activity.info +++ b/activity/activity.info @@ -1,6 +1,6 @@ [Activity] name = InfoSlicer -activity_version = 15 +activity_version = 17 host_version = 1 icon = slicelogo bundle_id = org.sugarlabs.InfoSlicer @@ -13,6 +13,7 @@ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA from gi.repository import Gtk +from gi.repository import Gdk from gi.repository import GObject from gettext import gettext as _ @@ -26,6 +27,8 @@ from infoslicer.widgets.Edit_Pane import Edit_Pane from infoslicer.widgets.Format_Pane import Format_Pane from infoslicer.widgets.Image_Pane import Image_Pane from infoslicer.widgets.Journal_Image_Pane import Journal_Image_Pane +from infoslicer.processing.HTML_strip import dehtml +from infoslicer.processing.Article import Article import book @@ -60,8 +63,9 @@ class ToolbarBuilder(): self.txt_toggle = ToggleToolButton('ascii') self.img_toggle = ToggleToolButton('image') - self.jimg_chooser_toggle = ToolButton('image') - self.jimg_toggle = ToggleToolButton('image') + self.jimg_toggle = ToggleToolButton('journal-image') + self.jimg_chooser_toggle = ToolButton('load-image-from-journal') + self.jtext_chooser_toggle = ToolButton('load-text-from-journal') self.txt_toggle.set_tooltip(_('Text')) self.txt_toggle.connect('toggled', self._toggle_cb, @@ -73,15 +77,18 @@ class ToolbarBuilder(): [self.txt_toggle, self.img_toggle, self.jimg_toggle]) toolbar.insert(self.img_toggle, -1) - self.jimg_chooser_toggle.set_tooltip(_('Choose Journal Images')) - self.jimg_chooser_toggle.connect('clicked', self._toggle_image_chooser) - toolbar.insert(self.jimg_chooser_toggle, -1) - self.jimg_toggle.set_tooltip(_('Journal Images')) self.jimg_toggle.connect('toggled', self._toggle_cb, [self.txt_toggle, self.img_toggle, self.jimg_toggle]) toolbar.insert(self.jimg_toggle, -1) + self.jimg_chooser_toggle.set_tooltip(_('Choose Journal Images')) + self.jimg_chooser_toggle.connect('clicked', self._toggle_image_chooser) + toolbar.insert(self.jimg_chooser_toggle, -1) + + self.jtext_chooser_toggle.set_tooltip(_('Choose Journal Text')) + self.jtext_chooser_toggle.connect('clicked', self._toggle_text_chooser) + toolbar.insert(self.jtext_chooser_toggle, -1) for tab in TABS: for i in tab.toolitems: @@ -100,6 +107,11 @@ class ToolbarBuilder(): self.jimg_toggle.set_sensitive(False) def _toggle_image_chooser(self, widget): + self._old_cursor = self.edit.get_window().get_cursor() + self.edit.get_window().set_cursor(Gdk.Cursor.new(Gdk.CursorType.WATCH)) + GObject.idle_add(self.__image_chooser) + + def __image_chooser(self): chooser = ObjectChooser(what_filter=mime.GENERIC_TYPE_IMAGE) result = chooser.run() if result == Gtk.ResponseType.ACCEPT: @@ -108,6 +120,27 @@ class ToolbarBuilder(): title = str(jobject.metadata['title']) path = str(jobject.file_path) TABS[2].gallery.add_image(path, title) + self.edit.get_window().set_cursor(self._old_cursor) + + def _toggle_text_chooser(self, widget): + self._old_cursor = self.edit.get_window().get_cursor() + self.edit.get_window().set_cursor(Gdk.Cursor.new(Gdk.CursorType.WATCH)) + GObject.idle_add(self.__text_chooser) + + def __text_chooser(self): + chooser = ObjectChooser(what_filter=mime.GENERIC_TYPE_TEXT) + result = chooser.run() + if result == Gtk.ResponseType.ACCEPT: + jobject = chooser.get_selected_object() + if jobject and jobject.file_path: + title = str(jobject.metadata['title']) + path = str(jobject.file_path) + fp = open(path, 'r') + text = fp.read() + fp.close() + article_data = dehtml(text, title) + TABS[0].set_source_article(Article(article_data)) + self.edit.get_window().set_cursor(self._old_cursor) def _toggle_cb(self, widget, toggles): for tab in TABS: diff --git a/icons/journal-image.svg b/icons/journal-image.svg new file mode 100644 index 0000000..e0118e9 --- /dev/null +++ b/icons/journal-image.svg @@ -0,0 +1,76 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + version="1.1" + width="55" + height="55" + viewBox="0 0 55 55" + id="svg2" + xml:space="preserve"><metadata + id="metadata25"><rdf:RDF><cc:Work + rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:title></dc:title></cc:Work></rdf:RDF></metadata><defs + id="defs33" /><g + transform="matrix(0.65959435,0,0,0.65959435,2.2217886,-0.13950423)" + id="clipping-image" + style="display:block"><g + id="g5" + style="display:inline"><g + id="g7"><polygon + points="48.788,23.002 36.849,11.058 5.962,11.058 5.962,43.944 48.788,43.944 " + id="polygon9" + style="fill:#ffffff;stroke:#010101;stroke-width:3.5" /><polyline + style="fill:none;stroke:#010101;stroke-width:3.5" + points="36.849,11.058 36.849,23.002 48.788,23.002 " + id="polyline11" /></g></g><path + d="m 27.504,23.342 c -6.258,0 -11.471,6.241 -11.471,6.241 0,0 5.213,6.271 11.471,6.267 6.259,-0.005 11.475,-6.274 11.475,-6.274 0,0 -5.216,-6.238 -11.475,-6.234 z m 0,10.642 c -2.423,0 -4.387,-1.966 -4.387,-4.389 0,-2.419 1.964,-4.388 4.387,-4.388 2.42,0 4.386,1.969 4.386,4.388 0,2.424 -1.966,4.389 -4.386,4.389 z" + id="path13" + style="fill:#010101;display:inline" /><circle + cx="27.504" + cy="29.597" + r="1.9910001" + id="circle15" + style="fill:#010101;display:inline" /></g><g + transform="matrix(1.079797,0,0,1.079797,-5.2529966,-5.2292405)" + id="g3013" + style="stroke:#ffffff;stroke-opacity:1"><g + transform="matrix(0.55205508,0,0,0.55205508,77.118464,18.235971)" + id="g4382" + style="stroke:#ffffff;stroke-opacity:1"><g + transform="translate(-80.093659,12.220029)" + id="g4308" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><g + id="g4310" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><path + d="m 6.736,49.002 h 24.52 c 2.225,0 3.439,-1.447 3.439,-3.441 v -27.28 c 0,-1.73 -1.732,-3.441 -3.439,-3.441 h -4.389" + id="path4312" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /></g></g><g + transform="translate(-80.093659,12.220029)" + id="g4314" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><g + id="g4316" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><path + d="m 26.867,38.592 c 0,1.836 -1.345,3.201 -3.441,4.047 L 6.736,49.002 V 14.84 l 16.69,-8.599 c 2.228,-0.394 3.441,0.84 3.441,2.834 v 29.517 z" + id="path4318" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /></g></g><path + d="m -70.669659,54.827029 c 0,0 -1.351,-0.543 -2.702,-0.543 -1.351,0 -2.703,0.543 -2.703,0.543" + id="path4320" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><path + d="m -70.669659,44.226029 c 0,0 -1.239,-0.543 -2.815,-0.543 -1.577,0 -2.59,0.543 -2.59,0.543" + id="path4322" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><path + d="m -70.669659,33.898029 c 0,0 -1.125,-0.544 -2.927,-0.544 -1.802,0 -2.478,0.544 -2.478,0.544" + id="path4324" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><line + id="line4326" + y2="23.725029" + y1="58.753029" + x2="-66.884659" + x1="-66.884659" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /></g></g></svg>
\ No newline at end of file diff --git a/icons/load-image-from-journal.svg b/icons/load-image-from-journal.svg new file mode 100644 index 0000000..b8307c8 --- /dev/null +++ b/icons/load-image-from-journal.svg @@ -0,0 +1,83 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + version="1.1" + width="55" + height="55" + viewBox="0 0 55 55" + id="svg2" + xml:space="preserve"><metadata + id="metadata25"><rdf:RDF><cc:Work + rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:title></dc:title></cc:Work></rdf:RDF></metadata><defs + id="defs33" /><g + transform="matrix(0.55205508,0,0,0.55205508,77.118464,18.235971)" + id="g4382"><g + transform="translate(-80.093659,12.220029)" + id="g4308" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><g + id="g4310" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><path + d="m 6.736,49.002 h 24.52 c 2.225,0 3.439,-1.447 3.439,-3.441 v -27.28 c 0,-1.73 -1.732,-3.441 -3.439,-3.441 h -4.389" + id="path4312" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /></g></g><g + transform="translate(-80.093659,12.220029)" + id="g4314" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><g + id="g4316" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><path + d="m 26.867,38.592 c 0,1.836 -1.345,3.201 -3.441,4.047 L 6.736,49.002 V 14.84 l 16.69,-8.599 c 2.228,-0.394 3.441,0.84 3.441,2.834 v 29.517 z" + id="path4318" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /></g></g><path + d="m -70.669659,54.827029 c 0,0 -1.351,-0.543 -2.702,-0.543 -1.351,0 -2.703,0.543 -2.703,0.543" + id="path4320" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><path + d="m -70.669659,44.226029 c 0,0 -1.239,-0.543 -2.815,-0.543 -1.577,0 -2.59,0.543 -2.59,0.543" + id="path4322" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><path + d="m -70.669659,33.898029 c 0,0 -1.125,-0.544 -2.927,-0.544 -1.802,0 -2.478,0.544 -2.478,0.544" + id="path4324" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><line + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" + x1="-66.884659" + x2="-66.884659" + y1="58.753029" + y2="23.725029" + id="line4326" /></g><g + transform="matrix(1.1623273,0,0,1.1623273,-14.422024,-12.63995)" + id="g3882" + style="fill:none;stroke:#ffffff;stroke-width:2.15085721;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;display:inline"><g + id="g3884" + style="fill:none;stroke:#ffffff;stroke-width:2.15085721;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none"><polygon + points="35.281,13.812 15.204,13.812 15.204,35.189 43.041,35.189 43.041,21.577 " + id="polygon3886" + style="fill:none;stroke:#ffffff;stroke-width:2.15085721;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none" /><polyline + id="polyline3888" + points="35.281,13.812 35.281,21.577 43.041,21.577 " + style="fill:none;stroke:#ffffff;stroke-width:2.15085721;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none" /></g></g><path + d="m 19.426691,12.275117 c -4.727185,0 -8.666312,4.714399 -8.666312,4.714399 0,0 3.939127,4.737646 8.666312,4.735322 4.729509,-0.0046 8.668637,-4.739971 8.668637,-4.739971 0,0 -3.939128,-4.713237 -8.668637,-4.70975 z m 0,8.039818 c -1.830666,0 -3.314958,-1.484292 -3.314958,-3.31612 0,-1.827179 1.484292,-3.314958 3.314958,-3.314958 1.828341,0 3.312632,1.487779 3.312632,3.314958 0,1.831828 -1.484291,3.31612 -3.312632,3.31612 z" + id="path3890" + style="fill:#ffffff;fill-opacity:1;stroke:none;display:inline" /><circle + cx="29.207001" + cy="25.863001" + r="1.294" + transform="matrix(1.1623273,0,0,1.1623273,-14.520241,-13.061294)" + id="circle3892" + style="fill:#ffffff;fill-opacity:1;stroke:none;display:inline" /><g + transform="matrix(-1,0,0,1,90.661358,9.6560695)" + id="g4770"><g + transform="translate(34.0803,-1006.42)" + id="g4772"><polyline + transform="matrix(-0.469241,0.469241,-0.469241,-0.469241,66.2906,1019.03)" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round" + points="51.562,15.306 41.17,16.188 42.053,5.794" + id="polyline4774" /><path + d="m 39.363241,1033.1291 -0.05636,9.9115 -8.750608,0.067" + id="path4776" + style="fill:none;stroke:#ffffff;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none" /></g></g></svg>
\ No newline at end of file diff --git a/icons/load-text-from-journal.svg b/icons/load-text-from-journal.svg new file mode 100644 index 0000000..b3db477 --- /dev/null +++ b/icons/load-text-from-journal.svg @@ -0,0 +1,97 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + version="1.1" + width="55" + height="55" + viewBox="0 0 55 55" + id="svg2" + xml:space="preserve"><metadata + id="metadata25"><rdf:RDF><cc:Work + rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:title></dc:title></cc:Work></rdf:RDF></metadata><defs + id="defs33" /><g + transform="matrix(0.55205508,0,0,0.55205508,77.118464,18.235971)" + id="g4382"><g + transform="translate(-80.093659,12.220029)" + id="g4308" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><g + id="g4310" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><path + d="m 6.736,49.002 h 24.52 c 2.225,0 3.439,-1.447 3.439,-3.441 v -27.28 c 0,-1.73 -1.732,-3.441 -3.439,-3.441 h -4.389" + id="path4312" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /></g></g><g + transform="translate(-80.093659,12.220029)" + id="g4314" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><g + id="g4316" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><path + d="m 26.867,38.592 c 0,1.836 -1.345,3.201 -3.441,4.047 L 6.736,49.002 V 14.84 l 16.69,-8.599 c 2.228,-0.394 3.441,0.84 3.441,2.834 v 29.517 z" + id="path4318" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /></g></g><path + d="m -70.669659,54.827029 c 0,0 -1.351,-0.543 -2.702,-0.543 -1.351,0 -2.703,0.543 -2.703,0.543" + id="path4320" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><path + d="m -70.669659,44.226029 c 0,0 -1.239,-0.543 -2.815,-0.543 -1.577,0 -2.59,0.543 -2.59,0.543" + id="path4322" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><path + d="m -70.669659,33.898029 c 0,0 -1.125,-0.544 -2.927,-0.544 -1.802,0 -2.478,0.544 -2.478,0.544" + id="path4324" + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" /><line + style="fill:none;stroke:#ffffff;stroke-width:2.25;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1" + x1="-66.884659" + x2="-66.884659" + y1="58.753029" + y2="23.725029" + id="line4326" /></g><g + transform="matrix(0,-1,1,0,9.8287336,88.386573)" + id="g4770"><g + transform="translate(34.0803,-1006.42)" + id="g4772"><polyline + transform="matrix(-0.469241,0.469241,-0.469241,-0.469241,66.2906,1019.03)" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-linecap:round;stroke-linejoin:round" + points="51.562,15.306 41.17,16.188 42.053,5.794" + id="polyline4774" /><path + d="m 39.363241,1033.1291 -0.05636,9.9115 -8.750608,0.067" + id="path4776" + style="fill:none;stroke:#ffffff;stroke-width:2.5;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none" /></g></g><g + transform="matrix(0.75578519,0,0,0.75578519,-4.9396196,-1.2911009)" + id="clipping-text" + style="fill:none;stroke:#ffffff;stroke-opacity:1;display:block"><g + id="g3152" + style="fill:none;stroke:#ffffff;stroke-opacity:1;display:inline"><g + id="g3154" + style="fill:none;stroke:#ffffff;stroke-opacity:1"><polygon + points="10.932,6.088 31.874,6.088 43.818,18.027 43.818,48.914 10.932,48.914 " + id="polygon3156" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-opacity:1" /><polyline + id="polyline3158" + points="43.818,18.027 31.874,18.027 31.874,6.088 " + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-opacity:1" /></g></g><line + id="line3160" + y2="26.25" + y1="26.25" + x2="36.875" + x1="17.875" + display="inline" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-opacity:1;display:inline" /><line + id="line3162" + y2="33.25" + y1="33.25" + x2="36.875" + x1="17.875" + display="inline" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-opacity:1;display:inline" /><line + id="line3164" + y2="40.25" + y1="40.25" + x2="36.875" + x1="17.875" + display="inline" + style="fill:none;stroke:#ffffff;stroke-width:3.5;stroke-opacity:1;display:inline" /></g></svg>
\ No newline at end of file diff --git a/infoslicer/processing/HTML_strip.py b/infoslicer/processing/HTML_strip.py new file mode 100644 index 0000000..cdd5108 --- /dev/null +++ b/infoslicer/processing/HTML_strip.py @@ -0,0 +1,89 @@ +# Copyright (C) 2012 Aneesh Dogra <lionaneesh@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +from HTMLParser import HTMLParser +from re import sub +from infoslicer.processing.Article_Data import Sentence_Data, \ + Paragraph_Data, \ + Section_Data, \ + Article_Data +import string + +def filter_non_printable(str): + return ''.join([c for c in str if ord(c) > 31 or ord(c) == 9]) + +class HTML_Strip(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.__text = [] + + def handle_data(self, data): + text = data.strip() + if len(text) > 0: + text = sub('[\t\r\n]+', '', text) + # replace multiple spaces with one + text = sub('[ ]+', ' ', text) + text = filter_non_printable(text) + self.__text.append(text + '') + + def handle_starttag(self, tag, attrs): + if tag == 'p': + self.__text.append('<PARAGRAPH>') + elif tag == 'br': + self.__text.append('<SENTENCE>') + if tag == 'div': + self.__text.append('<SECTION>') + + def text(self): + return ''.join(self.__text).strip() + + +# takes in a HTML document and returns a list of Section objects. +def dehtml(text, title): + try: + parser = HTML_Strip() + parser.feed(text) + parser.close() + text_stripped = parser.text() + except: + text_stripped = text + + # We now need to convert this stripped data to an + # Article Data object. + sections = text_stripped.split('<SECTION>') + section_objs = [] + for section in sections: + s = section.strip() + if s: + paragraphs = text_stripped.split('<PARAGRAPH>') + p_objs = [] + for para in paragraphs: + if para[:len('<SECTION>')] == '<SECTION>': + para = para[len('<SECTION>'):] + if para.endswith('<SECTION>'): + para = para[:-len('<SECTION>')] + p = para.strip() + if p: + sentences = para.split('<SENTENCE>') + s_objs = [] + for sentence in sentences: + s = sentence.strip() + if s: + s_objs += [Sentence_Data(text=s)] + s_objs += [Sentence_Data(text='\n')] + p_objs += [Paragraph_Data(sentences_data=s_objs)] + section_objs += [Section_Data(paragraphs_data=p_objs)] + return Article_Data(article_title=title, sections_data=section_objs) @@ -236,8 +236,9 @@ class ToolbarBuilder(): def _publish_clicked_cb(self, widget): xol.publish(self.activity) -WIKI = { _("English Wikipedia") : "en.wikipedia.org", - _("Simple English Wikipedia") : "simple.wikipedia.org", - _("German Wikipedia") : "de.wikipedia.org", - _("Spanish Wikipedia") : "es.wikipedia.org", - _("French Wikipedia") : "fr.wikipedia.org" } +WIKI = { _('English Wikipedia') : 'en.wikipedia.org', + _('Simple English Wikipedia') : 'simple.wikipedia.org', + _('French Wikipedia') : 'fr.wikipedia.org', + _('German Wikipedia') : 'de.wikipedia.org', + _('Polish Wikipedia') : 'pl.wikipedia.org', + _('Spanish Wikipedia') : 'es.wikipedia.org'} |