diff options
author | Manuel Kaufmann <humitos@gmail.com> | 2012-05-23 16:10:55 (GMT) |
---|---|---|
committer | Manuel Kaufmann <humitos@gmail.com> | 2012-05-23 16:10:55 (GMT) |
commit | cfdc23f0baa5b749a2e2ef62c4f8f62a37e2f9ee (patch) | |
tree | ecd66af165012810a108f2deec1a4a97ad8f72a0 | |
parent | 2897ffae4f850d9f6f6ed7b8a6513305e192d6bd (diff) |
Export content as HTML SL #3608
Added HTML files to the bundle so that its content can be opened with the
Browse Activity. The old dita/ditamap files are not removed because they are
parsed to generate the new HTML files.
Besides, the HTML generation still relies on these dita files so that the code
changes in this cycle are not too aggressive. We will improve this code in the
future and remove the dita generation completely.
-rw-r--r-- | article.html | 9 | ||||
-rw-r--r-- | parse.py | 61 | ||||
-rw-r--r-- | xol.py | 7 |
3 files changed, 76 insertions, 1 deletions
diff --git a/article.html b/article.html
new file mode 100644
index 0000000..f57266a
--- /dev/null
+++ b/article.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>%(title)s</title>
+    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+    <link rel="stylesheet" href="ditastyle.css" type="text/css" />
+  </head>
+  <body>%(body)s</body>
+</html>
diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000..96383ac
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+from gettext import gettext as _
+from BeautifulSoup import BeautifulSoup
+
+
+def parse_dita(dita_str):
+    soup = BeautifulSoup(dita_str)
+
+    html = open('article.html', 'r').read().decode('utf-8')
+
+    html_tags = []
+
+    title = soup.find('title').string.strip()
+    h1_title = '<h1>%(title)s</h1>' % \
+        {'title': title}
+    index_link = '<p><a href="librarymap.html">' + \
+        _('Return to index') + '</a></p>'
+
+    html_tags.append(index_link)
+    html_tags.append(h1_title)
+
+    for section in soup.findAll('section'):
+        for p in section.findAll('p'):
+            images = p.findAll('image')
+            for img in images:
+                html_tags.append('<img src="%(src)s" />' % \
+                                     {'src': img.get('href')})
+            html_tags.append('<p>')
+            for ph in p.findAll('ph'):
+                html_tags.append(ph.string.strip())
+            html_tags.append('</p>')
+
+    html = html % {'title': title,
+                   'body': '\n'.join(html_tags)}
+    return html
+
+
+def parse_ditamap(ditamap_str):
+    soup = BeautifulSoup(ditamap_str)
+    html = open('article.html', 'r').read().decode('utf-8')
+
+    html_tags = []
+
+    title = soup.find('map').get('title')
+
+    h1_title = '<h1>%(title)s</h1>' % \
+        {'title': title}
+    html_tags.append(h1_title)
+
+    html_tags.append('<li>')
+    for topic in soup.findAll('topicref'):
+        dita_path = topic.get('href')
+        html_tags.append('<ul><a href="%(href)s">%(name)s</a></ul>' % \
+                             {'href': dita_path.replace('.dita', '.html'),
+                              'name': topic.get('navtitle')})
+    html_tags.append('</li>')
+
+    html = html % {'title': title,
+                   'body': '\n'.join(html_tags)}
+    return html
diff --git a/xol.py b/xol.py
--- a/xol.py
+++ b/xol.py
@@ -19,6 +19,7 @@
 import gtk
 import zipfile
 import uuid
 import logging
+import parse
 from glob import glob
 from gettext import gettext as _
@@ -152,6 +153,8 @@ def _dita_management(zip, uid, title):
                        'href="ditastylesheet.xsl"?>')
         zipstr(zip, os.path.join(uid, 'slicecontent', '%s.dita' % auid),
                content.prettify())
+        zipstr(zip, os.path.join(uid, 'slicecontent', '%s.html' % auid),
+               parse.parse_dita(content.prettify()))
         map.append('<topicref href="%s.dita" navtitle="%s">' %
                    (auid, atitle))
         map.append('</topicref>')
@@ -159,12 +162,14 @@ def _dita_management(zip, uid, title):
     map.append('</map>')
     zipstr(zip, os.path.join(uid, 'slicecontent', 'librarymap.ditamap'),
            "\n".join(map))
+    zipstr(zip, os.path.join(uid, 'slicecontent', 'librarymap.html'),
+           parse.parse_ditamap("\n".join(map)))
 
 
 def _index_redirect(zip, uid):
     """
     Creates the redirecting index.html
     """
-    redirect_loc = 'slicecontent/librarymap.ditamap'
+    redirect_loc = 'slicecontent/librarymap.html'
     html = ['<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">',\
             '<html>',\