Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-x[-rw-r--r--]html2mallard.py44
1 files changed, 39 insertions, 5 deletions
diff --git a/html2mallard.py b/html2mallard.py
index 1c39987..374782f 100644..100755
--- a/html2mallard.py
+++ b/html2mallard.py
@@ -1,10 +1,24 @@
+#!/bin/python
+
from HTMLParser import HTMLParser
import sys
output = []
-tag_conversion = { #'h1': {'name': 'page' },
- 'h1': {'name': 'section', 'next_tag': 'title', 'ignore_end': True}
+# name - name of the mallard tag
+# next_tag - next inline tag (ex: title)
+# ignore_end - ignore the closing tag (ex: section)
+# attrib - attributes of the tag
+# get_attrib - retain the specified attributes from the html tag
+
+tag_conversion = { 'h1': {'name': 'page', 'attrib': {'xmlns': 'http://projectmallard.org/1.0/', 'id': 'index'}, 'next_tag': 'title', 'ignore_end': True },
+ 'h2': {'name': 'section', 'next_tag': 'title', 'ignore_end': True},
+ 'strong': {'name': 'em'},
+ 'ul' : {'name': 'list'},
+ 'ol': {'name': 'list', 'attrib': {'type': 'numbered'}},
+ 'li' : {'name': 'item', 'next_tag': 'p'},
+ 'blockquote': {'name': 'quote'},
+ 'a': {'name': 'link', 'get_attrib': ['href']}
}
# next data should be in this tag, for example if the starting tag is 'section' the next data should be with in '<title>' tags
@@ -29,7 +43,7 @@ class MyHTMLParser(HTMLParser):
def interpret_endtag(tag):
temp = '</'
if tag.lower() in tag_conversion.keys():
- if tag_conversion[tag]['ignore_end']:
+ if 'ignore_end' in tag_conversion[tag].keys() and tag_conversion[tag]['ignore_end']:
return ''
temp += tag_conversion[tag]['name']
@@ -38,6 +52,13 @@ def interpret_endtag(tag):
temp += '>'
return temp
+# returns the attributes of a dictionary as text (key="value" pairs)
+def attrib2text(attrib):
+ temp = ''
+ for key in attrib:
+ temp += ' ' + key + '="' + attrib[key] + '"'
+ return temp
+
# converts html start tags to mallard tags
def interpret_starttag(tag, attrs):
global next_tag
@@ -54,6 +75,14 @@ def interpret_starttag(tag, attrs):
if tag.lower() in tag_conversion.keys():
temp += tag_conversion[tag]['name']
+ if 'attrib' in tag_conversion[tag].keys():
+ temp += attrib2text(tag_conversion[tag]['attrib'])
+ if 'get_attrib' in tag_conversion[tag].keys():
+ for attrib in tag_conversion[tag]['get_attrib']:
+ for a in attrs:
+ if a[0] == attrib:
+ temp += attrib2text({attrib: a[1]})
+ break
if 'next_tag' in tag_conversion[tag].keys():
next_tag = tag_conversion[tag]['next_tag']
if 'ignore_end' in tag_conversion[tag].keys():
@@ -85,7 +114,7 @@ def write_footer():
while len(open_tags):
temp += '</' + tag_conversion[open_tags.pop()]['name'] + '>'
- temp += '</page>'
+ #temp += '</page>'
return temp
# reads the markdown source file
@@ -106,11 +135,16 @@ def write_file():
def main():
input_md = read_file()
- output.append(write_header())
+ #output.append(write_header())
# instantiate the parser and fed it some HTML
parser = MyHTMLParser()
parser.feed(input_md)
+
+ # add section links
+ if 'section' in output:
+ output.append('links type="section"')
+
output.append(write_footer())
write_file()