From 0cf1af4b3d1d731ebcabcb5068edfd522bea8d21 Mon Sep 17 00:00:00 2001
From: Kalpa Welivitigoda <callkalpa@gmail.com>
Date: Mon, 24 Jun 2013 03:02:16 +0000
Subject: implemented strong, ul, ol, li, blockquote and a tags

---
diff --git a/html2mallard.py b/html2mallard.py
index 1c39987..374782f 100644..100755
--- a/html2mallard.py
+++ b/html2mallard.py
@@ -1,10 +1,24 @@
+#!/bin/python
+
 from HTMLParser import HTMLParser
 import sys
 
 output = []
 
-tag_conversion = {	#'h1': {'name': 'page' },
-			'h1': {'name': 'section', 'next_tag': 'title', 'ignore_end': True}
+# name - name of the mallard tag that replaces the html tag
+# next_tag - mallard tag that wraps the data following this tag (e.g. title)
+# ignore_end - do not emit a closing tag when the html tag closes (e.g. section)
+# attrib - fixed attributes added to the mallard tag
+# get_attrib - attributes copied over from the html tag
+
+tag_conversion = {	'h1': {'name': 'page', 'attrib': {'xmlns': 'http://projectmallard.org/1.0/', 'id': 'index'}, 'next_tag': 'title', 'ignore_end': True },
+			'h2': {'name': 'section', 'next_tag': 'title', 'ignore_end': True},
+			'strong': {'name': 'em'},
+			'ul' : {'name': 'list'},
+			'ol': {'name': 'list', 'attrib': {'type': 'numbered'}},
+			'li' : {'name': 'item', 'next_tag': 'p'},
+			'blockquote': {'name': 'quote'},
+			'a': {'name': 'link', 'get_attrib': ['href']}
 	}
 
 # next data should be in this tag, for example if the starting tag is 'section' the next data should be with in '<title>' tags
@@ -29,7 +43,7 @@ class MyHTMLParser(HTMLParser):
 def interpret_endtag(tag):
 	temp = '</'
 	if tag.lower() in tag_conversion.keys():
-		if tag_conversion[tag]['ignore_end']:
+		if 'ignore_end' in tag_conversion[tag].keys() and tag_conversion[tag]['ignore_end']:
 			return ''
 
 		temp += tag_conversion[tag]['name']
@@ -38,6 +52,13 @@ def interpret_endtag(tag):
 	temp += '>'
 	return temp
 
+# returns the attributes in a dictionary as xml attribute text, e.g. ' href="x"'
+def attrib2text(attrib):
+	from xml.sax.saxutils import quoteattr
+	# quoteattr escapes &, < and > and picks safe quotes, keeping the output well-formed;
+	# an html attribute without a value arrives as None and is written as an empty string
+	return ''.join(' ' + key + '=' + quoteattr(attrib[key] or '') for key in attrib)
+
 # converts html start tags to mallard tags
 def interpret_starttag(tag, attrs):
 	global next_tag
@@ -54,6 +75,14 @@ def interpret_starttag(tag, attrs):
 	if tag.lower() in tag_conversion.keys():
 		temp += tag_conversion[tag]['name']
 
+		if 'attrib' in tag_conversion[tag].keys():
+			temp += attrib2text(tag_conversion[tag]['attrib'])
+		if 'get_attrib' in tag_conversion[tag].keys():
+			for attrib in tag_conversion[tag]['get_attrib']:
+				for a in attrs:
+					if a[0] == attrib:
+						temp += attrib2text({attrib: a[1]})
+						break
 		if 'next_tag' in tag_conversion[tag].keys():
 			next_tag = tag_conversion[tag]['next_tag']
 		if 'ignore_end' in tag_conversion[tag].keys():
@@ -85,7 +114,7 @@ def write_footer():
 	while len(open_tags):
 		temp += '</' + tag_conversion[open_tags.pop()]['name'] + '>'
 
-	temp += '</page>'
+	#temp += '</page>'
 	return temp
 
 # reads the markdown source file
@@ -106,11 +135,16 @@ def write_file():
 
 def main():
 	input_md = read_file()
-	output.append(write_header())
+	#output.append(write_header())
 
 	# instantiate the parser and fed it some HTML
 	parser = MyHTMLParser()
 	parser.feed(input_md)
+
+	# add section links: look for an emitted <section tag, since output holds markup chunks
+	if any('<section' in chunk for chunk in output):
+		output.append('<links type="section"/>')
+
 	output.append(write_footer())
 	write_file()
 
--
cgit v0.9.1