From 0cf1af4b3d1d731ebcabcb5068edfd522bea8d21 Mon Sep 17 00:00:00 2001
From: Kalpa Welivitigoda
Date: Mon, 24 Jun 2013 03:02:16 +0000
Subject: implemented strong, ul, ol, li, blockquote and a tags

---
diff --git a/html2mallard.py b/html2mallard.py
index 1c39987..374782f 100644..100755
--- a/html2mallard.py
+++ b/html2mallard.py
@@ -1,10 +1,24 @@
+#!/usr/bin/env python2
+
 from HTMLParser import HTMLParser
 import sys
 
 output = []
 
-tag_conversion = { #'h1': {'name': 'page' },
-                'h1': {'name': 'section', 'next_tag': 'title', 'ignore_end': True}
+# name       - name of the mallard tag that replaces the html tag
+# next_tag   - inline tag that wraps the data following the start tag (ex: title)
+# ignore_end - do not emit a closing tag when the html tag closes (ex: section)
+# attrib     - fixed attributes always written on the mallard tag
+# get_attrib - names of html attributes copied over to the mallard tag
+
+tag_conversion = { 'h1': {'name': 'page', 'attrib': {'xmlns': 'http://projectmallard.org/1.0/', 'id': 'index'}, 'next_tag': 'title', 'ignore_end': True },
+                'h2': {'name': 'section', 'next_tag': 'title', 'ignore_end': True},
+                'strong': {'name': 'em'},
+                'ul': {'name': 'list'},
+                'ol': {'name': 'list', 'attrib': {'type': 'numbered'}},
+                'li': {'name': 'item', 'next_tag': 'p'},
+                'blockquote': {'name': 'quote'},
+                'a': {'name': 'link', 'get_attrib': ['href']}
 }
 
 # next data should be in this tag, for example if the starting tag is 'section' the next data should be with in '' tags
@@ -29,7 +43,7 @@ class MyHTMLParser(HTMLParser):
 def interpret_endtag(tag):
     temp = '</'
     if tag.lower() in tag_conversion.keys():
-        if tag_conversion[tag]['ignore_end']:
+        if tag_conversion[tag].get('ignore_end'):
             return ''
         temp += tag_conversion[tag]['name']
 
@@ -38,6 +52,13 @@ def interpret_endtag(tag):
     temp += '>'
     return temp
 
+# returns the attributes in a dictionary as text (' key="value"' per entry), XML-escaping each value
+def attrib2text(attrib):
+    temp = ''
+    for key in attrib:
+        temp += ' ' + key + '="' + (attrib[key] or '').replace('&', '&amp;').replace('<', '&lt;').replace('"', '&quot;') + '"'
+    return temp
+
 # converts html start tags to mallard tags
 def interpret_starttag(tag, attrs):
     global next_tag
@@ -54,6 +75,14 @@ def interpret_starttag(tag, attrs):
 
     if tag.lower() in tag_conversion.keys():
         temp += tag_conversion[tag]['name']
+        if 'attrib' in tag_conversion[tag]:
+            temp += attrib2text(tag_conversion[tag]['attrib'])
+        if 'get_attrib' in tag_conversion[tag]:
+            for attrib in tag_conversion[tag]['get_attrib']:
+                for a in attrs:
+                    if a[0] == attrib:
+                        temp += attrib2text({attrib: a[1]})
+                        break
         if 'next_tag' in tag_conversion[tag].keys():
             next_tag = tag_conversion[tag]['next_tag']
         if 'ignore_end' in tag_conversion[tag].keys():
@@ -85,7 +114,7 @@ def write_footer():
     while len(open_tags):
         temp += '</' + tag_conversion[open_tags.pop()]['name'] + '>'
 
-    temp += '</page>'
+    #temp += '</page>'
     return temp
 
 # reads the markdown source file
@@ -106,11 +135,16 @@ def write_file():
 def main():
     input_md = read_file()
 
-    output.append(write_header())
+    #output.append(write_header())
 
     # instantiate the parser and fed it some HTML
     parser = MyHTMLParser()
     parser.feed(input_md)
+
+    # add section links when at least one section element was written
+    if any('<section' in chunk for chunk in output):
+        output.append('<links type="section"/>')
+
     output.append(write_footer())
 
     write_file()
-- 
cgit v0.9.1