' tags next_tag = '' # hold open tags, (such as page, section) until the respective block is over open_tags = [] # create a subclass and override the handler methods class MyHTMLParser(HTMLParser): def handle_starttag(self, tag, attrs): output.append(interpret_starttag(tag, attrs)) def handle_endtag(self, tag): output.append(interpret_endtag(tag)) def handle_data(self, data): output.append(parse_data(data)) #converts html end tags to mallard tags def interpret_endtag(tag): temp = '</' if tag.lower() in tag_conversion.keys(): if 'ignore_end' in tag_conversion[tag].keys() and tag_conversion[tag]['ignore_end']: return '' temp += tag_conversion[tag]['name'] else: temp += tag temp += '>' return temp #returns attributes in a dictionary as a text def attrib2text(attrib): temp = '' for key in attrib: temp += ' ' + key + '="' + attrib[key] + '"' return temp # converts html start tags to mallard tags def interpret_starttag(tag, attrs): global next_tag global open_tags temp = '' # if the current tag is similar to the currently open tag, first close the previous tag if open_tags and open_tags[len(open_tags)-1] == tag: temp += '</' + tag_conversion[tag]['name'] + '>' open_tags.pop() temp += '<' if tag.lower() in tag_conversion.keys(): temp += tag_conversion[tag]['name'] if 'attrib' in tag_conversion[tag].keys(): temp += attrib2text(tag_conversion[tag]['attrib']) if 'get_attrib' in tag_conversion[tag].keys(): for attrib in tag_conversion[tag]['get_attrib']: for a in attrs: if a[0] == attrib: temp += attrib2text({attrib: a[1]}) break if 'next_tag' in tag_conversion[tag].keys(): next_tag = tag_conversion[tag]['next_tag'] if 'ignore_end' in tag_conversion[tag].keys(): open_tags.append(tag) else: temp += tag temp += '>' return temp # parse data according to the previous tag def parse_data(data): global next_tag if(next_tag): data = '<' + next_tag + '>' + data + '</' + next_tag + '>' next_tag = '' return data # writes the header of the mallard file def write_header(): return '<page 
xmlns="http://projectmallard.org/1.0/" id="index">' # writes the footer of the mallard file def write_footer(): global open_tags temp = '' # close the currently open tags while len(open_tags): temp += '</' + tag_conversion[open_tags.pop()]['name'] + '>' #temp += '</page>' return temp # reads the markdown source file def read_file(): fi = open(sys.argv[1]) temp = '' for line in fi.readlines(): temp += line return temp # writes to page file def write_file(): fi = open(sys.argv[1].replace('.html','.page'), 'w') for line in output: fi.write(line + '\n') fi.flush() fi.close() def main(): input_md = read_file() #output.append(write_header()) # instantiate the parser and fed it some HTML parser = MyHTMLParser() parser.feed(input_md) # add section links if 'section' in output: output.append('links type="section"') output.append(write_footer()) write_file() if __name__ == '__main_

#!/usr/bin/env python
"""Convert an HTML file into a Mallard page file.

Usage: pass the path of the HTML file as the first command-line argument.
The result is written next to it with the extension replaced by ``.page``.

HTML tags listed in ``tag_conversion`` are translated to Mallard tags; every
other tag is passed through by name only (its attributes are dropped).
"""
import os
import sys
from xml.sax.saxutils import escape

try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2

# Output fragments, in document order; each one is written on its own line.
output = []

# Conversion rules, keyed by lower-case HTML tag name:
#   name       - name of the mallard tag
#   next_tag   - inline tag that wraps the next text data (ex: title)
#   ignore_end - ignore the html closing tag (ex: section); the mallard tag
#                stays open until a later heading or the footer closes it
#   attrib     - fixed attributes added to the mallard tag
#   get_attrib - attributes copied over from the html tag
tag_conversion = {
    'h1': {
        'name': 'page',
        'attrib': {'xmlns': 'http://projectmallard.org/1.0/', 'id': 'index'},
        'next_tag': 'title',
        'ignore_end': True,
    },
    'h2': {'name': 'section', 'next_tag': 'title', 'ignore_end': True},
    'strong': {'name': 'em'},
    'ul': {'name': 'list'},
    'ol': {'name': 'list', 'attrib': {'type': 'numbered'}},
    'li': {'name': 'item', 'next_tag': 'p'},
    'blockquote': {'name': 'quote'},
    'a': {'name': 'link', 'get_attrib': ['href']},
}

# The next text data should be wrapped in this tag; for example after a
# 'section' start tag the next data should be within <title> tags.
next_tag = ''

# HTML tags (such as h1/h2, i.e. page/section) whose mallard element is still
# open, innermost last.
open_tags = []

# Element that asks Mallard to list links to the sections of a page.
_SECTION_LINKS = '<links type="section"/>'


class MyHTMLParser(HTMLParser):
    """HTML parser that appends the converted Mallard fragments to ``output``."""

    def handle_starttag(self, tag, attrs):
        output.append(interpret_starttag(tag, attrs))

    def handle_endtag(self, tag):
        output.append(interpret_endtag(tag))

    def handle_data(self, data):
        output.append(parse_data(data))


def interpret_endtag(tag):
    """Convert an HTML end tag to the matching Mallard end tag.

    Returns an empty string for tags whose rule sets ``ignore_end``; those are
    closed later by ``interpret_starttag`` or ``write_footer``. Unknown tags
    are passed through unchanged.
    """
    rule = tag_conversion.get(tag.lower())
    if rule is None:
        return '</' + tag + '>'
    if rule.get('ignore_end'):
        return ''
    return '</' + rule['name'] + '>'


def attrib2text(attrib):
    """Return the attributes in dictionary ``attrib`` as XML attribute text.

    Each attribute is rendered as `` key="value"`` (with a leading space).
    Values are XML-escaped, and a ``None`` value (an HTML attribute given
    without a value) is rendered as an empty string.
    """
    parts = []
    for key in attrib:
        value = attrib[key]
        if value is None:
            value = ''
        # Escape &, <, > and the double quote that delimits the value.
        parts.append(' ' + key + '="' + escape(value, {'"': '&quot;'}) + '"')
    return ''.join(parts)


def interpret_starttag(tag, attrs):
    """Convert an HTML start tag to Mallard markup.

    ``attrs`` is the list of ``(name, value)`` pairs supplied by HTMLParser.
    Side effects: may set the global ``next_tag`` and push to / pop from
    ``open_tags``.
    """
    global next_tag
    key = tag.lower()
    rule = tag_conversion.get(key)
    parts = []

    # If an element for the same tag is still open, close everything nested
    # inside it and then the element itself before opening the new one.
    if key in open_tags:
        while open_tags:
            closed = open_tags.pop()
            parts.append('</' + tag_conversion[closed]['name'] + '>')
            if closed == key:
                break

    if rule is None:
        # Unknown tag: keep the name as given, drop the attributes.
        parts.append('<' + tag + '>')
        return ''.join(parts)

    parts.append('<' + rule['name'])
    if 'attrib' in rule:
        parts.append(attrib2text(rule['attrib']))
    for wanted in rule.get('get_attrib', ()):
        for name, value in attrs:
            if name == wanted:
                parts.append(attrib2text({wanted: value}))
                break
    parts.append('>')

    if 'next_tag' in rule:
        next_tag = rule['next_tag']
    # Same truthiness test as interpret_endtag, so an element is tracked as
    # open exactly when its HTML end tag is being ignored.
    if rule.get('ignore_end'):
        open_tags.append(key)
    return ''.join(parts)


def parse_data(data):
    """Return text ``data`` XML-escaped and wrapped in the pending ``next_tag``.

    Whitespace-only (or empty) data is returned without wrapping and leaves
    ``next_tag`` pending, so formatting whitespace between tags does not use
    up the title/paragraph wrapper.
    """
    global next_tag
    text = escape(data)
    if next_tag and data.strip():
        text = '<' + next_tag + '>' + text + '</' + next_tag + '>'
        next_tag = ''
    return text


def write_header():
    """Return the opening page tag of a mallard file."""
    return '<page xmlns="http://projectmallard.org/1.0/" id="index">'


def write_footer():
    """Return the end tags for every element still listed in ``open_tags``.

    Empties ``open_tags`` as a side effect.
    """
    parts = []
    while open_tags:
        parts.append('</' + tag_conversion[open_tags.pop()]['name'] + '>')
    return ''.join(parts)


def read_file(path=None):
    """Return the contents of the HTML source file.

    ``path`` defaults to the first command-line argument.
    """
    if path is None:
        path = sys.argv[1]
    with open(path) as fi:
        return fi.read()


def write_file(source_path=None):
    """Write the fragments in ``output`` to the page file, one per line.

    The page file name is ``source_path`` (default: the first command-line
    argument) with its extension replaced by ``.page``.
    """
    if source_path is None:
        source_path = sys.argv[1]
    # splitext only touches the final extension, so inputs not ending in
    # '.html' are not overwritten and directory names are left alone.
    page_path = os.path.splitext(source_path)[0] + '.page'
    with open(page_path, 'w') as fi:
        fi.writelines(fragment + '\n' for fragment in output)


def _insert_section_links(fragments):
    """Insert the section-links element before the first section, if any.

    Returns True when a section was found and the element was inserted.
    NOTE(review): placed ahead of the first section on the understanding that
    Mallard block content must precede a page's sections - confirm against
    the schema.
    """
    for index, fragment in enumerate(fragments):
        if fragment.startswith(('<section>', '<section ')):
            fragments.insert(index, _SECTION_LINKS)
            return True
    return False


def main(argv=None):
    """Convert the HTML file named in ``argv[1]`` to a Mallard page file.

    ``argv`` defaults to ``sys.argv``. Exits with a usage message when no
    input file is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('usage: html2mallard FILE.html')
    source_path = argv[1]

    html_source = read_file(source_path)

    # instantiate the parser and feed it the HTML
    parser = MyHTMLParser()
    parser.feed(html_source)
    # flush any text the parser is still buffering
    parser.close()

    # add section links
    _insert_section_links(output)

    output.append(write_footer())
    write_file(source_path)


if __name__ == '__main__':
    main()