"""Re-join the wrapped lines of a reStructuredText file.

The source file is split into chunks on blank lines ('\n\n').  Within each
chunk the lines are merged onto one line, except for lines that start with
'==', '--', '^^' or '~~' (e.g. title underlines/overlines), which are kept
on lines of their own.  The result is written either back to the source
file or to '__clean.rst' in the current directory.
"""
import codecs
import optparse
import os
import sys

# A stripped line starting with any of these is kept on its own line
# instead of being merged into the surrounding text.
_ADORNMENT_PREFIXES = ('==', '--', '^^', '~~')

# Output file used when in-place replacement was not requested.
_DEFAULT_DEST = '__clean.rst'


def _join(lines):
    """Merge the lines of one chunk into a single string.

    :param lines: text of one chunk, lines separated by '\\n'.
    :returns: the merged text.  Ordinary lines are stripped and followed by
        a single space; a line ending in '-' has its trailing hyphen(s)
        removed and is glued directly to the next line; adornment lines are
        emitted on a new line of their own.
    """
    pieces = []
    after_adornment = False
    for raw in lines.split('\n'):
        line = raw.strip()
        if line.startswith(_ADORNMENT_PREFIXES):
            pieces.append('\n%s' % line)
            after_adornment = True
            continue
        if after_adornment:
            # Text following an adornment (overlined title, table border)
            # must start on its own line rather than be glued to it.
            pieces.append('\n')
            after_adornment = False
        if line.endswith('-'):
            # Only the trailing hyphenation is dropped; hyphens inside the
            # line (e.g. "well-known") are part of the text and are kept.
            pieces.append(line.rstrip('-'))
        else:
            pieces.append('%s ' % line)
    return ''.join(pieces)


def clean_text(text):
    """Return *text* with every blank-line-separated chunk re-joined.

    :param text: full contents of the document.
    :returns: the chunks, each stripped and passed through ``_join``,
        joined again with one blank line between them.
    """
    return '\n\n'.join(_join(chunk.strip()) for chunk in text.split('\n\n'))


def main(argv=None):
    """Run the command-line tool.

    :param argv: argument list without the program name; ``None`` makes
        optparse fall back to ``sys.argv[1:]``.
    :returns: process exit status, 0 on success and 1 when the source file
        option is missing or the path does not exist.
    """
    parser = optparse.OptionParser()
    parser.add_option("-f", "--file", dest="src_path",
                      help="Path to the .rst file to clean")
    # NOTE: no action= is given, so optparse expects a value after -r;
    # any non-empty value turns in-place replacement on.
    parser.add_option("-r", "--replace", dest="replace", default=False,
                      help="Replace previous file with clean one?")
    options, _args = parser.parse_args(argv)

    if not options.src_path:
        print('-f option is required')
        return 1
    if not os.path.exists(options.src_path):
        print('source path not found')
        return 1

    dest_path = options.src_path if options.replace else _DEFAULT_DEST

    # Read everything before opening the destination: with --replace both
    # paths are the same file and opening it for writing truncates it.
    with codecs.open(options.src_path, 'rb', 'utf-8') as f:
        content = f.read()

    with codecs.open(dest_path, 'wb', 'utf-8') as f:
        f.write(clean_text(content))
    return 0


if __name__ == '__main__':
    sys.exit(main())