diff options
author | James Simmons <jim@simmons.olpc> | 2009-11-29 22:34:11 (GMT) |
---|---|---|
committer | James Simmons <jim@simmons.olpc> | 2009-11-29 22:34:11 (GMT) |
commit | 720affc15736a2a7cac2fd1c882027c37f5d594f (patch) | |
tree | b320b40364dcfd72c3e6d339159989acff3ec8f9 /pgconvert.py | |
parent | 35bf417b8f5d36cc389ddb7ad25c07bf31008220 (diff) |
modified: ReadEtextsActivity.py
modified: pgconvert.py
Modify the automatic conversion of PG (Project Gutenberg) books
into books without newlines at the end of each line, so that
books with really long paragraphs are left
unconverted.
Diffstat (limited to 'pgconvert.py')
-rwxr-xr-x | pgconvert.py | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/pgconvert.py b/pgconvert.py index a0d47df..f3bd198 100755 --- a/pgconvert.py +++ b/pgconvert.py @@ -19,7 +19,11 @@ import getopt import sys -# This is a script to take the a file in PG format and convert it to a text file readable by Read Etexts that does +LINE_LENGTH = 80 +MAX_PARAGRAPH_LINES = 25 +MAX_LENGTH = (LINE_LENGTH * MAX_PARAGRAPH_LINES) + +# This is a script to take the a file in PG format and convert it to a text file readable by Read Etexts that do # not have newlines at the end of each line. # My first attempt to make a pg converter that would remove unneeded line endings from PG files @@ -43,6 +47,7 @@ def convert(file_path, output_path): out = open(output_path, 'w') out.write('\t\t\t\t\r\n') previous_line_length = 0 + paragraph_length = 0 while pg_file: line = pg_file.readline() @@ -52,24 +57,35 @@ def convert(file_path, output_path): if len(line) == 2 and not previous_line_length == 2: # Blank line separates paragraphs outline = line + '\r\n' + paragraph_length = 0 elif len(line) == 2 and previous_line_length == 2: outline = line + paragraph_length = 0 elif line[0] == ' ' or (line[0] >= '0' and line[0] <= '9'): outline = '\r\n' + line[0:len(line)-2] + paragraph_length = 0 else: outline = line[0:len(line)-2] + ' ' + paragraph_length = paragraph_length + len(outline) out.write(outline) previous_line_length = len(line) + if paragraph_length > MAX_LENGTH: + break pg_file.close() out.close() print "All done!" + if paragraph_length > MAX_LENGTH: + return False + else: + return True if __name__ == "__main__": try: opts, args = getopt.getopt(sys.argv[1:], "") if check(args[0]): print 'It has NOT been converted yet.' - convert(args[0], args[1]) + success = convert(args[0], args[1]) + print 'Success', success else: print 'It is ALREADY converted.' except getopt.error, msg: |