blob: a9dc662cdffeca0db522e422adff7f909087d927 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
#!/usr/bin/python
# Clean up the tags in the HTML conversion of a Siyavula module and
# write the cleaned-up version as source.txt in the module's source folder.
from path import path
import os, sys, subprocess
from BeautifulSoup import BeautifulSoup
from optparse import OptionParser
from cvtFile import Cvt
# Root of the source tree, given relative to the current working directory;
# module folders are addressed as SOURCE/<subject>/<course>/<module>.
SOURCE = path('../')
def makesoup(txtin):
    """Parse the markup in txtin and return the resulting BeautifulSoup tree.

    The parser is told to treat the input as UTF-8 encoded.
    """
    return BeautifulSoup(txtin, fromEncoding="utf-8")
def applyFix(soup):
    """Use BeautifulSoup to clean up the tags of one parsed chunk.

    soup -- parsed document; its <head> element, if any, is removed in place
            before the conversions run.

    Returns the prettified markup of the converted soup as produced by
    cvt.soup.prettify().
    """
    # A chunk may have no <head> (soup.head is then None). Test for that
    # case explicitly instead of hiding every possible error, including
    # KeyboardInterrupt, behind a bare "except: pass".
    head = soup.head
    if head is not None:
        head.extract()
    # Perform the conversions in the order listed in cvt.processlist; each
    # entry is a key into cvt.methods, whose value is called with no arguments.
    cvt = Cvt(soup)
    for method in cvt.processlist:
        cvt.methods[method]()
    return cvt.soup.prettify()
# Script body: the three positional arguments SUBJECT, COURSE and MODULE
# select the folder SOURCE/SUBJECT/COURSE/MODULE. MODULE.html is read from
# that folder, cleaned up chunk by chunk, and written back as source.txt.
# NOTE(review): the usage string and the help message below do not describe
# the three required arguments accurately; they are left unchanged here.
parser = OptionParser(usage="Usage: %prog [options] file")
(options, args) = parser.parse_args()
# args[0], args[1] and args[2] are all read below, so require three
# arguments; checking only for an empty list let one or two arguments
# through and ended in an IndexError.
if len(args) < 3:
    # Parenthesised single string: prints the same text under Python 2.
    print('Specify a folder and module (e.g. Z4 z4m1 arguments.')
    parser.print_help()
    sys.exit(1)
SUBJECT = args[0]
COURSE = args[1]
MODULE = args[2]
# Text that terminates each chunk handed to the parser.
tag = 'page-break-before: always">'
basepath = SOURCE / SUBJECT / COURSE / MODULE
fin = open(basepath / MODULE + '.html', 'r')
try:
    txt = fin.read()
finally:
    # Close the input even when reading fails.
    fin.close()
# Newlines are replaced by spaces so the input is processed as one long line.
txt = txt.replace('\n', ' ')
txtout = ''
# Cut the text after every occurrence of tag and clean each chunk separately;
# the cleaned chunks are joined with an <hr /> separator.
# NOTE(review): whatever follows the last occurrence of tag is never
# processed, and a file without the tag yields an empty source.txt --
# confirm that this is intended.
pos = txt.find(tag)
while pos > -1:
    end = pos + len(tag)
    txtin = txt[:end]
    txt = txt[end:]
    if txtout:
        txtout += '\n\n<hr />\n\n'
    # Set up the soup for this chunk and append its cleaned-up markup.
    soup = makesoup(txtin)
    txtout += applyFix(soup)
    pos = txt.find(tag)
fout = open(basepath / 'source.txt', 'w')
try:
    fout.write(txtout)
finally:
    # The original "fout.close" lacked the call parentheses, so the file
    # was never explicitly closed.
    fout.close()
|