Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/cleanUp.py
diff options
context:
space:
mode:
Diffstat (limited to 'cleanUp.py')
-rwxr-xr-xcleanUp.py60
1 files changed, 60 insertions, 0 deletions
diff --git a/cleanUp.py b/cleanUp.py
new file mode 100755
index 0000000..a9dc662
--- /dev/null
+++ b/cleanUp.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+#clean up tags in html conversion of Siyavula module
+#write cleaned up version as source.txt in source folder
+from path import path
+import os, sys, subprocess
+from BeautifulSoup import BeautifulSoup
+from optparse import OptionParser
+from cvtFile import Cvt
+
#root folder that the subject/course/module folders are joined onto below;
#it is a relative path, so it resolves against the current working directory
SOURCE = path('../')
+
def makesoup(txtin):
    """Parse the markup string txtin with BeautifulSoup, decoding it as UTF-8.

    Returns the resulting BeautifulSoup object.
    """
    return BeautifulSoup(txtin, fromEncoding="utf-8")
+
#use BeautifulSoup to clean up tags
def applyFix(soup):
    """Strip <head> from soup, run the Cvt conversions and return the text.

    soup: parsed document; its <head> element, when present, is extracted
        from the tree before the conversions run.
    Returns: the result of prettify() on the converter's soup.
    """
    #remove <head> if there is one; a missing head shows up as None, so
    #test for it explicitly instead of swallowing every exception
    head = getattr(soup, 'head', None)
    if head is not None:
        head.extract()
    #perform conversions: call each step listed in processlist through the
    #converter's method table
    cvt = Cvt(soup)
    for method in cvt.processlist:
        cvt.methods[method]()
    #serialize the converter's soup and hand it back
    return cvt.soup.prettify()
+
#command line: three positional arguments naming the subject, course and
#module folders under SOURCE
parser = OptionParser(usage="Usage: %prog [options] subject course module")
(options, args) = parser.parse_args()
#args[0], args[1] and args[2] are all read below, so require all three here
#instead of failing later with an IndexError
if len(args) < 3:
    #single-argument print(...) behaves the same as the print statement
    print('Specify subject, course and module arguments.')
    parser.print_help()
    sys.exit(1)

SUBJECT = args[0]
COURSE = args[1]
MODULE = args[2]
#text the input is split on: the end of an opening tag whose attribute
#value finishes with "page-break-before: always"
tag = 'page-break-before: always">'
basepath = SOURCE / SUBJECT / COURSE / MODULE
#input is <basepath>/<MODULE>.html; the with-block closes the handle even
#when the read raises
with open(basepath / (MODULE + '.html'), 'r') as fin:
    txt = fin.read()
#flatten to a single line before splitting
txt = txt.replace('\n', ' ')
#every piece except the last was followed by the marker in the input; the
#marker is appended again so each chunk ends with it
#NOTE(review): text after the final marker is never converted, and an input
#without any marker produces an empty source.txt - confirm this is intended
txtout = ''
for piece in txt.split(tag)[:-1]:
    txtin = piece + tag
    #separate converted chunks with a horizontal rule
    if txtout:
        txtout += '\n\n<hr />\n\n'
    #set up soup
    soup = makesoup(txtin)
    txtout += applyFix(soup)
#write cleaned up version as source.txt next to the input; the with-block
#actually closes the file (the old code named fout.close without calling it)
with open(basepath / 'source.txt', 'w') as fout:
    fout.write(txtout)