diff options
author | Martin Langhoff <martin@laptop.org> | 2010-11-03 20:22:24 (GMT) |
---|---|---|
committer | Martin Langhoff <martin@laptop.org> | 2010-11-03 20:22:24 (GMT) |
commit | 6af768c3b75e0c790fbd6ce09f9f83105948323b (patch) | |
tree | a760b3ae50539286ce6d5b83ae9e19040079a366 | |
parent | 5c5a19550ea5a9e1caf5b96dde528d4672640a5e (diff) |
Tidy up of the merge and process updates scripts.
-rwxr-xr-x | tools/mergeupdates.py | 8 | ||||
-rwxr-xr-x | woip/sh/process-updates | 31 |
2 files changed, 38 insertions, 1 deletions
diff --git a/tools/mergeupdates.py b/tools/mergeupdates.py
index 4533e2c..0462a47 100755
--- a/tools/mergeupdates.py
+++ b/tools/mergeupdates.py
@@ -9,6 +9,7 @@ END_TEXT = chr(3)
 def process_article(title, text):
     fpath = os.path.join(wikidir, title)
     if os.path.exists(fpath):
+        sys.stderr.write('Merging %s\n' % fpath)
         fc = open(fpath).read()
         fc = re.sub('^\n+', '', fc)
         fc = re.sub('\n+$', '', fc)
@@ -22,19 +23,24 @@ def process_article(title, text):
 buf = ''
 mode = 'title'
 
-wikidir = sys.argv[1]
+wikidir = os.path.join(sys.argv[1], 'wiki')
+if not os.path.exists(wikidir):
+    print "Does not exist: " + wikidir
+    sys.exit(1)
 
 while True:
     b = sys.stdin.read(1)
     if not b:
         break
     if b == START_HEADING:
+        #sys.stderr.write('d start heading\n')
         pass
     elif b == START_TEXT:
         buf = re.sub('^\n+', '', buf)
         title = buf.split('\n')[0]
         bytes = buf.split('\n')[1]
         buf = ''
+        #sys.stderr.write('d start text\n')
     elif b == END_TEXT:
         buf = re.sub('^\n+', '', buf)
         buf = re.sub('\n+$', '', buf)
diff --git a/woip/sh/process-updates b/woip/sh/process-updates
new file mode 100755
index 0000000..57258c5
--- /dev/null
+++ b/woip/sh/process-updates
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+set -e
+
+if [ ! -n "$1" ]; then
+    echo "process <file.processed>"
+    exit 1
+else
+    file=`readlink -f $1`
+fi
+
+# drop the trailing '.processed'
+basename=${file%.processed}
+
+ifile="$basename.index.txt"
+echo "Creating index..."
+pushd woip/rb
+ruby ./index.rb $file > $ifile
+
+sfile="$basename.locate.db"
+echo "Creating locate index..."
+cat $ifile | LC_ALL=C /usr/libexec/locate.mklocatedb > $sfile
+
+spfile="$basename.locate.prefixdb"
+echo "Creating locate prefix index..."
+../c/lsearcher -f $sfile -c $spfile -n
+
+bfile="$basename.blocks.db"
+echo "Creating block index"
+../c/bzipreader -f $ofile -l | awk '{print $2;}' | ../c/blocks $bfile
+