diff options
Diffstat (limited to 'woip/rb/xapian-index.rb')
-rw-r--r-- | woip/rb/xapian-index.rb | 30 |
1 files changed, 30 insertions, 0 deletions
# woip/rb/xapian-index.rb
#
# Builds (or extends) a Xapian database from a pipe-delimited text file.
#
# Usage: ruby xapian-index.rb DATABASE_PATH INPUT_FILE
#
# For every input line whose first '|'-separated field is non-empty, one
# document is added to the database:
#   * the document data is the raw line, exactly as read (including its
#     line terminator);
#   * the lowercased first field is added as a posting at position 1.
# Lines whose first field is empty or missing are skipped.
#
# Progress is reported on stderr every 100 documents. If indexing a line
# fails, the offending line is printed to stdout and the error is re-raised.
#
# NOTE(review): the database is never committed or closed explicitly; this
# presumably relies on the bindings committing when the handle is released.
# Confirm against the installed Xapian version.
require 'xapian'

abort "usage: #{$PROGRAM_NAME} DATABASE_PATH INPUT_FILE" if ARGV.size < 2

db_path, input_path = ARGV
db = Xapian::WritableDatabase.new(db_path, Xapian::DB_CREATE_OR_OPEN)
processed = 0

# The block form of File.open guarantees the input is closed, even when an
# indexing error propagates out of the loop.
File.open(input_path, 'r') do |input|
  input.each_line do |line|
    begin
      term = line.split('|').first
      # `first` is nil when the line consists solely of separators with no
      # trailing newline (split drops trailing empty fields), so guard
      # against nil as well as the empty string.
      next if term.nil? || term.empty?

      doc = Xapian::Document.new
      doc.data = line
      doc.add_posting(term.downcase, 1)
      db.add_document(doc)
      processed += 1

      $stderr.puts "#{processed}\t#{term}" if (processed % 100).zero?
    rescue StandardError
      # Show which input line triggered the failure, then propagate it.
      puts line
      raise
    end
  end
end

$stderr.puts 'Done'