Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/woip/rb/xapian-index.rb
diff options
context:
space:
mode:
Diffstat (limited to 'woip/rb/xapian-index.rb')
-rw-r--r-- woip/rb/xapian-index.rb 30
1 files changed, 30 insertions, 0 deletions
diff --git a/woip/rb/xapian-index.rb b/woip/rb/xapian-index.rb
new file mode 100644
index 0000000..2f06fa4
--- /dev/null
+++ b/woip/rb/xapian-index.rb
@@ -0,0 +1,30 @@
+require 'xapian'
+
+db = Xapian::WritableDatabase.new(ARGV[0], Xapian::DB_CREATE_OR_OPEN)
+stem = Xapian::TermGenerator.new()
+f = File.open(ARGV[1], 'r')
+processed = 0
+
+begin
+ while (line = f.readline)
+ begin
+ split = line.split("|")
+ next if split.first == ""
+
+ doc = Xapian::Document.new
+ doc.data = line
+ doc.add_posting(split.first.downcase, 1)
+ db.add_document(doc)
+ processed += 1
+
+ if processed % 100 == 0
+ $stderr.puts "#{processed}\t#{split.first}"
+ end
+ rescue
+ puts line
+ raise $!
+ end
+ end
+rescue EOFError
+ $stderr.puts "Done"
+end