Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/dictionary.py
diff options
context:
space:
mode:
Diffstat (limited to 'dictionary.py')
-rw-r--r-- dictionary.py 181
1 files changed, 107 insertions, 74 deletions
diff --git a/dictionary.py b/dictionary.py
index b962714..a7ad273 100644
--- a/dictionary.py
+++ b/dictionary.py
@@ -2,91 +2,124 @@
import sys
# Provides the API to control the dictionary.
-global __root__
-global __element__
-global __word__
-global __def__
-global __phnm__
-global __pos__
-global __src__
-global __fld__
global __debug
-global __numword
-global __DBName
-global __DBTableName
+global DBname
+global word_list
-__root__ = "dictionary"
-__element__ = "p"
-__word__ = "hw"
-__def__ = "def"
-__phnm__ = "pr"
-__pos__ = "pos"
-__src__ = "source"
-__fld__ = "fld"
-__debug = True
-__numword = -1
-__DBName = "/tmp/las"
-__DBTableName = "dictionary" #Currently not used
+__debug = True
+DBname = "dict.db"
+word_list = []
#strings which are tag in XML
class dictionary:
- def __init__(self, filename):
- from xml.etree.ElementTree import ElementTree
- #create an ElementTree instance from an XML file
- self.ETree = ElementTree(file=filename)
- if self.ETree.getroot().tag != __root__:
- print "Invalid File"
- sys.exit(0)
-
- def getroottag(self):
- return self.ETree.getroot().tag
- def getnumwords(self):
- self.iter = self.ETree.getiterator(__element__)
- __numword = len(self.iter)
- return __numword
- def loadict(self):
- self.MakeDB()
- for element in self.iter:
- if element.getchildren():
- #Can also use: "for child in element.getchildren():"
- tempDict = {}
- tempDict["word"] = ""
- tempDict["def"] = ""
- tempDict["phnm"] = ""
- tempDict["src"] = ""
- tempDict["pos"] = ""
- for child in element:
- #Child element tag name
- if child.tag == __word__:
- tempDict["word"] = child.text
- elif child.tag == __def__:
- tempDict["def"] = child.text
- elif child.tag == __phnm__:
- tempDict["phnm"] = child.text
- elif child.tag == __src__:
- tempDict["src"] = child.text
- elif child.tag == __pos__:
- tempDict["pos"] = child.text
- t = ( tempDict["word"],tempDict["def"],tempDict["phnm"],tempDict["src"],tempDict["pos"] )
- self.c.execute( "insert into dict values (?, ? , ?, ?, ?)", t )
- self.conn.commit()
- self.c.close()
- self.conn.close()
+ def __init__(self, DBname):
- def MakeDB(self):
import sqlite3
+ self.conn = sqlite3.connect(DBname, isolation_level=None)
+ # Turn on autocommit mode
+ # Set isolation_level to "IMMEDIATE"
+ self.conn.isolation_level = "IMMEDIATE"
+ self.cur = self.conn.cursor()
+ self.numwords = -1
+ self.wordid_list = []
+ self.level = 0
+
+ def getnumwords(self, level = 0):
+ if self.numwords == -1:
+ if level == 0:
+ self.cur.execute("SELECT COUNT(wordid) from las_word")
+ else:
+ self.cur.execute("SELECT COUNT(wordid) from las_word where length = ?", (level, ))
+ self.numwords = self.cur.fetchone()
+ return self.numwords
+
+
+ def getrandomwordid(self, level=0):
+ if self.wordid_list == [] or self.level != level:
+ if level == 0:
+ self.cur.execute("SELECT wordid from las_word")
+ else:
+ self.level = level
+ self.cur.execute("SELECT wordid from las_word where length = ?", (level, ))
+ self.wordid_list = self.cur.fetchall()
+ #count = self.wordid_list.count
+ count = len(self.wordid_list)
import random
- self.conn = sqlite3.connect("las.db")
- self.c = self.conn.cursor()
- self.c.execute('''DROP TABLE IF EXISTS dict''')
- self.c.execute('''CREATE table IF NOT EXISTS dict (word text, def text, phnm text, src text, pos text)''')
+ randid = random.randint(0,count)
+ (id,) = self.wordid_list[randid]
+ return id
+
+class word:
+
+ def __init__(self, identifier, value):
+ import sqlite3
+ self.conn = sqlite3.connect(DBname, isolation_level=None)
+ # Turn on autocommit mode
+ # Set isolation_level to "IMMEDIATE"
+ self.conn.isolation_level = "IMMEDIATE"
+ self.cur = self.conn.cursor()
+ if identifier == "las_word_id":
+ self.las_word_id = value
+ self.cur.execute("SELECT * from las_word where laswid = ?", (value,))
+ elif identifier == "wordid":
+ self.wordid = value
+ self.cur.execute("SELECT * from las_word where wordid = ?", (value,))
+ elif identifier == "word":
+ self.word = value
+ self.cur.execute("SELECT * from las_word where lemma = ?", (value,))
+ else:
+ return "Invalid Usage"
+
+ (laswid, wordid, lemma, length) = self.cur.fetchone()
+ self.las_word_id = laswid
+ self.wordid = wordid
+ self.word = lemma
+ self.length = length
+
+ def getword(self):
+ return self.word
+
+ def getsynsetid(self):
+ self.synsetid_list = []
+ self.cur.execute("SELECT * from las_sense where wordid = ?", (self.wordid,))
+ for (wordid, synsetid, rank) in self.cur:
+ self.synsetid_list.append(synsetid)
+ return self.synsetid_list
+
+ def getdef(self):
+ self.def_list = []
+ if self.synsetid_list == []:
+ self.getsynsetid()
+ for synsetid in self.synsetid_list:
+ self.cur.execute("SELECT * from las_synset where synsetid = ?", (synsetid,) )
+ for (synsetid, pos, defination) in self.cur:
+ self.def_list.append( (synsetid, pos, defination))
+ return self.def_list
+
+ def getusage(self):
+ if self.synsetid_list == []:
+ self.getsynsetid()
+ self.usage_list = []
+ for synsetid in self.synsetid_list:
+ self.cur.execute("SELECT * from las_sample where synsetid = ?", (synsetid,))
+ for (synsetid, sampleid, sample) in self.cur:
+ self.usage_list.append( (synsetid, sampleid, sample))
+ return self.usage_list
+
if __name__ == "__main__":
- k = dictionary("dict/k.xml")
- print k.getroottag()
- print k.getnumwords()
- k.loadict()
\ No newline at end of file
+ k = dictionary("dict.db")
+ num_words = k.getnumwords()
+ print num_words
+
+ wordid = k.getrandomwordid(15)
+ l = word("wordid", wordid )
+
+ print l.getword()
+ l.getsynsetid()
+ print l.getdef()
+ print l.getusage()
\ No newline at end of file