diff options
Diffstat (limited to 'dictionary.py')
-rw-r--r-- | dictionary.py | 181 |
1 files changed, 107 insertions, 74 deletions
# Provides the API to control the dictionary.
#
# Every query below runs against a SQLite file.  The column layouts noted in
# the docstrings are the ones the row-unpacking code relies on:
#   las_word   -> (laswid, wordid, lemma, length)
#   las_sense  -> (wordid, synsetid, rank)
#   las_synset -> (synsetid, pos, definition)
#   las_sample -> (synsetid, sampleid, sample)
import random
import sqlite3
import sys

__debug = True      # debug flag; not read by any code in this module
DBname = "dict.db"  # default SQLite file opened by word()
word_list = []      # not read by any code in this module


class dictionary:
    """Word-level queries against the ``las_word`` table of a SQLite file."""

    def __init__(self, DBname):
        """Open the SQLite file *DBname* and set up the per-instance caches."""
        self.conn = sqlite3.connect(DBname, isolation_level="IMMEDIATE")
        self.cur = self.conn.cursor()
        self.numwords = -1            # most recent count returned by getnumwords()
        self._numwords_by_level = {}  # level -> cached word count
        self.wordid_list = []         # cached rows of (wordid,) for self.level
        self.level = 0                # level that self.wordid_list was loaded for

    def getnumwords(self, level=0):
        """Return the number of words as an int.

        ``level == 0`` counts every row of ``las_word``; any other value
        counts only the rows whose ``length`` column equals *level*.
        Counts are cached per level, so each level is queried only once.
        """
        if level not in self._numwords_by_level:
            if level == 0:
                self.cur.execute("SELECT COUNT(wordid) from las_word")
            else:
                self.cur.execute(
                    "SELECT COUNT(wordid) from las_word where length = ?",
                    (level,))
            # fetchone() yields a 1-tuple; unpack it so callers get a number.
            (count,) = self.cur.fetchone()
            self._numwords_by_level[level] = count
        self.numwords = self._numwords_by_level[level]
        return self.numwords

    def getrandomwordid(self, level=0):
        """Return the ``wordid`` of a randomly chosen word.

        ``level == 0`` picks from every word; any other value restricts the
        choice to words whose ``length`` column equals *level*.

        Raises:
            IndexError: if no word matches *level*.
        """
        if not self.wordid_list or self.level != level:
            if level == 0:
                self.cur.execute("SELECT wordid from las_word")
            else:
                self.cur.execute(
                    "SELECT wordid from las_word where length = ?", (level,))
            self.wordid_list = self.cur.fetchall()
            # Record the level for every fetch (including 0) so the cached
            # list is reused by the next call with the same level.
            self.level = level
        if not self.wordid_list:
            raise IndexError("no words found for level %r" % (level,))
        # random.choice can never index past the end of the list, unlike
        # random.randint(0, len(...)), whose upper bound is inclusive.
        (wordid,) = random.choice(self.wordid_list)
        return wordid

    def close(self):
        """Close the cursor and the underlying database connection."""
        self.cur.close()
        self.conn.close()


class word:
    """One row of ``las_word`` plus lookups of its senses, definitions, usages."""

    # Accepted values of ``identifier`` and the query each one selects.
    _QUERIES = {
        "las_word_id": "SELECT * from las_word where laswid = ?",
        "wordid": "SELECT * from las_word where wordid = ?",
        "word": "SELECT * from las_word where lemma = ?",
    }

    def __init__(self, identifier, value, db_path=None):
        """Load the word whose *identifier* column equals *value*.

        Args:
            identifier: "las_word_id", "wordid" or "word"; selects the
                column (``laswid``, ``wordid`` or ``lemma``) to match.
            value: the value to look up in that column.
            db_path: SQLite file to open; defaults to the module-level
                ``DBname``.

        Raises:
            ValueError: if *identifier* is not one of the accepted names.
            LookupError: if no row matches *value*.
        """
        # Validate before connecting so a bad call does not leave a
        # connection open.  (__init__ cannot report errors by returning a
        # value; Python raises TypeError if it returns anything but None.)
        query = self._QUERIES.get(identifier)
        if query is None:
            raise ValueError(
                "Invalid Usage: unknown identifier %r" % (identifier,))

        self.conn = sqlite3.connect(
            DBname if db_path is None else db_path,
            isolation_level="IMMEDIATE")
        self.cur = self.conn.cursor()
        self.cur.execute(query, (value,))
        row = self.cur.fetchone()
        if row is None:
            self.conn.close()
            raise LookupError(
                "no las_word row with %s = %r" % (identifier, value))

        (laswid, wordid, lemma, length) = row
        self.las_word_id = laswid
        self.wordid = wordid
        self.word = lemma
        self.length = length
        # Initialised here so getdef()/getusage() can be called before
        # getsynsetid().
        self.synsetid_list = []
        self.def_list = []
        self.usage_list = []

    def getword(self):
        """Return the word's text (the ``lemma`` column)."""
        return self.word

    def getsynsetid(self):
        """Query ``las_sense`` and return the list of this word's synset ids."""
        self.cur.execute(
            "SELECT * from las_sense where wordid = ?", (self.wordid,))
        self.synsetid_list = [
            synsetid for (_wordid, synsetid, _rank) in self.cur]
        return self.synsetid_list

    def getdef(self):
        """Return ``(synsetid, pos, definition)`` tuples for every synset."""
        if not self.synsetid_list:
            self.getsynsetid()
        self.def_list = []
        for synsetid in self.synsetid_list:
            self.cur.execute(
                "SELECT * from las_synset where synsetid = ?", (synsetid,))
            self.def_list.extend(
                (sid, pos, definition) for (sid, pos, definition) in self.cur)
        return self.def_list

    def getusage(self):
        """Return ``(synsetid, sampleid, sample)`` tuples for every synset."""
        if not self.synsetid_list:
            self.getsynsetid()
        self.usage_list = []
        for synsetid in self.synsetid_list:
            self.cur.execute(
                "SELECT * from las_sample where synsetid = ?", (synsetid,))
            self.usage_list.extend(
                (sid, sampleid, sample) for (sid, sampleid, sample) in self.cur)
        return self.usage_list

    def close(self):
        """Close the cursor and the underlying database connection."""
        self.cur.close()
        self.conn.close()


def _main():
    """Smoke test: print the word count and one random 15-letter word's data."""
    k = dictionary("dict.db")
    num_words = k.getnumwords()
    print(num_words)

    wordid = k.getrandomwordid(15)
    entry = word("wordid", wordid)

    print(entry.getword())
    entry.getsynsetid()
    print(entry.getdef())
    print(entry.getusage())


if __name__ == "__main__":
    _main()
\ No newline at end of file |