# Origin: commit 85236e57acb3c2f2533c00ea06b3afb6cffb0649
# Author: Chris Ball, Sun, 09 Mar 2008 20:04:31 +0000 -- "Initial import"
# (LanguageModel.py, new file).
#
# Copyright 2008 Chris Ball.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

"""Two-way word lookup built from a tab-separated word list."""

from __future__ import with_statement
import time


class LanguageModel(object):
    """Bidirectional dictionary between two languages.

    Both tables map a lowercased source word to a single string holding
    its translations joined by ", ".
    """

    def __init__(self):
        # Start with empty tables so the query methods are usable (and simply
        # find nothing) before SetLanguages() has been called.
        self.lang1_lang2 = {}
        self.lang2_lang1 = {}

    @staticmethod
    def _add_translation(table, source, target):
        """Record *target* as a translation of *source* in *table*."""
        if source in table:
            table[source] += ", " + target
        else:
            table[source] = target

    def SetLanguages(self, lang1, lang2):
        """Take a language pair, prepare the language model.

        Reads "lang/<lang2>.txt" relative to the current working directory.
        In each line the first tab-separated field is the lang1 word and the
        last field is the lang2 word.  Lines starting with "#", blank lines
        and lines without a tab are skipped.

        Args:
            lang1: Name of the first language.  Currently ignored.
            lang2: Name of the second language; selects the word-list file.

        Raises:
            IOError/OSError: If the word-list file cannot be opened.
        """
        self.lang1_lang2 = {}
        self.lang2_lang1 = {}

        # Since we only have English->lang mappings, ignore lang1.
        filename = "lang/" + lang2 + ".txt"
        with open(filename, 'r') as f:
            for line in f:
                line = line.rstrip()
                if not line or line.startswith("#"):
                    continue

                words_list = line.split('\t')
                if len(words_list) < 2:
                    # No tab: there is no translation on this line.
                    continue

                # Normalise case *before* the membership test so that
                # repeated entries accumulate instead of overwriting.
                word1 = words_list[0].lower()
                word2 = words_list[-1].lower()
                if not word1 or not word2:
                    continue

                self._add_translation(self.lang1_lang2, word1, word2)
                self._add_translation(self.lang2_lang1, word2, word1)

    def GetSuggestions(self, string):
        """Take a string, provide two lists of possible completions.

        Matching is case-insensitive.  A lang1 entry matches when it starts
        with *string*; a lang2 entry matches when it starts with *string* or
        contains it right after a space.

        Returns:
            [lang1_matches, lang2_matches], each sorted.
        """
        prefix = string.lower()
        inner = " " + prefix
        list_1 = [k for k in self.lang1_lang2 if k.startswith(prefix)]
        list_2 = [k for k in self.lang2_lang1
                  if k.startswith(prefix) or inner in k]
        return [sorted(list_1), sorted(list_2)]

    def GetTranslations(self, lang, string):
        """Take a word and lang (0 for first, 1 for second), provide a list
        (empty allowed) of translations.

        The lookup is case-insensitive.  A known word yields a one-element
        list holding its translations joined by ", "; an unknown word yields
        an empty list.

        Raises:
            AssertionError: If *lang* is neither 0 nor 1.
        """
        if lang == 0:  # lang1 is source
            table = self.lang1_lang2
        elif lang == 1:  # lang2 is source
            table = self.lang2_lang1
        else:
            raise AssertionError("lang must be 0 or 1")

        key = string.lower()
        if key in table:
            return [table[key]]
        return []


if __name__ == "__main__":
    a = LanguageModel()
    a.SetLanguages("English", "Spanish")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(a.GetSuggestions("tru"))
    print(a.GetTranslations(0, "dog"))