Started adding identification of texts that have no URL (annotation requests now target the file hash)

author: Andi_G <andigros72@googlemail.com> 2011-08-17 03:17:08 (GMT)
committer: Andi_G <andigros72@googlemail.com> 2011-08-17 03:17:08 (GMT)
commit: 0b12f9a6ec2409bdaf0fb355c77e7915d3507f54 (patch)
tree: c94a7e895ec0c215597d5be03d3453400654b558
parent: 620f33411bb91181797eb1050e7906246a73e065 (diff)
1 files changed, 36 insertions, 25 deletions
diff --git a/readdb.py b/readdb.py
index e308203..d0f6542 100644
--- a/readdb.py
+++ b/readdb.py
@@ -62,7 +62,7 @@ def _init_db():
     if not os.path.exists(dbpath):
         conn = sqlite3.connect(dbpath)
 
-        #conn.execute("DROP TABLE annotations")
+        conn.execute("DROP TABLE annotations")
         conn.execute("CREATE TABLE annotations (id INTEGER PRIMARY KEY, md5, page, title, content, bodyurl, texttitle, textcreator, created TIMESTAMP, modified TIMESTAMP, creator, annotates, color, local, mimetype, uuid, annotationurl)")
         
         conn.execute("CREATE TABLE annuserid (username, userid)")
@@ -188,6 +188,8 @@ class AnnotationManager:
         self.current_annotation = None
         self._userid = ''
         self._filehash = filehash
+        self._texttitle = '' 
+        self._textcreator = ''
         self._uuid = filehash
         self._mimetype = mimetype
         self._annotitle = ''
@@ -205,16 +207,13 @@ class AnnotationManager:
         self._modified = '' 
         self._bodysvg = '' 
         self._id = ''
-        self._rdf = ''
         self.modifiedtolerance = 10
         #self._annotationserver='http://localhost/anno/index.php'
         self._annotationserver='http://anno.treehouse.su/anno/index.php'
         #self._annotationserver='http://www.andreasgros.net/wp-content/plugins/annotation/annotation.php'
-        self.get_etext_url()
+        self.get_etext_metadata()
         self._to_delete = []
 
-        self._texttitle = '' 
-        self._textcreator = ''
         self._annojson = ''
         self.remotecreators = []
         self.remotecolors = {}
@@ -564,23 +563,31 @@ class AnnotationManager:
 
 
 
-    def get_etext_url(self):    
-        url_re = re.compile('Link:\s+(http.*)')
-        count = 0
-        (results,count) = datastore.find({'mime_type' : ['application/epub+zip', 'application/pdf', mime.GENERIC_TYPE_TEXT]}, ['mime_type', 'checksum', 'description'])
+    def get_etext_metadata(self):    
+        url_re  = re.compile('Link:\s+(http.*)')
+        count   = 0
+        url     = ""
+        (results,count) = datastore.find({'mime_type' : ['application/epub+zip', 'application/pdf', mime.GENERIC_TYPE_TEXT]}, ['mime_type', 'checksum', 'description', 'title', 'author', 'publisher'])
         if count > 0:
             for r in results:
                 m = r.get_metadata()
                 if m['checksum'] == self._filehash:
-                    url = m['url']
-                    if len(url) == 0:
-                        if len(m['description']) > 0:
-                            t = url_re.search(m['description'])
-                            if t:
-                                url = t.groups()[0].strip()
-                    
+                    if 'title' in m.keys():
+                        self._texttitle = m['title']
+                    if 'author' in m.keys():    
+                        self._textcreator = m['author']
+                    if 'url' in m.keys():    
+                        url = m['url']
+                        if len(url) == 0:
+                            if len(m['description']) > 0:
+                                t = url_re.search(m['description'])
+                                if t:
+                                    url = t.groups()[0].strip()
+                        
                     self._annotates = url
                     _logger.debug('found url %s - self._annotates' % self._annotates)
+                    _logger.debug('author: %s' % self._textcreator)
+                    _logger.debug('title: %s' % self._texttitle)
 
 
 
@@ -615,9 +622,10 @@ class AnnotationManager:
         annotations = []
         annojson = ""
         _logger.debug("contacting annotationserver %s", url)
-        if self._annotates == "":
-            self._annotates = "_"
-        values = {'w3c_hasTarget' : self._annotates}
+        #if self._annotates == "":
+        #    self._annotates = "_"
+        #values = {'w3c_hasTarget' : self._annotates}
+        values = {'w3c_hasTarget' : self._filehash}
         _logger.debug('download annotations -- annotates is: %s ' % self._annotates)
         try:
             data = urllib.urlencode(values)          
@@ -678,15 +686,17 @@ class AnnotationManager:
 
 
     def sync_annotations(self):
-        url = self._annotationserver
+        url         = self._annotationserver
         annotations = []
+        annojson    = None
         _logger.debug("contacting annotationserver %s", url)
-        if self._annotates == "":
-            self._annotates = self._texttitle
+        #if self._annotates == "":
+        #    self._annotates = self._texttitle
         #check if there are annotations to be deleted:
         if len(self._to_delete) > 0:
             for delete_anid in self._to_delete:
-                values = {'w3c_hasTarget' : self._annotates, 'delete_anid': delete_anid }
+                #values = {'w3c_hasTarget' : self._annotates, 'delete_anid': delete_anid }
+                values = {'w3c_hasTarget' : self._filehash, 'delete_anid': delete_anid }
                 try:
                     data = urllib.urlencode(values)          
                     req = urllib2.Request(url, data)
@@ -704,7 +714,8 @@ class AnnotationManager:
                 self._userid = self.get_userid_for_username( self.get_user_string( user ) )
             self._creator = self._userid 
 
-            values = {'w3c_hasTarget' : self._annotates, 'creator' : self._creator }
+            #values = {'w3c_hasTarget' : self._annotates, 'creator' : self._creator }
+            values = {'w3c_hasTarget' : self._filehash, 'creator' : self._creator }
             _logger.debug('sync annotations -- annotates is: %s ' % self._annotates)
             try:
                 data = urllib.urlencode(values)          
@@ -715,7 +726,7 @@ class AnnotationManager:
 
             except Exception, detail: 
                 _logger.debug("readdb: failure at initial sync request f. annotations; detail: %s ", detail) 
-        if (annojson != None) and (len(annojson) > 0):    
+        if ( not annojson == None ) and ( len( annojson ) > 0 ):    
             anno_arr = self.parse_annotations(annojson)  
             _logger.debug('length anno_arr %d', len(anno_arr))
             remote_uuids = []
author	Andi_G <andigros72@googlemail.com>	2011-08-17 03:17:08 (GMT)
committer	Andi_G <andigros72@googlemail.com>	2011-08-17 03:17:08 (GMT)
commit	0b12f9a6ec2409bdaf0fb355c77e7915d3507f54 (patch)
tree	c94a7e895ec0c215597d5be03d3453400654b558
parent	620f33411bb91181797eb1050e7906246a73e065 (diff)