diff options
author | Andi_G <andigros72@googlemail.com> | 2011-08-17 03:17:08 (GMT) |
---|---|---|
committer | Andi_G <andigros72@googlemail.com> | 2011-08-17 03:17:08 (GMT) |
commit | 0b12f9a6ec2409bdaf0fb355c77e7915d3507f54 (patch) | |
tree | c94a7e895ec0c215597d5be03d3453400654b558 | |
parent | 620f33411bb91181797eb1050e7906246a73e065 (diff) |
started to add non-url text identification
-rw-r--r-- | readdb.py | 61 |
1 files changed, 36 insertions, 25 deletions
@@ -62,7 +62,7 @@ def _init_db(): if not os.path.exists(dbpath): conn = sqlite3.connect(dbpath) - #conn.execute("DROP TABLE annotations") + conn.execute("DROP TABLE annotations") conn.execute("CREATE TABLE annotations (id INTEGER PRIMARY KEY, md5, page, title, content, bodyurl, texttitle, textcreator, created TIMESTAMP, modified TIMESTAMP, creator, annotates, color, local, mimetype, uuid, annotationurl)") conn.execute("CREATE TABLE annuserid (username, userid)") @@ -188,6 +188,8 @@ class AnnotationManager: self.current_annotation = None self._userid = '' self._filehash = filehash + self._texttitle = '' + self._textcreator = '' self._uuid = filehash self._mimetype = mimetype self._annotitle = '' @@ -205,16 +207,13 @@ class AnnotationManager: self._modified = '' self._bodysvg = '' self._id = '' - self._rdf = '' self.modifiedtolerance = 10 #self._annotationserver='http://localhost/anno/index.php' self._annotationserver='http://anno.treehouse.su/anno/index.php' #self._annotationserver='http://www.andreasgros.net/wp-content/plugins/annotation/annotation.php' - self.get_etext_url() + self.get_etext_metadata() self._to_delete = [] - self._texttitle = '' - self._textcreator = '' self._annojson = '' self.remotecreators = [] self.remotecolors = {} @@ -564,23 +563,31 @@ class AnnotationManager: - def get_etext_url(self): - url_re = re.compile('Link:\s+(http.*)') - count = 0 - (results,count) = datastore.find({'mime_type' : ['application/epub+zip', 'application/pdf', mime.GENERIC_TYPE_TEXT]}, ['mime_type', 'checksum', 'description']) + def get_etext_metadata(self): + url_re = re.compile('Link:\s+(http.*)') + count = 0 + url = "" + (results,count) = datastore.find({'mime_type' : ['application/epub+zip', 'application/pdf', mime.GENERIC_TYPE_TEXT]}, ['mime_type', 'checksum', 'description', 'title', 'author', 'publisher']) if count > 0: for r in results: m = r.get_metadata() if m['checksum'] == self._filehash: - url = m['url'] - if len(url) == 0: - if len(m['description']) > 0: - t = url_re.search(m['description']) - if t: - url = t.groups()[0].strip() - + if 'title' in m.keys(): + self._texttitle = m['title'] + if 'author' in m.keys(): + self._textcreator = m['author'] + if 'url' in m.keys(): + url = m['url'] + if len(url) == 0: + if len(m['description']) > 0: + t = url_re.search(m['description']) + if t: + url = t.groups()[0].strip() + self._annotates = url _logger.debug('found url %s - self._annotates' % self._annotates) + _logger.debug('author: %s' % self._textcreator) + _logger.debug('title: %s' % self._texttitle) @@ -615,9 +622,10 @@ class AnnotationManager: annotations = [] annojson = "" _logger.debug("contacting annotationserver %s", url) - if self._annotates == "": - self._annotates = "_" - values = {'w3c_hasTarget' : self._annotates} + #if self._annotates == "": + # self._annotates = "_" + #values = {'w3c_hasTarget' : self._annotates} + values = {'w3c_hasTarget' : self._filehash} _logger.debug('download annotations -- annotates is: %s ' % self._annotates) try: data = urllib.urlencode(values) @@ -678,15 +686,17 @@ class AnnotationManager: def sync_annotations(self): - url = self._annotationserver + url = self._annotationserver annotations = [] + annojson = None _logger.debug("contacting annotationserver %s", url) - if self._annotates == "": - self._annotates = self._texttitle + #if self._annotates == "": + # self._annotates = self._texttitle #check if there are annotations to be deleted: if len(self._to_delete) > 0: for delete_anid in self._to_delete: - values = {'w3c_hasTarget' : self._annotates, 'delete_anid': delete_anid } + #values = {'w3c_hasTarget' : self._annotates, 'delete_anid': delete_anid } + values = {'w3c_hasTarget' : self._filehash, 'delete_anid': delete_anid } try: data = urllib.urlencode(values) req = urllib2.Request(url, data) @@ -704,7 +714,8 @@ class AnnotationManager: self._userid = self.get_userid_for_username( self.get_user_string( user ) ) self._creator = self._userid - values = {'w3c_hasTarget' : self._annotates, 'creator' : self._creator } + #values = {'w3c_hasTarget' : self._annotates, 'creator' : self._creator } + values = {'w3c_hasTarget' : self._filehash, 'creator' : self._creator } _logger.debug('sync annotations -- annotates is: %s ' % self._annotates) try: data = urllib.urlencode(values) @@ -715,7 +726,7 @@ class AnnotationManager: except Exception, detail: _logger.debug("readdb: failure at initial sync request f. annotations; detail: %s ", detail) - if (annojson != None) and (len(annojson) > 0): + if ( not annojson == None ) and ( len( annojson ) > 0 ): anno_arr = self.parse_annotations(annojson) _logger.debug('length anno_arr %d', len(anno_arr)) remote_uuids = [] |