diff options
author | Aleksey Lim <alsroot@member.fsf.org> | 2009-03-02 03:52:58 (GMT) |
---|---|---|
committer | Walter Bender <walter@walter-laptop.(none)> | 2009-03-02 14:45:13 (GMT) |
commit | 7eeb3fd5bd894b12126a5022c0f303024427a626 (patch) | |
tree | acf36bc13a0c20960543dcf773eef34561529d67 | |
parent | 9a98d3a088ecd32ee988c09e38317771e7dc2cdd (diff) |
Skip removed images
-rw-r--r-- | Processing/Article_Builder.py | 19 | ||||
-rw-r--r-- | book.py | 9 |
2 files changed, 15 insertions, 13 deletions
diff --git a/Processing/Article_Builder.py b/Processing/Article_Builder.py
index 0e98c6f..5029150 100644
--- a/Processing/Article_Builder.py
+++ b/Processing/Article_Builder.py
@@ -4,6 +4,7 @@ from BeautifulSoup import Tag
from NewtifulSoup import NewtifulStoneSoup as BeautifulStoneSoup
from Processing.Article.Article_Data import *
import re
+import os
import logging
logger = logging.getLogger('infoslicer')
@@ -126,9 +127,12 @@ def get_article_from_dita(image_path, dita):
if tag.parent.name == "p":
source_article_id = article_id
text = image_path + '/' + tag['href']
- picture_data = Picture_Data(source_article_id, text,
- tag['orig_href'])
- sentence_data_list.insert(0, picture_data)
+ if not os.path.exists(text):
+ logger.info('cannot find image %s' % text)
+ else:
+ picture_data = Picture_Data(source_article_id, text,
+ tag['orig_href'])
+ sentence_data_list.insert(0, picture_data)
article_title = input.find("title").renderContents().replace("\n", "").strip()
@@ -141,7 +145,10 @@ def get_article_from_dita(image_path, dita):
caption = caption.renderContents().replace("\n", "").strip()
else:
caption = ""
- image_list.append((img['href'], caption, img['orig_href']))
+ if not os.path.exists(os.path.join(image_path, img['href'])):
+ logger.info('cannot find image %s' % img['href'])
+ else:
+ image_list.append((img['href'], caption, img['orig_href']))
data = Article_Data(article_id, article_id, article_title, "theme", section_data_list, image_list)
@@ -154,7 +161,6 @@ def get_dita_from_article(image_path, article):
It calls the getData method of the article class to get the article_data representation of the article.
It then constructs the corresponding DITA representation of the article.
"""
- image_sources = {}
article_data = article.getData()
output = BeautifulStoneSoup("<?xml version='1.0' encoding='utf-8'?><!DOCTYPE reference PUBLIC \"-//IBM//DTD DITA IBM Reference//EN\" \"ibm-reference.dtd\"><reference><title>%s</title><prolog></prolog></reference>" % article_data.article_title)
current_ref = output.reference
@@ -200,7 +206,6 @@ def get_dita_from_article(image_path, article):
# switch image to relative path
text = sentence.text.replace(image_path, '') \
.lstrip('/')
- image_sources[text.split('/')[0]] = None
image_tag = _tag_generator(output,
"image", attrs=[("href", text),
('orig_href', sentence.orig)])
@@ -225,7 +230,7 @@ def get_dita_from_article(image_path, article):
image_list_body.append(image_tag)
dita = output.prettify()
- return (dita, image_sources)
+ return dita
def _tag_generator(soup, name, attrs=[], contents=None):
if attrs != []:
@@ -60,8 +60,8 @@ class Book(gobject.GObject):
if entry:
content = self._load(entry['uid'])
if content:
- data = Article_Builder.get_article_from_dita(image_root,
- content)
+ data = Article_Builder.get_article_from_dita(
+ image_root, content)
self._article = Article(data)
else:
self._article = Article()
@@ -253,10 +253,7 @@ class CustomBook(Book):
self.find_by_uuid(self._article.uid)['title'] = \
self._article.article_title
- contents, image_sources = Article_Builder.get_dita_from_article(
+ contents = Article_Builder.get_dita_from_article(
image_root, self._article)
- #for i in image_sources.keys():
- # image_sources[i] = wiki[i].
-
self._save(self._article.uid, contents)