diff options
author | Gonzalo Odiard <godiard@sugarlabs.org> | 2010-11-18 21:02:06 (GMT) |
---|---|---|
committer | Sascha Silbe <sascha-pgp@silbe.org> | 2010-11-23 19:28:44 (GMT) |
commit | 81c834f80becf46d9dbb8d1dea89960c6640d289 (patch) | |
tree | b3220f71e4781ad3fa82052dbb683f269a61b589 | |
parent | 3cd239117a300ee7f9752fbad17cd9e5d8793ca3 (diff) |
Don't choke when downloading a URL containing non-UTF-8 characters (OLPC#8857)
A URL can contain arbitrary characters; there's no guarantee they are encoded
using UTF-8. Using the encoding of the page that contains the URL is as good a
guess as any.
Instead of throwing an exception and failing to download the file, we will now
record a "garbled" file name in the Journal if there's an encoding mismatch.
Tested-By: Simon Schampijer <simon@schampijer.de>
Reviewed-By: Sascha Silbe <sascha-pgp@silbe.org>
Acked-By: Lucian Branescu <lucian.branescu@gmail.com>
-rw-r--r-- | downloadmanager.py | 8 |
1 file changed, 6 insertions, 2 deletions
diff --git a/downloadmanager.py b/downloadmanager.py
index c396317..90d22a5 100644
--- a/downloadmanager.py
+++ b/downloadmanager.py
@@ -306,9 +306,13 @@ class Download:
         elif self._source.scheme == 'data':
             return 'Data URI'
         else:
-            path = urlparse.urlparse(self._source.spec).path
+            uri = self._source
+            if uri == None:
+                return ''
+            cls = components.classes['@mozilla.org/intl/texttosuburi;1']
+            texttosuburi = cls.getService(interfaces.nsITextToSubURI)
+            path = texttosuburi.unEscapeURIForUI(uri.originCharset, uri.spec)
             location, file_name = os.path.split(path)
-            file_name = urllib.unquote(file_name.encode('utf-8', 'replace'))
             return file_name
 
     def _create_journal_object(self):