From 6c3c6a0cce460f1560a597b9d02d6284a156c13d Mon Sep 17 00:00:00 2001
From: Gonzalo Odiard <godiard@sugarlabs.org>
Date: Thu, 25 Nov 2010 06:11:48 +0000
Subject: Don't choke when downloading a URL containing non-UTF-8 characters (OLPC#8857)

The characters in a URL may be encoded in any charset; there's no guarantee that
it is UTF-8. The encoding of the page that contains the URL is as good a guess as
any, so use that (the URI's origin charset) to unescape it.

Instead of throwing an exception and failing to download the file, we will now
record a "garbled" file name in the Journal if there's an encoding mismatch.
---
diff --git a/downloadmanager.py b/downloadmanager.py
index 82afa0f..2337779 100644
--- a/downloadmanager.py
+++ b/downloadmanager.py
@@ -296,7 +296,12 @@ class Download:
         if self._display_name:
             return self._display_name
         else:
-            path = urlparse.urlparse(self._source.spec).path
+            uri = self._source
+            if uri == None:
+                return ''
+            cls = components.classes['@mozilla.org/intl/texttosuburi;1']
+            texttosuburi = cls.getService(interfaces.nsITextToSubURI)
+            path = texttosuburi.unEscapeURIForUI(uri.originCharset, uri.spec)
             location, file_name = os.path.split(path)
             file_name = urllib.unquote(file_name.encode('utf-8', 'replace'))
             return file_name
--
cgit v0.9.1