diff options
author | Florent Pigout <florent.pigout@gmail.com> | 2011-12-17 16:11:09 (GMT) |
---|---|---|
committer | Florent Pigout <florent.pigout@gmail.com> | 2011-12-17 16:11:09 (GMT) |
commit | 5a3be3b35185aeffd44f99d6fefeb1edd80486a2 (patch) | |
tree | 84cf25d3b398334f9047a4ffcf52cc1b67e7eec0 | |
parent | b60ebb27b6cac928dc08fa31fbda806cf2621b10 (diff) |
add config and csv tools
-rw-r--r-- | datastore_reader/utils/__init__.py | 0 | ||||
-rw-r--r-- | datastore_reader/utils/config.py | 16 | ||||
-rw-r--r-- | datastore_reader/utils/csv.py | 66 |
3 files changed, 82 insertions, 0 deletions
diff --git a/datastore_reader/utils/__init__.py b/datastore_reader/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/datastore_reader/utils/__init__.py
diff --git a/datastore_reader/utils/config.py b/datastore_reader/utils/config.py
new file mode 100644
index 0000000..76caed6
--- /dev/null
+++ b/datastore_reader/utils/config.py
@@ -0,0 +1,16 @@
+# config obj
+from ConfigObject import config_module
+
+def _init_config_obj():
+    # init config obj
+    config_module(__name__, __file__, 'config.ini')
+
+# singleton flag
+__initialized__ = False
+
+# do init
+if __initialized__ is False:
+    # init config
+    _init_config_obj()
+    # update flag
+    __initialized__ = True
diff --git a/datastore_reader/utils/csv.py b/datastore_reader/utils/csv.py
new file mode 100644
index 0000000..c31eafb
--- /dev/null
+++ b/datastore_reader/utils/csv.py
@@ -0,0 +1,66 @@
+import csv, codecs, cStringIO
+
+class UTF8Recoder:
+    """
+    Iterator that reads an encoded stream and reencodes the input to UTF-8
+    """
+    def __init__(self, f, encoding):
+        self.reader = codecs.getreader(encoding)(f)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        return self.reader.next().encode("utf-8")
+
+class Reader:
+    """
+    A CSV reader which will iterate over lines in the CSV file "f",
+    which is encoded in the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        f = UTF8Recoder(f, encoding)
+        self.reader = csv.reader(f, dialect=dialect, **kwds)
+
+    def next(self):
+        row = self.reader.next()
+        return [unicode(s, "utf-8") for s in row]
+
+    def __iter__(self):
+        return self
+
+class Writer:
+    """
+    A CSV writer which will write rows to CSV file "f",
+    which is encoded in the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        # Redirect output to a queue
+        self.queue = cStringIO.StringIO()
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
+        self.stream = f
+        self.encoder = codecs.getincrementalencoder(encoding)()
+
+    def writerow(self, row):
+        for i, s in enumerate(row):
+            # ensure string for encoding
+            if not isinstance(s, str)\
+            and not isinstance(s, unicode):
+                s = str(s)
+            row[i] = s.encode("utf-8")
+        self.writer.writerow(row)
+        # Fetch UTF-8 output from the queue ...
+        data = self.queue.getvalue()
+        data = data.decode("utf-8")
+        # ... and reencode it into the target encoding
+        data = self.encoder.encode(data)
+        # write to the target stream
+        self.stream.write(data)
+        # empty queue
+        self.queue.truncate(0)
+
+    def writerows(self, rows):
+        for row in rows:
+            self.writerow(row)
|