git.llucax.com Git - software/pymin.git/blob - ucsv.py
Add a module to serialize a set of tabular data as a CSV file.
[software/pymin.git] / ucsv.py
1 # vim: set encoding=utf-8 et sw=4 sts=4 :
2
3 import csv
4 import codecs
5 try:
6     from cStringIO import StringIO
7 except ImportError:
8     from StringIO import StringIO
9
10 r"""
11 CSV parsing and writing supporting unicode and encodings.
12
13 This module is copied from Python 2.5 csv module documentation:
14 http://docs.python.org/lib/csv-examples.html
15
16 It's adapted to work, at least, on Python 2.4.
17 """
18
19 class UTF8Recoder:
20     """
21     Iterator that reads an encoded stream and reencodes the input to UTF-8
22     """
23     def __init__(self, f, encoding):
24         self.reader = codecs.getreader(encoding)(f)
25
26     def __iter__(self):
27         return self
28
29     def next(self):
30         return self.reader.next().encode("utf-8")
31
class UnicodeReader:
    """
    Iterate over the rows of the CSV file "f", which is stored in the given
    encoding, returning every row as a list of unicode strings.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # csv.reader is always fed UTF-8 lines, whatever the encoding of "f".
        utf8_lines = UTF8Recoder(f, encoding)
        self.reader = csv.reader(utf8_lines, dialect=dialect, **kwds)

    def __iter__(self):
        """The reader is its own iterator."""
        return self

    def next(self):
        """Return the next row with every field decoded from UTF-8."""
        raw_fields = self.reader.next()
        decoded = []
        for field in raw_fields:
            decoded.append(unicode(field, "utf-8"))
        return decoded
48
49 class UnicodeWriter:
50     """
51     A CSV writer which will write rows to CSV file "f",
52     which is encoded in the given encoding.
53     """
54
55     def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
56         # Redirect output to a queue
57         self.queue = StringIO()
58         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
59         self.stream = f
60         if hasattr(codecs, 'getincrementalencoder'):
61             self.encoder = codecs.getincrementalencoder(encoding)()
62         else:
63             class E:
64                 def __init__(self, encoding):
65                     self.encoding = encoding
66                 def encode(self, obj):
67                     return codecs.encode(obj, encoding)
68             self.encoder = E(encoding)
69
70     def writerow(self, row):
71         self.writer.writerow([unicode(s).encode("utf-8") for s in row])
72         # Fetch UTF-8 output from the queue ...
73         data = self.queue.getvalue()
74         data = data.decode("utf-8")
75         # ... and reencode it into the target encoding
76         data = self.encoder.encode(data)
77         # write to the target stream
78         self.stream.write(data)
79         # empty queue
80         self.queue.truncate(0)
81
82     def writerows(self, rows):
83         for row in rows:
84             self.writerow(row)
85
# Module-level aliases named like csv.reader and csv.writer.
reader = UnicodeReader

writer = UnicodeWriter
89
90
if __name__ == '__main__':
    # Self-test: write two rows holding non-ASCII text into an in-memory
    # buffer, dump the raw result, then read the rows back.

    sio = StringIO()

    # Keep the instance under its own name so the module-level "writer"
    # alias is not rebound to an object that can no longer be called.
    out = writer(sio)
    out.writerows([[u"adfj", u"ñjdfhk"], [u"áalskdjal", u"1uas"]])

    print(sio.getvalue())

    # Rewind the buffer so the reader starts from the first row.
    sio.seek(0)

    for row in reader(sio):
        print(row)
104