Ticket #534: fsencoding.py

File fsencoding.py, 2.9 KB (added by zooko, at 2009-04-28T18:11:37Z)

PEP 383'ish implementation of listdir()

Line 
1# A wrapper around the Python Standard Library's filename access functions to
2# provide a uniform API for all platforms and to prevent lossy en/de-coding.
3
class Fname(object):
    """A filename together with a record of how it was decoded.

    Attributes:
        name: the filename itself.  The functions in this module require it
            to be a unicode object.
        failed_decode: True when ``name`` was not produced by a successful
            decode with the filesystem's encoding, i.e. when it only carries
            the original bytes smuggled into a unicode string.
        alleged_encoding: name of the encoding the filesystem claimed to
            use, or None when it was not recorded.
    """

    def __init__(self, name, failed_decode=False, alleged_encoding=None):
        self.name = name
        self.failed_decode = failed_decode
        self.alleged_encoding = alleged_encoding

    def __repr__(self):
        # Show all three fields: whether the decode failed is the first
        # thing one needs to know when debugging a filename problem.
        return "%s(%r, failed_decode=%r, alleged_encoding=%r)" % (
            self.__class__.__name__,
            self.name,
            self.failed_decode,
            self.alleged_encoding,
        )
9
# NOTE(review): this code relies on `platform`, `sys`, `os`, `unicodedata`,
# `precondition` and `usage` being in scope; no import of them is visible in
# this file -- confirm they are imported.
if platform.system() in ('Linux', 'Solaris'):
    # on byte-oriented filesystems, such as Linux and Solaris

    def unicode_to_fs(fn):
        """Encode the unicode name of an Fname to filesystem bytes.

        Args:
            fn: an Fname whose ``.name`` is a unicode object.

        Returns:
            The encoded filename as a byte string.

        Raises:
            usage.UsageError: if the name cannot be encoded with the
                filesystem encoding.
        """
        precondition(isinstance(fn, Fname), fn)
        precondition(isinstance(fn.name, unicode), fn.name)

        if fn.failed_decode:
            # This means that the unicode string in .name is not actually
            # the result of a successful decoding with a suggested codec,
            # but is instead the result of stuffing the bytes into a unicode
            # by dint of the utf-8b trick.  On a byte-oriented system the
            # .name must therefore not be treated as characters; get the
            # original bytes back out of it instead.
            # NOTE(review): this branch uses the 'python-replace' error
            # handler while the other paths use 'python-escape' -- confirm
            # which one the utf-8b codec actually registers.
            return fn.name.encode('utf-8b', 'python-replace')

        fsencoding = sys.getfilesystemencoding()
        # NOTE(review): the comparison is case-sensitive; confirm that the
        # platform reports these names in exactly this spelling.
        if fsencoding in (None, '', 'ascii', 'utf-8'):
            fsencoding = 'utf-8b'
        try:
            return fn.name.encode(fsencoding, 'python-escape')
        except UnicodeEncodeError:
            raise usage.UsageError(
                "Filename '%s' cannot be encoded using  "
                "the current encoding of your filesystem (%s). Please "
                "configure your locale "
                "correctly or rename this file."
                % (fn.name, sys.getfilesystemencoding()))

    def fs_to_unicode(bytesfn):
        """Decode a filename read from the filesystem into an Fname.

        Args:
            bytesfn: the filename as a byte string (``str``).

        Returns:
            An Fname holding the unicode name.  If the bytes could not be
            decoded with the filesystem encoding, the Fname has
            ``failed_decode=True`` and its name carries the raw bytes via
            utf-8b, so that unicode_to_fs() can restore them.
        """
        precondition(isinstance(bytesfn, str), bytesfn)

        alleged_encoding = sys.getfilesystemencoding()
        if alleged_encoding in (None, '', 'ascii', 'utf-8'):
            alleged_encoding = 'utf-8b'

        try:
            unicodefn = bytesfn.decode(alleged_encoding, 'strict')
        except UnicodeDecodeError:
            # The bytes are not valid in the alleged encoding.  Keep them
            # losslessly and flag the result so that unicode_to_fs() takes
            # its byte-recovery branch.
            unicodefn = bytesfn.decode('utf-8b', 'python-escape')
            return Fname(unicodefn, failed_decode=True)

        unicodefn = unicodedata.normalize('NFC', unicodefn)
        if alleged_encoding == 'utf-8b':
            return Fname(unicodefn)
        # Pass by keyword: the second positional parameter of Fname is
        # failed_decode, not alleged_encoding.
        return Fname(unicodefn, alleged_encoding=alleged_encoding)

    def listdir(fn):
        """Return the entries of directory ``fn`` as a list of Fname objects.

        Args:
            fn: an Fname whose ``.name`` is the unicode path of a directory.
        """
        assert isinstance(fn, Fname), fn
        assert isinstance(fn.name, unicode), fn.name
        # unicode_to_fs() takes the Fname itself, not its .name.
        bytesfn = unicode_to_fs(fn)
        return [fs_to_unicode(child) for child in os.listdir(bytesfn)]

else:
    # on unicode-oriented filesystems, such as Mac and Windows
    def listdir(fn):
        """Return the entries of directory ``fn`` as a list of Fname objects.

        Args:
            fn: an Fname whose ``.name`` is the unicode path of a directory.
        """
        assert isinstance(fn, Fname), fn
        assert isinstance(fn.name, unicode), fn.name
        # The unicode path is handed straight to os.listdir() and each
        # returned entry is wrapped without any decoding step.
        return [Fname(n) for n in os.listdir(fn.name)]