Ticket #534: fsencode.py

File fsencode.py, 3.0 KB (added by zooko, at 2009-04-29T06:11:26Z)
Line 
1# A wrapper around the Python Standard Library's filename access functions to
2# provide a uniform API for all platforms and to prevent lossy en/de-coding.
3
class Fname(object):
    """A filename plus a record of how it was obtained from the filesystem.

    Attributes:
        name: the filename itself.  The functions in this file store a
            unicode object here and check for one before using it.
        failed_decode: True when ``name`` is not the result of a successful
            decode but was produced by the utf-8b fallback, so that the
            original bytes can be recovered from it.  Defaults to False.
        alleged_encoding: presumably the name of the codec that ``name`` was
            decoded with when that codec is not utf-8; None when unknown or
            not recorded.
    """

    def __init__(self, name, failed_decode=False, alleged_encoding=None):
        self.name = name
        self.failed_decode = failed_decode
        self.alleged_encoding = alleged_encoding

    def __repr__(self):
        # Instances are passed as the diagnostic argument of assert and
        # precondition() calls, so a readable repr makes failures debuggable.
        return "%s(%r, failed_decode=%r, alleged_encoding=%r)" % (
            self.__class__.__name__,
            self.name,
            self.failed_decode,
            self.alleged_encoding,
        )
9
# The standard-library modules used below are not imported anywhere in the
# visible file, so they are imported here; they belong with the other
# top-of-file imports if the file has any.
import os
import platform
import sys
import unicodedata

# NOTE(review): `precondition` and `usage` are also used below without a
# visible import; they must be provided by the surrounding project.
# NOTE(review): the 'utf-8b' codec and the 'python-escape' / 'python-replace'
# error handlers are not part of the standard library; presumably they are
# registered elsewhere in the project -- confirm before relying on this code.

# platform.system() reports Solaris as 'SunOS'; 'Solaris' is kept only for
# backward compatibility with the original condition.
if platform.system() in ('Linux', 'SunOS', 'Solaris'):
    # Byte-oriented filesystems: names on disk are byte strings, so they are
    # converted explicitly between bytes and unicode here.

    def unicode_to_fs(fn):
        """Encode the unicode name held by an Fname into filesystem bytes.

        fn must be an Fname whose .name is a unicode object (both are checked
        with precondition()).

        Returns the encoded byte string.

        Raises usage.UsageError if the name cannot be encoded with the
        filesystem encoding.
        """
        precondition(isinstance(fn, Fname), fn)
        precondition(isinstance(fn.name, unicode), fn.name)

        if fn.failed_decode:
            # .name is not the result of a successful decode with a suggested
            # codec; the original bytes were stuffed into a unicode object
            # with the utf-8b trick.  Do not treat it as characters: get the
            # original bytes back out of it.
            # NOTE(review): this uses 'python-replace' while the matching
            # decode in fs_to_unicode uses 'python-escape' -- confirm that
            # the pair round-trips.
            return fn.name.encode('utf-8b', 'python-replace')

        fsencoding = sys.getfilesystemencoding()
        # NOTE(review): this comparison is case-sensitive; the interpreter
        # may report spellings such as 'UTF-8' -- verify.
        if fsencoding in (None, '', 'ascii', 'utf-8'):
            fsencoding = 'utf-8b'
        try:
            # Fixed: the original referenced an undefined name `encoding`.
            return fn.name.encode(fsencoding, 'python-escape')
        except UnicodeEncodeError:
            # Fixed: the original formatted an undefined name `s`.
            raise usage.UsageError(
                "Filename '%s' cannot be encoded using the current encoding "
                "of your filesystem (%s). Please configure your locale "
                "correctly or rename this file."
                % (fn.name, sys.getfilesystemencoding()))

    def fs_to_unicode(bytesfn):
        """Decode a byte-string filename from the filesystem into an Fname.

        bytesfn must be a str (checked with precondition()).

        A strict decode with the filesystem encoding is tried first (utf-8
        when that encoding is unset or ascii).  On success the name is
        NFC-normalized, and the encoding is recorded on the Fname unless it
        is utf-8.  On failure the bytes are decoded with utf-8b and the
        Fname is flagged failed_decode=True.
        """
        # Fixed: report the offending value, not the `str` type itself.
        precondition(isinstance(bytesfn, str), bytesfn)

        alleged_encoding = sys.getfilesystemencoding()
        if alleged_encoding in (None, '', 'ascii'):
            alleged_encoding = 'utf-8'

        try:
            unicodefn = bytesfn.decode(alleged_encoding, 'strict')
        except UnicodeDecodeError:
            # Fixed: decoding raises UnicodeDecodeError; the original caught
            # UnicodeEncodeError, so this fallback could never run.
            unicodefn = bytesfn.decode('utf-8b', 'python-escape')
            return Fname(unicodefn, failed_decode=True)

        # NOTE(review): NFC normalization means a name stored on disk in
        # another normalization form will not encode back to the same bytes
        # -- confirm this is intended.
        unicodefn = unicodedata.normalize('NFC', unicodefn)
        if alleged_encoding == 'utf-8':
            return Fname(unicodefn)
        # Fixed: pass by keyword; positionally the encoding name landed in
        # the failed_decode parameter.
        return Fname(unicodefn, alleged_encoding=alleged_encoding)

    def listdir(fn):
        """Return the entries of directory fn as a list of Fname objects.

        fn must be an Fname whose .name is a unicode object.
        """
        assert isinstance(fn, Fname), fn
        assert isinstance(fn.name, unicode), fn.name
        # Fixed: unicode_to_fs() takes the Fname itself, not its .name.
        bytesfn = unicode_to_fs(fn)
        return [fs_to_unicode(child) for child in os.listdir(bytesfn)]

else:
    # Unicode-oriented filesystems, such as Mac and Windows: the unicode
    # path is passed straight to os.listdir().

    def listdir(fn):
        """Return the entries of directory fn as a list of Fname objects.

        fn must be an Fname whose .name is a unicode object.
        """
        assert isinstance(fn, Fname), fn
        assert isinstance(fn.name, unicode), fn.name
        # NOTE(review): assumes os.listdir() returns only unicode objects
        # when given a unicode path -- verify for undecodable names.
        return [Fname(n) for n in os.listdir(fn.name)]
71        return [Fname(n) for n in os.listdir(fn.name)]