| 1 | # A wrapper around the Python Standard Library's filename access functions to |
|---|
| 2 | # provide a uniform API for all platforms and to prevent lossy en/de-coding. |
|---|
| 3 | |
|---|
class Fname:
    """A filename plus a record of how that name was obtained.

    The constructor stores its three arguments unchanged:

    name             -- the filename itself (the functions in this module
                        expect a unicode object here).
    failed_decode    -- flag, False by default.  When set, .name is not a
                        real decoding of the on-disk bytes; see
                        unicode_to_fs() for how such names are turned back
                        into bytes.
    alleged_encoding -- name of the encoding the bytes were decoded with,
                        or None (the default) when none was recorded.
    """

    def __init__(self, name, failed_decode=False, alleged_encoding=None):
        # Plain attribute storage; no validation or conversion happens here.
        self.alleged_encoding = alleged_encoding
        self.failed_decode = failed_decode
        self.name = name
|---|
| 9 | |
|---|
| 10 | if platform.system() in ('Linux', 'Solaris'): |
|---|
| 11 | # on byte-oriented filesystems, such as Linux and Solaris |
|---|
| 12 | |
|---|
def unicode_to_fs(fn):
    """Encode the unicode name held by an Fname into filesystem bytes.

    fn must be an Fname whose .name is a unicode object.

    Returns the encoded byte string.

    Raises usage.UsageError if the name cannot be represented in the
    encoding chosen for the filesystem.
    """
    precondition(isinstance(fn, Fname), fn)
    precondition(isinstance(fn.name, unicode), fn.name)

    if fn.failed_decode:
        # The unicode string in .name is not the result of a successful
        # decoding with a suggested codec; it is the original bytes stuffed
        # into a unicode object by the utf-8b trick.  On a byte-oriented
        # system it must not be treated as a string of characters: just
        # recover the original bytes from it.
        return fn.name.encode('utf-8b', 'python-replace')

    fsencoding = sys.getfilesystemencoding()
    if fsencoding in (None, '', 'ascii', 'utf-8'):
        # Missing, ascii and utf-8 filesystem encodings are all handled
        # with the utf-8b codec.
        fsencoding = 'utf-8b'

    try:
        # Encode with the encoding selected above.  (The original code
        # referenced an undefined name `encoding` here.)
        return fn.name.encode(fsencoding, 'python-escape')
    except UnicodeEncodeError:
        # The message reports the encoding announced by the interpreter,
        # which is the one the user can change through their locale.
        # (The original code formatted an undefined name `s` instead of
        # the filename.)
        raise usage.UsageError(
            "Filename '%s' cannot be encoded using the current encoding "
            "of your filesystem (%s). Please configure your locale "
            "correctly or rename this file."
            % (fn.name, sys.getfilesystemencoding()))
|---|
| 38 | |
|---|
def fs_to_unicode(bytesfn):
    """Decode a filename read from the filesystem into an Fname.

    bytesfn must be a byte string (str).

    Returns an Fname whose .name is a unicode object:

    * If the bytes decode strictly with the filesystem encoding, the name
      is NFC-normalized.  The encoding is recorded in .alleged_encoding
      unless it is 'utf-8'.
    * Otherwise the bytes are decoded with the utf-8b codec and the Fname
      is marked failed_decode=True.
    """
    # Pass the offending value, not the type, as the extra argument, to
    # match the other precondition() calls in this module.
    precondition(isinstance(bytesfn, str), bytesfn)

    alleged_encoding = sys.getfilesystemencoding()
    if alleged_encoding in (None, '', 'ascii'):
        # With no encoding, or only ascii, announced, try utf-8.
        alleged_encoding = 'utf-8'

    try:
        unicodefn = bytesfn.decode(alleged_encoding, 'strict')
    except UnicodeDecodeError:
        # Not valid in the alleged encoding: fall back to utf-8b and flag
        # the result so unicode_to_fs() can recover the bytes later.
        unicodefn = bytesfn.decode('utf-8b', 'python-escape')
        return Fname(unicodefn, failed_decode=True)

    unicodefn = unicodedata.normalize('NFC', unicodefn)
    if alleged_encoding == 'utf-8':
        return Fname(unicodefn)
    # Must be passed by keyword: the second positional parameter of Fname
    # is failed_decode, so a positional encoding name would wrongly mark a
    # successful decode as failed.
    return Fname(unicodefn, alleged_encoding=alleged_encoding)
|---|
| 58 | |
|---|
def listdir(fn):
    """List a directory on a byte-oriented filesystem.

    fn must be an Fname whose .name is a unicode object naming the
    directory.

    Returns a list of Fname objects, one per entry reported by
    os.listdir(), each produced by fs_to_unicode().
    """
    assert isinstance(fn, Fname), fn
    assert isinstance(fn.name, unicode), fn.name
    # unicode_to_fs() takes the Fname itself, not its .name; passing the
    # bare unicode string would fail its isinstance(fn, Fname) precondition.
    bytesfn = unicode_to_fs(fn)
    # Use a distinct loop variable so the `fn` parameter is not rebound.
    return [fs_to_unicode(entry) for entry in os.listdir(bytesfn)]
|---|
| 65 | |
|---|
| 66 | else: |
|---|
| 67 | # on unicode-oriented filesystems, such as Mac and Windows |
|---|
def listdir(fn):
    """List a directory on a unicode-oriented filesystem.

    fn must be an Fname whose .name is a unicode object naming the
    directory.

    Returns a list with one Fname per entry returned by os.listdir(),
    each wrapping that entry as-is.
    """
    assert isinstance(fn, Fname), fn
    assert isinstance(fn.name, unicode), fn.name
    wrapped = []
    for entry in os.listdir(fn.name):
        wrapped.append(Fname(entry))
    return wrapped
|---|