| 1 | |
|---|
# The word "lumiere" spelled with a precomposed e-grave (U+00E8).
lumiere_nfc = u"lumi\u00E8re"

# One file name in two spellings: a precomposed A-diaeresis (U+00C4), and a
# plain "A" followed by a combining diaeresis (U+0308).
Artonwall_nfc = u"\u00C4rtonwall.mp3"
Artonwall_nfd = u"A\u0308rtonwall.mp3"

# File names created by the generator script below and expected back from
# listdir_unicode() by EncodingUtil.test_listdir_unicode.
TEST_FILENAMES = (Artonwall_nfc, u'test_file', u'Blah blah.txt')
|---|
| 11 | |
|---|
# The following main helps to generate a test class for other operating
# systems.  Run this file as a script with a single argument and it prints
# the source of an EncodingUtil subclass describing the current platform.

if __name__ == "__main__":
    import os
    import platform
    import shutil
    import sys
    import tempfile

    if len(sys.argv) != 2:
        print("Usage: %s lumi<e-grave>re" % sys.argv[0])
        sys.exit(1)

    if sys.platform == "win32":
        try:
            from allmydata.windows.fixups import initialize
        except ImportError:
            print("set PYTHONPATH to the src directory")
            sys.exit(1)
        initialize()

    print()
    print("class MyWeirdOS(EncodingUtil, unittest.TestCase):")
    print(" uname = '%s'" % ' '.join(platform.uname()))
    print(" argv = %s" % repr(sys.argv[1]))
    print(" platform = '%s'" % sys.platform)
    print(" filesystem_encoding = '%s'" % sys.getfilesystemencoding())
    print(" io_encoding = '%s'" % sys.stdout.encoding)

    # tmpdir stays None until mkdtemp() succeeds, so the cleanup below never
    # touches an unbound name.
    tmpdir = None
    try:
        tmpdir = tempfile.mkdtemp()
        for fname in TEST_FILENAMES:
            # Creating (and immediately closing) an empty file is enough; only
            # the resulting directory listing matters.
            with open(os.path.join(tmpdir, fname), 'w'):
                pass

        dirlist = os.listdir(tmpdir)

        print(" dirlist = %s" % repr(dirlist))
    except (OSError, UnicodeError):
        # Best effort: the generated class is still usable without a dirlist.
        # Only file-system and encoding failures are treated this way, so that
        # Ctrl-C and SystemExit are no longer swallowed.
        print(" # Oops, I cannot write filenames containing non-ascii characters")
    finally:
        # Remove the scratch directory whether or not the listing worked.
        if tmpdir is not None:
            shutil.rmtree(tmpdir)
    print()

    sys.exit(0)
|---|
| 54 | |
|---|
| 55 | |
|---|
| 56 | import os, sys |
|---|
| 57 | |
|---|
| 58 | from twisted.trial import unittest |
|---|
| 59 | |
|---|
| 60 | from twisted.python.filepath import FilePath |
|---|
| 61 | |
|---|
| 62 | from allmydata.test.common_util import ( |
|---|
| 63 | ReallyEqualMixin, skip_if_cannot_represent_filename, |
|---|
| 64 | ) |
|---|
| 65 | from allmydata.util import encodingutil, fileutil |
|---|
| 66 | from allmydata.util.encodingutil import unicode_to_url, \ |
|---|
| 67 | unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ |
|---|
| 68 | quote_filepath, unicode_platform, listdir_unicode, \ |
|---|
| 69 | get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ |
|---|
| 70 | to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ |
|---|
| 71 | unicode_to_argv |
|---|
| 72 | |
|---|
class MockStdout:
    """Empty class that defines no attributes or methods of its own."""
|---|
| 75 | |
|---|
| 76 | |
|---|
class EncodingUtil(ReallyEqualMixin):
    """
    Shared encodingutil tests, parameterised by platform.

    Concrete subclasses also inherit from ``unittest.TestCase`` and provide
    the class attributes these methods read: ``platform`` (patched into
    ``sys.platform`` by ``setUp``) and, for ``test_listdir_unicode``,
    ``filesystem_encoding`` and ``dirlist``.
    """

    def setUp(self):
        # Run _reload() again once the test is over; presumably this resets
        # whatever encodingutil derived from the patched values -- confirm
        # against encodingutil._reload.
        self.addCleanup(_reload)
        self.patch(sys, "platform", self.platform)

    def test_unicode_to_url(self):
        # failUnless(value, expected) would treat ``expected`` as the failure
        # message and merely check that ``value`` is truthy, so compare
        # explicitly.
        self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re")

    def test_unicode_to_output_py3(self):
        self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc)

    def test_unicode_to_argv(self):
        """
        unicode_to_argv() is expected to return its unicode argument
        unchanged: same type and same value.
        """
        result = unicode_to_argv(lumiere_nfc)
        expected_value = lumiere_nfc

        self.assertIsInstance(result, type(expected_value))
        self.assertEqual(result, expected_value)

    def test_unicode_platform_py3(self):
        _reload()
        self.failUnlessReallyEqual(unicode_platform(), True)

    def test_listdir_unicode(self):
        """
        With os.listdir replaced by this platform's ``dirlist``,
        listdir_unicode() yields names that normalize to TEST_FILENAMES.
        """
        # A parameterisation without a directory listing has nothing to check.
        if not hasattr(self, 'dirlist'):
            return

        try:
            u"test".encode(self.filesystem_encoding)
        except (LookupError, AttributeError):
            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
                                    "that we are testing for the benefit of a different platform."
                                    % (self.filesystem_encoding,))

        def call_os_listdir(path):
            # Python 3 always lists unicode filenames, so decode any entries
            # that are given as bytes.  ``path`` is deliberately ignored: the
            # listing always comes from ``dirlist``.
            return [d.decode(self.filesystem_encoding) if isinstance(d, bytes)
                    else d
                    for d in self.dirlist]

        self.patch(os, 'listdir', call_os_listdir)

        def call_sys_getfilesystemencoding():
            return self.filesystem_encoding
        self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)

        # Called after patching os and sys, before exercising listdir_unicode.
        _reload()
        filenames = listdir_unicode(u'/dummy')

        self.failUnlessEqual({encodingutil.normalize(fname) for fname in filenames},
                             set(TEST_FILENAMES))
|---|
| 131 | |
|---|
| 132 | |
|---|
class StdlibUnicode(unittest.TestCase):
    """This mainly tests that some of the stdlib functions support Unicode paths, but also that
    listdir_unicode works for valid filenames."""

    def test_mkdir_open_exists_abspath_listdir_expanduser(self):
        """
        Create a non-ASCII directory holding a non-ASCII file, then check
        that exists(), listdir_unicode() and expanduser() cope with the names.
        """
        skip_if_cannot_represent_filename(lumiere_nfc)

        try:
            os.mkdir(lumiere_nfc)
        except EnvironmentError as e:
            raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
                                    "does not support Unicode, even though the platform does." % (e,))

        relative_name = u"%s/%s.txt" % (lumiere_nfc, lumiere_nfc)
        with open(relative_name, 'wb'):
            pass
        self.failUnless(os.path.exists(relative_name))
        absolute_name = os.path.join(os.getcwd(), relative_name)
        self.failUnless(os.path.exists(absolute_name))
        listing = listdir_unicode(lumiere_nfc)

        # We only require that the listing includes a filename that is canonically equivalent
        # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
        normalized = {encodingutil.normalize(entry) for entry in listing}
        self.failUnlessIn(lumiere_nfc + u".txt", normalized)

        expanded = fileutil.expanduser(u"~/" + lumiere_nfc)
        self.failIfIn(u"~", expanded)
        self.failUnless(expanded.endswith(lumiere_nfc), expanded)

    def test_open_unrepresentable(self):
        """
        open() raises UnicodeEncodeError for a name that the filesystem
        encoding cannot represent.
        """
        if unicode_platform():
            raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")

        fs_encoding = get_filesystem_encoding()
        unrepresentable = u'\u2621.txt'
        try:
            unrepresentable.encode(fs_encoding)
        except UnicodeEncodeError:
            self.failUnlessRaises(UnicodeEncodeError, open, unrepresentable, 'wb')
        else:
            # The name encoded fine, so it does not exercise the failure path.
            raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
|---|
| 172 | |
|---|
| 173 | |
|---|
class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
    """Tests for quote_output() under several output encodings."""

    def tearDown(self):
        _reload()

    def _check(self, inp, out, enc, optional_quotes, quote_newlines):
        """
        Assert that quote_output(inp) gives ``out``, both with and without
        quote marks, and likewise for the bytes/text counterpart of ``inp``.

        :param inp: bytes or text to quote.
        :param out: expected result with quote marks; bytes are decoded with
            ``enc`` (or encodingutil.io_encoding when ``enc`` is falsy).
        :param enc: value passed to quote_output() as ``encoding``.
        :param optional_quotes: when true, the ``quotemarks=False`` result is
            expected to be ``out`` minus its first and last character;
            otherwise it is expected to equal ``out``.
        :param quote_newlines: passed straight through to quote_output().
        """
        if isinstance(out, bytes):
            out = out.decode(enc or encodingutil.io_encoding)
        out2 = out[1:-1] if optional_quotes else out

        def assert_quoted(value):
            self.failUnlessReallyEqual(quote_output(value, encoding=enc, quote_newlines=quote_newlines), out)
            self.failUnlessReallyEqual(quote_output(value, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)

        assert_quoted(inp)

        # Expectations rendered as b"..." have no counterpart check.
        if out[0:2] == 'b"':
            return

        try:
            if isinstance(inp, bytes):
                counterpart = inp.decode("utf-8")
            else:
                counterpart = inp.encode('utf-8')
        except (UnicodeDecodeError, UnicodeEncodeError):
            # Some things convert on Python 2, but not Python 3, e.g.
            # surrogates like u"\uDC00\uD800"...
            return
        assert_quoted(counterpart)

    def _test_quote_output_all(self, enc):
        """Run the cases whose expected output is pure ASCII, using ``enc``."""
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            out = out.decode("ascii")
            self._check(inp, out, enc, optional_quotes, quote_newlines)

        # optional single quotes
        check(b"foo", b"'foo'", True)
        check(b"\\", b"'\\'", True)
        check(b"$\"`", b"'$\"`'", True)
        check(b"\n", b"'\n'", True, quote_newlines=False)

        # mandatory single quotes
        check(b"\"", b"'\"'")

        # double quotes
        check(b"'", b"\"'\"")
        check(b"\n", b"\"\\x0a\"", quote_newlines=True)
        check(b"\x00", b"\"\\x00\"")

        # invalid Unicode and astral planes
        check(u"\uFDD0\uFDEF", b"\"\\ufdd0\\ufdef\"")
        check(u"\uDC00\uD800", b"\"\\udc00\\ud800\"")
        check(u"\uDC00\uD800\uDC00", b"\"\\udc00\\U00010000\"")
        check(u"\uD800\uDC00", b"\"\\U00010000\"")
        check(u"\uD800\uDC01", b"\"\\U00010001\"")
        check(u"\uD801\uDC00", b"\"\\U00010400\"")
        check(u"\uDBFF\uDFFF", b"\"\\U0010ffff\"")
        check(u"'\uDBFF\uDFFF", b"\"'\\U0010ffff\"")
        check(u"\"\uDBFF\uDFFF", b"\"\\\"\\U0010ffff\"")

        # invalid UTF-8
        check(b"\xFF", b"b\"\\xff\"")
        check(b"\x00\"$\\`\x80\xFF", b"b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")

    def test_quote_output_ascii(self, enc='ascii'):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            self._check(inp, out, enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u00D7", b"\"\\xd7\"")
        check(u"'\u00D7", b"\"'\\xd7\"")
        check(u"\"\u00D7", b"\"\\\"\\xd7\"")
        check(u"\u2621", b"\"\\u2621\"")
        check(u"'\u2621", b"\"'\\u2621\"")
        check(u"\"\u2621", b"\"\\\"\\u2621\"")
        check(u"\n", b"'\n'", True, quote_newlines=False)
        check(u"\n", b"\"\\x0a\"", quote_newlines=True)

    def test_quote_output_latin1(self, enc='latin1'):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            # Expectations are written as text and handed over as latin1
            # bytes; _check() decodes them again with ``enc``.
            self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u00D7", u"'\u00D7'", True)
        check(u"'\u00D7", u"\"'\u00D7\"")
        check(u"\"\u00D7", u"'\"\u00D7'")
        check(u"\u00D7\"", u"'\u00D7\"'", True)
        check(u"\u2621", u"\"\\u2621\"")
        check(u"'\u2621", u"\"'\\u2621\"")
        check(u"\"\u2621", u"\"\\\"\\u2621\"")
        check(u"\n", u"'\n'", True, quote_newlines=False)
        check(u"\n", u"\"\\x0a\"", quote_newlines=True)

    def test_quote_output_utf8(self, enc='utf-8'):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            self._check(inp, out, enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u2621", u"'\u2621'", True)
        check(u"'\u2621", u"\"'\u2621\"")
        check(u"\"\u2621", u"'\"\u2621'")
        check(u"\u2621\"", u"'\u2621\"'", True)
        check(u"\n", u"'\n'", True, quote_newlines=False)
        check(u"\n", u"\"\\x0a\"", quote_newlines=True)

    def test_quote_output_default(self):
        """Default is the encoding of sys.stdout if known, otherwise utf-8."""
        # A stdout replacement may lack ``encoding`` altogether; treat that
        # the same as an encoding of None instead of raising AttributeError.
        encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
        self.assertEqual(quote_output(u"\u2621"),
                         quote_output(u"\u2621", encoding=encoding))
|---|
| 286 | |
|---|
| 287 | |
|---|
def win32_other(win32, other):
    """Return ``win32`` when sys.platform is "win32", else ``other``."""
    if sys.platform == "win32":
        return win32
    return other
|---|
| 290 | |
|---|
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
    """Tests for quote_path(), quote_local_unicode_path() and quote_filepath()."""

    def assertPathsEqual(self, actual, expected):
        # Expectations are written as ASCII bytes and compared as text.
        self.failUnlessReallyEqual(actual, expected.decode("ascii"))

    def test_quote_path(self):
        # (path segments, keyword arguments, expected output)
        segment_cases = [
            ([u'foo', u'bar'], {}, b"'foo/bar'"),
            ([u'foo', u'bar'], {"quotemarks": True}, b"'foo/bar'"),
            ([u'foo', u'bar'], {"quotemarks": False}, b"foo/bar"),
            ([u'foo', u'\nbar'], {}, b'"foo/\\x0abar"'),
            ([u'foo', u'\nbar'], {"quotemarks": True}, b'"foo/\\x0abar"'),
            ([u'foo', u'\nbar'], {"quotemarks": False}, b'"foo/\\x0abar"'),
        ]
        for segments, kwargs, expected in segment_cases:
            self.assertPathsEqual(quote_path(segments, **kwargs), expected)

        drive_path = u"\\\\?\\C:\\foo"
        unc_path = u"\\\\?\\UNC\\foo\\bar"
        # (input path, keyword arguments, expected on win32, expected elsewhere)
        local_cases = [
            (drive_path, {}, b"'C:\\foo'", b"'\\\\?\\C:\\foo'"),
            (drive_path, {"quotemarks": True}, b"'C:\\foo'", b"'\\\\?\\C:\\foo'"),
            (drive_path, {"quotemarks": False}, b"C:\\foo", b"\\\\?\\C:\\foo"),
            (unc_path, {}, b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"),
            (unc_path, {"quotemarks": True}, b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"),
            (unc_path, {"quotemarks": False}, b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar"),
        ]
        for path, kwargs, on_win32, elsewhere in local_cases:
            self.assertPathsEqual(quote_local_unicode_path(path, **kwargs),
                                  win32_other(on_win32, elsewhere))

    def test_quote_filepath(self):
        foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
        # (keyword arguments, expected on win32, expected elsewhere)
        cases = [
            ({}, b"'C:\\foo\\bar'", b"'/foo/bar'"),
            ({"quotemarks": True}, b"'C:\\foo\\bar'", b"'/foo/bar'"),
            ({"quotemarks": False}, b"C:\\foo\\bar", b"/foo/bar"),
        ]
        for kwargs, on_win32, elsewhere in cases:
            self.assertPathsEqual(quote_filepath(foo_bar_fp, **kwargs),
                                  win32_other(on_win32, elsewhere))

        if sys.platform != "win32":
            return

        # The \\?\ long-path form is only exercised on win32.
        foo_longfp = FilePath(u'\\\\?\\C:\\foo')
        long_cases = [
            ({}, b"'C:\\foo'"),
            ({"quotemarks": True}, b"'C:\\foo'"),
            ({"quotemarks": False}, b"C:\\foo"),
        ]
        for kwargs, expected in long_cases:
            self.assertPathsEqual(quote_filepath(foo_longfp, **kwargs), expected)
|---|
| 335 | |
|---|
| 336 | |
|---|
class FilePaths(ReallyEqualMixin, unittest.TestCase):
    """Tests for the FilePath helper functions imported from encodingutil."""

    def test_to_filepath(self):
        base = win32_other(u'C:\\foo', u'/foo')

        # The same path without and with a trailing separator; both are
        # converted before anything is asserted.
        converted = [to_filepath(base + suffix) for suffix in (u'', os.path.sep)]

        for candidate in converted:
            self.failUnlessReallyEqual(candidate, FilePath(base))
            self.failUnlessReallyEqual(candidate.path, base)

        if sys.platform != "win32":
            return

        # The \\?\ long-path form is only exercised on win32.
        long_base = u'\\\\?\\C:\\foo'
        long_converted = to_filepath(long_base + u'\\')
        self.failUnlessReallyEqual(long_converted, FilePath(long_base))
        self.failUnlessReallyEqual(long_converted.path, long_base)

    def test_extend_filepath(self):
        bytes_root = FilePath(win32_other(b'C:\\foo', b'/foo'))
        text_root = FilePath(win32_other(u'C:\\foo', u'/foo'))
        wanted = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')

        # The result is expected to be text-based whichever root it grew from.
        for root in (bytes_root, text_root):
            extended = extend_filepath(root, [u'bar', u'baz'])
            self.failUnlessReallyEqual(extended, FilePath(wanted))
            self.failUnlessReallyEqual(extended.path, wanted)

    def test_unicode_from_filepath(self):
        bytes_root = FilePath(win32_other(b'C:\\foo', b'/foo'))
        text_root = FilePath(win32_other(u'C:\\foo', u'/foo'))
        wanted = win32_other(u'C:\\foo', u'/foo')

        for root in (bytes_root, text_root):
            self.failUnlessReallyEqual(unicode_from_filepath(root), wanted)

    def test_unicode_segments_from(self):
        bytes_root = FilePath(win32_other(b'C:\\foo', b'/foo'))
        text_root = FilePath(win32_other(u'C:\\foo', u'/foo'))
        bytes_leaf = FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz'))
        text_leaf = FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz'))

        # Every bytes/text combination of ancestor and descendant is checked.
        for root in (bytes_root, text_root):
            for leaf in (bytes_leaf, text_leaf):
                self.failUnlessReallyEqual(unicode_segments_from(leaf, root),
                                           [u'bar', u'baz'])
|---|
| 382 | |
|---|
| 383 | |
|---|
class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
    """EncodingUtil tests run with the values below, which describe a Linux host (see ``uname``)."""

    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
    platform = 'linux2'
    filesystem_encoding = 'UTF-8'
    io_encoding = 'UTF-8'
    argv = b'lumi\xc3\xa8re'
    # Directory entries are given as bytes here; test_listdir_unicode decodes
    # them with filesystem_encoding.
    dirlist = [b'test_file', b'\xc3\x84rtonwall.mp3', b'Blah blah.txt']
|---|
| 391 | |
|---|
class Windows(EncodingUtil, unittest.TestCase):
    """EncodingUtil tests run with the values below, which describe a Windows host (see ``uname``)."""

    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
    platform = 'win32'
    filesystem_encoding = 'mbcs'
    io_encoding = 'utf-8'
    argv = b'lumi\xc3\xa8re'
    # Directory entries are already text here, so test_listdir_unicode uses
    # them without decoding.
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
|---|
| 399 | |
|---|
class MacOSXLeopard(EncodingUtil, unittest.TestCase):
    """EncodingUtil tests run with the values below, which describe a Darwin host (see ``uname``)."""

    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
    platform = 'darwin'
    filesystem_encoding = 'utf-8'
    io_encoding = 'UTF-8'
    # NOTE(review): the sibling platform classes call this attribute ``argv``;
    # none of the EncodingUtil tests read either name -- confirm the intent.
    output = b'lumi\xc3\xa8re'
    # The first entry is spelled with a combining diaeresis (the same string
    # as Artonwall_nfd); test_listdir_unicode normalizes names before comparing.
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
|---|
| 407 | |
|---|
| 408 | |
|---|
class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
    """Tests for to_bytes() and from_utf8_or_none()."""

    def test_to_bytes(self):
        # (input, expected result)
        cases = [
            (b"foo", b"foo"),
            (b"lumi\xc3\xa8re", b"lumi\xc3\xa8re"),
            (b"\xFF", b"\xFF"),  # passes through invalid UTF-8 -- is this what we want?
            (u"lumi\u00E8re", b"lumi\xc3\xa8re"),
            (None, None),
        ]
        for given, wanted in cases:
            self.failUnlessReallyEqual(to_bytes(given), wanted)

    def test_from_utf8_or_none(self):
        # Text input is rejected with an AssertionError.
        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")
        # (input, expected result)
        cases = [
            (b"lumi\xc3\xa8re", u"lumi\u00E8re"),
            (None, None),
        ]
        for given, wanted in cases:
            self.failUnlessReallyEqual(from_utf8_or_none(given), wanted)
        # Bytes that are not valid UTF-8 raise rather than pass through.
        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, b"\xFF")
|---|