Ticket #534: unicode-bundle-v4.darcspatch

File unicode-bundle-v4.darcspatch, 59.6 KB (added by francois, at 2010-05-20T00:55:19Z)
Line 
1Thu May 20 01:33:25 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
2  * Add dependency on Michael Foord's mock library
3
4Thu May 20 02:41:05 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
5  * stringutils.py: Unicode helper functions + associated tests
6 
7  This file contains a bunch of helper functions which convert
8  unicode strings to and from argv, filenames and stdout.
9
10Thu May 20 02:43:56 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
11  * Fix handling of correctly encoded unicode filenames (#534)
12 
13  Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
14  backup', have been improved to correctly handle filenames containing non-ASCII
15  characters.
16   
17  In the case where Tahoe encounters a filename which cannot be decoded using the
18  system encoding, an error will be returned and the operation will fail.  Under
19  Linux, this typically happens when the filesystem contains filenames encoded
20  with an encoding (for instance latin1) other than that of the system locale (for
21  instance UTF-8).  In such a case, you'll need to fix your system with tools such
22  as 'convmv' before using the Tahoe CLI.
23   
24  All CLI commands have been improved to support non-ASCII parameters such as
25  filenames and aliases on all supported operating systems except Windows, for
26  the time being.
27
28New patches:
29
30[Add dependency on Michael Foord's mock library
31Francois Deppierraz <francois@ctrlaltdel.ch>**20100519233325
32 Ignore-this: 9bb01bf1e4780f6b98ed394c3b772a80
33] hunk ./_auto_deps.py 34
34                   # Needed for SFTP. Commented-out pending tests, see #953.
35                   # "pycrypto >= 2.0.1",
36 
37+                  # Mock - Mocking and Testing Library
38+                  # http://www.voidspace.org.uk/python/mock/
39+                  "mock",
40+
41                   # Will be needed to test web apps, but not yet. See #1001.
42                   #"windmill >= 1.3",
43                   ]
44[stringutils.py: Unicode helper functions + associated tests
45Francois Deppierraz <francois@ctrlaltdel.ch>**20100520004105
46 Ignore-this: 7a73fc31de2fd39d437d6abd278bfa9a
47 
48 This file contains a bunch of helper functions which convert
49 unicode strings to and from argv, filenames and stdout.
50] {
51addfile ./src/allmydata/test/test_stringutils.py
52hunk ./src/allmydata/test/test_stringutils.py 1
53+# coding=utf-8
54+
55+TEST_FILENAMES = (
56+  u'Ärtonwall.mp3',
57+  u'test_file',
58+  u'Blah blah.txt',
59+)
60+
61+# The following main helps to generate a test class for other operating
62+# systems.
63+
64+if __name__ == "__main__":
65+    import sys, os
66+    import tempfile
67+    import shutil
68+    import platform
69+   
70+    if len(sys.argv) != 2:
71+        print "Usage: %s lumière" % sys.argv[0]
72+        sys.exit(1)
73+   
74+    print
75+    print "class MyWeirdOS(StringUtils, unittest.TestCase):"
76+    print "    uname = '%s'" % ' '.join(platform.uname())
77+    print "    argv = %s" % repr(sys.argv[1])
78+    print "    platform = '%s'" % sys.platform
79+    print "    filesystemencoding = '%s'" % sys.getfilesystemencoding()
80+    print "    stdoutencoding = '%s'" % sys.stdout.encoding
81+
82+    try:
83+        tmpdir = tempfile.mkdtemp()
84+        for fname in TEST_FILENAMES:
85+            open(os.path.join(tmpdir, fname), 'w').close()
86+
87+        # Use Unicode API under Windows or MacOS X
88+        if sys.platform in ('win32', 'darwin'):
89+            dirlist = os.listdir(unicode(tmpdir))
90+        else:
91+            dirlist = os.listdir(tmpdir)
92+
93+        print "    dirlist = %s" % repr(dirlist)
94+    except:
95+        print "    # Oops, I cannot write filenames containing non-ascii characters"
96+    print
97+
98+    shutil.rmtree(tmpdir)
99+    sys.exit(0)
100+
101+from twisted.trial import unittest
102+from mock import patch
103+import sys
104+
105+from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \
106+    unicode_to_stdout, unicode_platform, listdir_unicode, open_unicode, \
107+    FilenameEncodingError, get_term_encoding
108+from twisted.python import usage
109+
110+class StringUtilsErrors(unittest.TestCase):
111+    @patch('sys.stdout')
112+    def test_get_term_encoding(self, mock):
113+        mock.encoding = None
114+       
115+        self.failUnlessEqual(get_term_encoding(), 'ascii')
116+
117+    @patch('sys.stdout')
118+    def test_argv_to_unicode(self, mock):
119+        mock.encoding = 'utf-8'
120+
121+        self.failUnlessRaises(usage.UsageError,
122+                              argv_to_unicode,
123+                              u'lumière'.encode('latin1'))
124+
125+    def test_unicode_to_url(self):
126+        pass
127+
128+    @patch('sys.stdout')
129+    def test_unicode_to_stdout(self, mock):
130+        # Encoding koi8-r cannot represent 'è'
131+        mock.encoding = 'koi8-r'
132+        self.failUnlessEqual(unicode_to_stdout(u'lumière'), 'lumi?re')
133+
134+    @patch('os.listdir')
135+    def test_unicode_normalization(self, mock):
136+        # Pretend to run on an Unicode platform such as Windows
137+        orig_platform = sys.platform
138+        sys.platform = 'win32'
139+
140+        mock.return_value = [u'A\u0308rtonwall.mp3']
141+        self.failUnlessEqual(listdir_unicode(u'/dummy'), [u'\xc4rtonwall.mp3'])
142+
143+        sys.platform = orig_platform
144+
145+# The following tests apply only to platforms which don't store filenames as
146+# Unicode entities on the filesystem.
147+class StringUtilsNonUnicodePlatform(unittest.TestCase):
148+    def setUp(self):
149+        # Mock sys.platform because unicode_platform() uses it
150+        self.original_platform = sys.platform
151+        sys.platform = 'linux'
152+
153+    def tearDown(self):
154+        sys.platform = self.original_platform
155+
156+    @patch('sys.getfilesystemencoding')
157+    @patch('os.listdir')
158+    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
159+        # What happen if a latin1-encoded filenames is encountered on an UTF-8
160+        # filesystem?
161+        mock_listdir.return_value = [
162+            u'lumière'.encode('utf-8'),
163+            u'lumière'.encode('latin1')]
164+
165+        mock_getfilesystemencoding.return_value = 'utf-8'
166+       
167+        self.failUnlessRaises(FilenameEncodingError,
168+                              listdir_unicode,
169+                              u'/dummy')
170+       
171+        # We're trying to list a directory whose name cannot be represented in
172+        # the filesystem encoding.  This should fail.
173+        mock_getfilesystemencoding.return_value = 'ascii'
174+        self.failUnlessRaises(FilenameEncodingError,
175+                              listdir_unicode,
176+                              u'/lumière')
177+
178+    @patch('sys.getfilesystemencoding')
179+    def test_open_unicode(self, mock):
180+        mock.return_value = 'ascii'
181+
182+        self.failUnlessRaises(FilenameEncodingError,
183+                              open_unicode,
184+                              u'lumière')
185+
186+class StringUtils():
187+    def setUp(self):
188+        # Mock sys.platform because unicode_platform() uses it
189+        self.original_platform = sys.platform
190+        sys.platform = self.platform
191+
192+    def tearDown(self):
193+        sys.platform = self.original_platform
194+
195+    @patch('sys.stdout')
196+    def test_argv_to_unicode(self, mock):
197+        if 'argv' not in dir(self):
198+            raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")
199+
200+        mock.encoding = self.stdoutencoding
201+
202+        argu = u'lumière'
203+        argv = self.argv
204+
205+        self.failUnlessEqual(argv_to_unicode(argv), argu)
206+
207+    def test_unicode_to_url(self):
208+        self.failUnless(unicode_to_url(u'lumière'), u'lumière'.encode('utf-8'))
209+
210+    @patch('sys.stdout')
211+    def test_unicode_to_stdout(self, mock):
212+        if 'argv' not in dir(self):
213+            raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")
214+
215+        mock.encoding = self.stdoutencoding
216+        self.failUnlessEqual(unicode_to_stdout(u'lumière'), self.argv)
217+
218+    def test_unicode_platform(self):
219+        matrix = {
220+          'linux2': False,
221+          'win32':  True,
222+          'darwin': True,
223+        }
224+
225+        self.failUnlessEqual(unicode_platform(), matrix[self.platform])
226+
227+    @patch('sys.getfilesystemencoding')
228+    @patch('os.listdir')
229+    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
230+
231+        mock_listdir.return_value = self.dirlist
232+        mock_getfilesystemencoding.return_value = self.filesystemencoding
233+       
234+        filenames = listdir_unicode(u'/dummy')
235+
236+        for fname in TEST_FILENAMES:
237+            self.failUnless(isinstance(fname, unicode))
238+
239+            if fname not in filenames:
240+                self.fail("Cannot find %r in %r" % (fname, filenames))
241+
242+    @patch('os.open')
243+    def test_open_unicode(self, mock):
244+
245+        self.failUnlessRaises(IOError,
246+                              open_unicode,
247+                              u'/dummy_directory/lumière.txt')
248+
249+
250+class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
251+    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
252+    argv = 'lumi\xc3\xa8re'
253+    platform = 'linux2'
254+    filesystemencoding = 'UTF-8'
255+    stdoutencoding = 'UTF-8'
256+    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
257+
258+
259+class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
260+    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
261+    argv = 'lumi\xe8re'
262+    platform = 'linux2'
263+    filesystemencoding = 'ISO-8859-1'
264+    stdoutencoding = 'ISO-8859-1'
265+    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
266+
267+class WindowsXP(StringUtils, unittest.TestCase):
268+    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
269+    argv = 'lumi\xe8re'
270+    platform = 'win32'
271+    filesystemencoding = 'mbcs'
272+    stdoutencoding = 'cp850'
273+    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
274+
275+    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
276+
277+class WindowsXP_UTF8(StringUtils, unittest.TestCase):
278+    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
279+    argv = 'lumi\xe8re'
280+    platform = 'win32'
281+    filesystemencoding = 'mbcs'
282+    stdoutencoding = 'cp65001'
283+    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
284+
285+    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
286+
287+class WindowsVista(StringUtils, unittest.TestCase):
288+    uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
289+    argv = 'lumi\xe8re'
290+    platform = 'win32'
291+    filesystemencoding = 'mbcs'
292+    stdoutencoding = 'cp850'
293+    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
294+
295+    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
296+
297+class MacOSXLeopard(StringUtils, unittest.TestCase):
298+    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
299+    argv = 'lumi\xc3\xa8re'
300+    platform = 'darwin'
301+    filesystemencoding = 'utf-8'
302+    stdoutencoding = 'UTF-8'
303+    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
304+
305+class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
306+    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
307+    #argv = 'lumiere'
308+    platform = 'darwin'
309+    filesystemencoding = 'utf-8'
310+    stdoutencoding = 'US-ASCII'
311+    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
312addfile ./src/allmydata/util/stringutils.py
313hunk ./src/allmydata/util/stringutils.py 1
314+"""
315+Functions used to convert inputs from whatever encoding used in the system to
316+unicode and back.
317+"""
318+
319+import sys
320+import os
321+import unicodedata
322+from allmydata.util.assertutil import precondition
323+from twisted.python import usage
324+
325+def get_term_encoding():
326+    """
327+    Returns expected encoding for writing to the terminal and reading
328+    arguments from the command-line.
329+    """
330+
331+    if sys.stdout.encoding == None:
332+        return 'ascii'
333+    else:
334+        return sys.stdout.encoding
335+
336+def argv_to_unicode(s):
337+    """
338+    Decode given argv element to unicode.
339+    """
340+    # Try to decode the command-line argument with the encoding returned by
341+    # get_term_encoding(), if this fails print an error message to the user.
342+
343+    precondition(isinstance(s, str), s)
344+
345+    try:
346+        return unicode(s, get_term_encoding())
347+    except UnicodeDecodeError:
348+        raise usage.UsageError("Argument '%s' cannot be decoded as %s." %
349+                               (s, get_term_encoding()))
350+
351+def unicode_to_url(s):
352+    """
353+    Encode an unicode object used in an URL.
354+    """
355+    # According to RFC 2718, non-ascii characters in url's must be UTF-8 encoded.
356+
357+    precondition(isinstance(s, unicode), s)
358+    return s.encode('utf-8')
359+
360+def unicode_to_stdout(s):
361+    """
362+    Encode an unicode object for representation on stdout.
363+    """
364+
365+    precondition(isinstance(s, unicode), s)
366+    return s.encode(get_term_encoding(), 'replace')
367+
368+def unicode_platform():
369+    """
370+    Does the current platform handle Unicode filenames natively ?
371+    """
372+
373+    return sys.platform in ('win32', 'darwin')
374+
375+class FilenameEncodingError(Exception):
376+    """
377+    Filename cannot be encoded using the current encoding of your filesystem
378+    (%s). Please configure your locale correctly or rename this file.
379+    """
380+
381+    pass
382+
383+def listdir_unicode_unix(path):
384+    """
385+    This function emulates an Unicode API under Unix similar to one available
386+    under Windows or MacOS X.
387+
388+    If badly encoded filenames are encountered, an exception is raised.
389+    """
390+    precondition(isinstance(path, unicode), path)
391+
392+    encoding = sys.getfilesystemencoding()
393+    try:
394+        byte_path = path.encode(encoding)
395+    except UnicodeEncodeError:
396+        raise FilenameEncodingError(path)
397+
398+    try:
399+        return [unicode(fn, encoding) for fn in os.listdir(byte_path)]
400+    except UnicodeDecodeError:
401+        raise FilenameEncodingError(fn)
402+
403+def listdir_unicode(path, encoding = None):
404+    """
405+    Wrapper around listdir() which provides safe access to the convenient
406+    Unicode API even under Unix.
407+    """
408+
409+    precondition(isinstance(path, unicode), path)
410+
411+    # On Windows and MacOS X, the Unicode API is used
412+    if unicode_platform():
413+        dirlist = os.listdir(path)
414+
415+    # On other platforms (ie. Unix systems), the byte-level API is used
416+    else:
417+        dirlist = listdir_unicode_unix(path)
418+
419+    # Normalize the resulting unicode filenames
420+    #
421+    # This prevents different OS from generating non-equal unicode strings for
422+    # the same filename representation
423+    return [unicodedata.normalize('NFC', fname) for fname in dirlist]
424+
425+def open_unicode(path, mode='r'):
426+    """
427+    Wrapper around open() which provides safe access to the convenient Unicode
428+    API even under Unix.
429+    """
430+
431+    precondition(isinstance(path, unicode), path)
432+
433+    if unicode_platform():
434+        return open(path, mode)
435+    else:
436+        encoding = sys.getfilesystemencoding()
437+
438+        try:
439+            return open(path.encode(encoding), mode)
440+        except UnicodeEncodeError:
441+            raise FilenameEncodingError(path)
442}
443[Fix handling of correctly encoded unicode filenames (#534)
444Francois Deppierraz <francois@ctrlaltdel.ch>**20100520004356
445 Ignore-this: 8a3a7df214a855f5a12dc0eeab6f2e39
446 
447 Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
448 backup', have been improved to correctly handle filenames containing non-ASCII
449 characters.
450   
451 In the case where Tahoe encounters a filename which cannot be decoded using the
452 system encoding, an error will be returned and the operation will fail.  Under
453 Linux, this typically happens when the filesystem contains filenames encoded
454 with an encoding (for instance latin1) other than that of the system locale (for
455 instance UTF-8).  In such a case, you'll need to fix your system with tools such
456 as 'convmv' before using the Tahoe CLI.
457   
458 All CLI commands have been improved to support non-ASCII parameters such as
459 filenames and aliases on all supported operating systems except Windows, for
460 the time being.
461] {
462hunk ./NEWS 3
463 User visible changes in Tahoe-LAFS.  -*- outline -*-
464 
465+* Release 1.7.0
466+
467+** Bugfixes
468+
469+*** Unicode filenames handling
470+
471+Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
472+backup', have been improved to correctly handle filenames containing non-ASCII
473+characters.
474+
475+In the case where Tahoe encounters a filename which cannot be decoded using the
476+system encoding, an error will be returned and the operation will fail.  Under
477+Linux, this typically happens when the filesystem contains filenames encoded
478+with an encoding (for instance latin1) other than that of the system locale (for
479+instance UTF-8).  In such a case, you'll need to fix your system with tools such
480+as 'convmv' before using the Tahoe CLI.
481+
482+All CLI commands have been improved to support non-ASCII parameters such as
483+filenames and aliases on all supported operating systems except Windows, for
484+the time being.
485+
486 * Release 1.6.1 (2010-02-27)
487 
488 ** Bugfixes
489hunk ./docs/frontends/CLI.txt 126
490 perspective on the graph of files and directories.
491 
492 Each tahoe node remembers a list of starting points, named "aliases",
493-in a file named ~/.tahoe/private/aliases . These aliases are short
494-strings that stand in for a directory read- or write- cap. If you use
495-the command line "ls" without any "[STARTING_DIR]:" argument, then it
496-will use the default alias, which is "tahoe", therefore "tahoe ls" has
497-the same effect as "tahoe ls tahoe:".  The same goes for the other
498-commands which can reasonably use a default alias: get, put, mkdir,
499-mv, and rm.
500+in a file named ~/.tahoe/private/aliases . These aliases are short UTF-8
501+encoded strings that stand in for a directory read- or write- cap. If
502+you use the command line "ls" without any "[STARTING_DIR]:" argument,
503+then it will use the default alias, which is "tahoe", therefore "tahoe
504+ls" has the same effect as "tahoe ls tahoe:".  The same goes for the
505+other commands which can reasonably use a default alias: get, put,
506+mkdir, mv, and rm.
507 
508 For backwards compatibility with Tahoe-1.0, if the "tahoe": alias is not
509 found in ~/.tahoe/private/aliases, the CLI will use the contents of
510hunk ./src/allmydata/scripts/cli.py 4
511 import os.path, re, sys, fnmatch
512 from twisted.python import usage
513 from allmydata.scripts.common import BaseOptions, get_aliases
514+from allmydata.util.stringutils import argv_to_unicode
515 
516 NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?")
517 
518hunk ./src/allmydata/scripts/cli.py 53
519 
520 class MakeDirectoryOptions(VDriveOptions):
521     def parseArgs(self, where=""):
522-        self.where = where
523+        self.where = argv_to_unicode(where)
524     longdesc = """Create a new directory, either unlinked or as a subdirectory."""
525 
526 class AddAliasOptions(VDriveOptions):
527hunk ./src/allmydata/scripts/cli.py 58
528     def parseArgs(self, alias, cap):
529-        self.alias = alias
530+        self.alias = argv_to_unicode(alias)
531         self.cap = cap
532 
533     def getSynopsis(self):
534hunk ./src/allmydata/scripts/cli.py 68
535 
536 class CreateAliasOptions(VDriveOptions):
537     def parseArgs(self, alias):
538-        self.alias = alias
539+        self.alias = argv_to_unicode(alias)
540 
541     def getSynopsis(self):
542         return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)
543hunk ./src/allmydata/scripts/cli.py 87
544         ("json", None, "Show the raw JSON output"),
545         ]
546     def parseArgs(self, where=""):
547-        self.where = where
548+        self.where = argv_to_unicode(where)
549 
550     longdesc = """
551     List the contents of some portion of the grid.
552hunk ./src/allmydata/scripts/cli.py 122
553         # tahoe get FOO bar              # write to local file
554         # tahoe get tahoe:FOO bar        # same
555 
556-        self.from_file = arg1
557-        self.to_file = arg2
558+        self.from_file = argv_to_unicode(arg1)
559+
560+        if arg2:
561+            self.to_file = argv_to_unicode(arg2)
562+        else:
563+            self.to_file = None
564+
565         if self.to_file == "-":
566             self.to_file = None
567 
568hunk ./src/allmydata/scripts/cli.py 160
569         # see Examples below
570 
571         if arg1 is not None and arg2 is not None:
572-            self.from_file = arg1
573-            self.to_file = arg2
574+            self.from_file = argv_to_unicode(arg1)
575+            self.to_file =  argv_to_unicode(arg2)
576         elif arg1 is not None and arg2 is None:
577hunk ./src/allmydata/scripts/cli.py 163
578-            self.from_file = arg1 # might be "-"
579+            self.from_file = argv_to_unicode(arg1) # might be "-"
580             self.to_file = None
581         else:
582             self.from_file = None
583hunk ./src/allmydata/scripts/cli.py 168
584             self.to_file = None
585-        if self.from_file == "-":
586+        if self.from_file == u"-":
587             self.from_file = None
588 
589     def getSynopsis(self):
590hunk ./src/allmydata/scripts/cli.py 206
591     def parseArgs(self, *args):
592         if len(args) < 2:
593             raise usage.UsageError("cp requires at least two arguments")
594-        self.sources = args[:-1]
595-        self.destination = args[-1]
596+        self.sources = map(argv_to_unicode, args[:-1])
597+        self.destination = argv_to_unicode(args[-1])
598     def getSynopsis(self):
599         return "Usage: tahoe [options] cp FROM.. TO"
600     longdesc = """
601hunk ./src/allmydata/scripts/cli.py 237
602 
603 class RmOptions(VDriveOptions):
604     def parseArgs(self, where):
605-        self.where = where
606+        self.where = argv_to_unicode(where)
607 
608     def getSynopsis(self):
609         return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),)
610hunk ./src/allmydata/scripts/cli.py 244
611 
612 class MvOptions(VDriveOptions):
613     def parseArgs(self, frompath, topath):
614-        self.from_file = frompath
615-        self.to_file = topath
616+        self.from_file = argv_to_unicode(frompath)
617+        self.to_file = argv_to_unicode(topath)
618 
619     def getSynopsis(self):
620         return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
621hunk ./src/allmydata/scripts/cli.py 263
622 
623 class LnOptions(VDriveOptions):
624     def parseArgs(self, frompath, topath):
625-        self.from_file = frompath
626-        self.to_file = topath
627+        self.from_file = argv_to_unicode(frompath)
628+        self.to_file = argv_to_unicode(topath)
629 
630     def getSynopsis(self):
631         return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
632hunk ./src/allmydata/scripts/cli.py 288
633         self['exclude'] = set()
634 
635     def parseArgs(self, localdir, topath):
636-        self.from_dir = localdir
637-        self.to_dir = topath
638+        self.from_dir = argv_to_unicode(localdir)
639+        self.to_dir = argv_to_unicode(topath)
640 
641     def getSynopsis(Self):
642         return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
643hunk ./src/allmydata/scripts/cli.py 346
644         ("info", "i", "Open the t=info page for the file"),
645         ]
646     def parseArgs(self, where=''):
647-        self.where = where
648+        self.where = argv_to_unicode(where)
649 
650     def getSynopsis(self):
651         return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
652hunk ./src/allmydata/scripts/cli.py 363
653         ("raw", "r", "Display raw JSON data instead of parsed"),
654         ]
655     def parseArgs(self, where=''):
656-        self.where = where
657+        self.where = argv_to_unicode(where)
658 
659     def getSynopsis(self):
660         return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
661hunk ./src/allmydata/scripts/cli.py 376
662         ("raw", "r", "Display raw JSON data instead of parsed"),
663         ]
664     def parseArgs(self, where=''):
665-        self.where = where
666+        self.where = argv_to_unicode(where)
667 
668     def getSynopsis(self):
669         return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
670hunk ./src/allmydata/scripts/cli.py 392
671         ("add-lease", None, "Add/renew lease on all shares"),
672         ]
673     def parseArgs(self, where=''):
674-        self.where = where
675+        self.where = argv_to_unicode(where)
676 
677     def getSynopsis(self):
678         return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
679hunk ./src/allmydata/scripts/cli.py 411
680         ("verbose", "v", "Be noisy about what is happening."),
681         ]
682     def parseArgs(self, where=''):
683-        self.where = where
684+        self.where = argv_to_unicode(where)
685 
686     def getSynopsis(self):
687         return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
688hunk ./src/allmydata/scripts/common.py 3
689 
690 import os, sys, urllib
691+import codecs
692 from twisted.python import usage
693hunk ./src/allmydata/scripts/common.py 5
694-
695+from allmydata.util.stringutils import unicode_to_url
696+from allmydata.util.assertutil import precondition
697 
698 class BaseOptions:
699     # unit tests can override these to point at StringIO instances
700hunk ./src/allmydata/scripts/common.py 105
701     except EnvironmentError:
702         pass
703     try:
704-        f = open(aliasfile, "r")
705+        f = codecs.open(aliasfile, "r", "utf-8")
706         for line in f.readlines():
707             line = line.strip()
708             if line.startswith("#") or not line:
709hunk ./src/allmydata/scripts/common.py 112
710                 continue
711             name, cap = line.split(":", 1)
712             # normalize it: remove http: prefix, urldecode
713-            cap = cap.strip()
714+            cap = cap.strip().encode('utf-8')
715             aliases[name] = uri.from_string_dirnode(cap).to_string()
716     except EnvironmentError:
717         pass
718hunk ./src/allmydata/scripts/common.py 143
719     # and default is not found in aliases, an UnknownAliasError is
720     # raised.
721     path = path.strip()
722-    if uri.has_uri_prefix(path):
723+    if uri.has_uri_prefix(path.encode('utf-8')):
724         # We used to require "URI:blah:./foo" in order to get a subpath,
725         # stripping out the ":./" sequence. We still allow that for compatibility,
726         # but now also allow just "URI:blah/foo".
727hunk ./src/allmydata/scripts/common.py 185
728 
729 def escape_path(path):
730     segments = path.split("/")
731-    return "/".join([urllib.quote(s) for s in segments])
732+    return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])
733hunk ./src/allmydata/scripts/tahoe_add_alias.py 3
734 
735 import os.path
736+import codecs
737+import sys
738 from allmydata import uri
739 from allmydata.scripts.common_http import do_http, check_http_error
740 from allmydata.scripts.common import get_aliases
741hunk ./src/allmydata/scripts/tahoe_add_alias.py 9
742 from allmydata.util.fileutil import move_into_place
743+from allmydata.util.stringutils import unicode_to_stdout
744+
745 
746 def add_line_to_aliasfile(aliasfile, alias, cap):
747     # we use os.path.exists, rather than catching EnvironmentError, to avoid
748hunk ./src/allmydata/scripts/tahoe_add_alias.py 17
749     # clobbering the valuable alias file in case of spurious or transient
750     # filesystem errors.
751     if os.path.exists(aliasfile):
752-        f = open(aliasfile, "r")
753+        f = codecs.open(aliasfile, "r", "utf-8")
754         aliases = f.read()
755         f.close()
756         if not aliases.endswith("\n"):
757hunk ./src/allmydata/scripts/tahoe_add_alias.py 25
758     else:
759         aliases = ""
760     aliases += "%s: %s\n" % (alias, cap)
761-    f = open(aliasfile+".tmp", "w")
762+    f = codecs.open(aliasfile+".tmp", "w", "utf-8")
763     f.write(aliases)
764     f.close()
765     move_into_place(aliasfile+".tmp", aliasfile)
766hunk ./src/allmydata/scripts/tahoe_add_alias.py 48
767 
768     add_line_to_aliasfile(aliasfile, alias, cap)
769 
770-    print >>stdout, "Alias '%s' added" % (alias,)
771+    print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),)
772     return 0
773 
774 def create_alias(options):
775hunk ./src/allmydata/scripts/tahoe_add_alias.py 81
776 
777     add_line_to_aliasfile(aliasfile, alias, new_uri)
778 
779-    print >>stdout, "Alias '%s' created" % (alias,)
780+    print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),)
781     return 0
782 
783 def list_aliases(options):
784hunk ./src/allmydata/scripts/tahoe_backup.py 12
785 from allmydata.scripts.common_http import do_http
786 from allmydata.util import time_format
787 from allmydata.scripts import backupdb
788+import sys
789+from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode
790+from allmydata.util.assertutil import precondition
791+from twisted.python import usage
792+
793 
794 class HTTPError(Exception):
795     pass
796hunk ./src/allmydata/scripts/tahoe_backup.py 162
797 
798     def verboseprint(self, msg):
799         if self.verbosity >= 2:
800+            if isinstance(msg, unicode):
801+                msg = unicode_to_stdout(msg)
802+
803             print >>self.options.stdout, msg
804 
805     def warn(self, msg):
806hunk ./src/allmydata/scripts/tahoe_backup.py 171
807         print >>self.options.stderr, msg
808 
809     def process(self, localpath):
810+        precondition(isinstance(localpath, unicode), localpath)
811         # returns newdircap
812 
813         self.verboseprint("processing %s" % localpath)
814hunk ./src/allmydata/scripts/tahoe_backup.py 179
815         compare_contents = {} # childname -> rocap
816 
817         try:
818-            children = os.listdir(localpath)
819+            children = listdir_unicode(localpath)
820         except EnvironmentError:
821             self.directories_skipped += 1
822             self.warn("WARNING: permission denied on directory %s" % localpath)
823hunk ./src/allmydata/scripts/tahoe_backup.py 295
824 
825     # This function will raise an IOError exception when called on an unreadable file
826     def upload(self, childpath):
827+        precondition(isinstance(childpath, unicode), childpath)
828+
829         #self.verboseprint("uploading %s.." % childpath)
830         metadata = get_local_metadata(childpath)
831 
832hunk ./src/allmydata/scripts/tahoe_backup.py 305
833 
834         if must_upload:
835             self.verboseprint("uploading %s.." % childpath)
836-            infileobj = open(os.path.expanduser(childpath), "rb")
837+            infileobj = open_unicode(os.path.expanduser(childpath), "rb")
838             url = self.options['node-url'] + "uri"
839             resp = do_http("PUT", url, infileobj)
840             if resp.status not in (200, 201):
841hunk ./src/allmydata/scripts/tahoe_cp.py 5
842 import os.path
843 import urllib
844 import simplejson
845+import sys
846 from cStringIO import StringIO
847 from twisted.python.failure import Failure
848 from allmydata.scripts.common import get_alias, escape_path, \
849hunk ./src/allmydata/scripts/tahoe_cp.py 12
850                                      DefaultAliasMarker, UnknownAliasError
851 from allmydata.scripts.common_http import do_http
852 from allmydata import uri
853+from twisted.python import usage
854+from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
855+from allmydata.util.assertutil import precondition
856+
857 
858 def ascii_or_none(s):
859     if s is None:
860hunk ./src/allmydata/scripts/tahoe_cp.py 78
861 
862 class LocalFileSource:
863     def __init__(self, pathname):
864+        precondition(isinstance(pathname, unicode), pathname)
865         self.pathname = pathname
866 
867     def need_to_copy_bytes(self):
868hunk ./src/allmydata/scripts/tahoe_cp.py 89
869 
870 class LocalFileTarget:
871     def __init__(self, pathname):
872+        precondition(isinstance(pathname, unicode), pathname)
873         self.pathname = pathname
874     def put_file(self, inf):
875         outf = open(self.pathname, "wb")
876hunk ./src/allmydata/scripts/tahoe_cp.py 102
877 
878 class LocalMissingTarget:
879     def __init__(self, pathname):
880+        precondition(isinstance(pathname, unicode), pathname)
881         self.pathname = pathname
882 
883     def put_file(self, inf):
884hunk ./src/allmydata/scripts/tahoe_cp.py 116
885 
886 class LocalDirectorySource:
887     def __init__(self, progressfunc, pathname):
888+        precondition(isinstance(pathname, unicode), pathname)
889+
890         self.progressfunc = progressfunc
891         self.pathname = pathname
892         self.children = None
893hunk ./src/allmydata/scripts/tahoe_cp.py 126
894         if self.children is not None:
895             return
896         self.children = {}
897-        children = os.listdir(self.pathname)
898+        children = listdir_unicode(self.pathname)
899         for i,n in enumerate(children):
900             self.progressfunc("examining %d of %d" % (i, len(children)))
901             pn = os.path.join(self.pathname, n)
902hunk ./src/allmydata/scripts/tahoe_cp.py 143
903 
904 class LocalDirectoryTarget:
905     def __init__(self, progressfunc, pathname):
906+        precondition(isinstance(pathname, unicode), pathname)
907+
908         self.progressfunc = progressfunc
909         self.pathname = pathname
910         self.children = None
911hunk ./src/allmydata/scripts/tahoe_cp.py 153
912         if self.children is not None:
913             return
914         self.children = {}
915-        children = os.listdir(self.pathname)
916+        children = listdir_unicode(self.pathname)
917         for i,n in enumerate(children):
918             self.progressfunc("examining %d of %d" % (i, len(children)))
919             pn = os.path.join(self.pathname, n)
920hunk ./src/allmydata/scripts/tahoe_cp.py 176
921         return LocalDirectoryTarget(self.progressfunc, pathname)
922 
923     def put_file(self, name, inf):
924+        precondition(isinstance(name, unicode), name)
925         pathname = os.path.join(self.pathname, name)
926hunk ./src/allmydata/scripts/tahoe_cp.py 178
927-        outf = open(pathname, "wb")
928+        outf = open_unicode(pathname, "wb")
929         while True:
930             data = inf.read(32768)
931             if not data:
932hunk ./src/allmydata/scripts/tahoe_cp.py 371
933                 if self.writecap:
934                     url = self.nodeurl + "/".join(["uri",
935                                                    urllib.quote(self.writecap),
936-                                                   urllib.quote(name.encode('utf-8'))])
937+                                                   urllib.quote(unicode_to_url(name))])
938                 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
939                                                       writecap, readcap, url)
940             elif data[0] == "dirnode":
941hunk ./src/allmydata/scripts/tahoe_ls.py 7
942 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
943                                      UnknownAliasError
944 from allmydata.scripts.common_http import do_http
945+from allmydata.util.stringutils import unicode_to_stdout
946 
947 def list(options):
948     nodeurl = options['node-url']
949hunk ./src/allmydata/scripts/tahoe_ls.py 134
950             line.append(ctime_s)
951         if not options["classify"]:
952             classify = ""
953-        line.append(name + classify)
954+        line.append(unicode_to_stdout(name) + classify)
955         if options["uri"]:
956             line.append(uri)
957         if options["readonly-uri"]:
958hunk ./src/allmydata/scripts/tahoe_manifest.py 88
959                 try:
960                     print >>stdout, d["cap"], "/".join(d["path"])
961                 except UnicodeEncodeError:
962-                    print >>stdout, d["cap"], "/".join([p.encode("utf-8")
963+                    print >>stdout, d["cap"], "/".join([unicode_to_stdout(p)
964                                                         for p in d["path"]])
965 
966 def manifest(options):
967hunk ./src/allmydata/scripts/tahoe_mkdir.py 5
968 import urllib
969 from allmydata.scripts.common_http import do_http, check_http_error
970 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
971+from allmydata.util.stringutils import unicode_to_url
972 
973 def mkdir(options):
974     nodeurl = options['node-url']
975hunk ./src/allmydata/scripts/tahoe_mkdir.py 39
976         path = path[:-1]
977     # path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
978     url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
979-                                           urllib.quote(path))
980+                                           urllib.quote(unicode_to_url(path)))
981     resp = do_http("POST", url)
982     check_http_error(resp, stderr)
983     new_uri = resp.read().strip()
984hunk ./src/allmydata/test/test_cli.py 9
985 import urllib
986 import re
987 import simplejson
988+import sys
989 
990 from allmydata.util import fileutil, hashutil, base32
991 from allmydata import uri
992hunk ./src/allmydata/test/test_cli.py 30
993 from twisted.internet import threads # CLI tests use deferToThread
994 from twisted.python import usage
995 
996+from allmydata.util.stringutils import listdir_unicode, open_unicode, \
997+     unicode_platform, FilenameEncodingError
998+
999 timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
1000 
1001 
1002hunk ./src/allmydata/test/test_cli.py 291
1003                    "work": "WA",
1004                    "c": "CA"}
1005         def ga1(path):
1006-            return get_alias(aliases, path, "tahoe")
1007+            return get_alias(aliases, path, u"tahoe")
1008         uses_lettercolon = common.platform_uses_lettercolon_drivename()
1009         self.failUnlessEqual(ga1("bare"), ("TA", "bare"))
1010         self.failUnlessEqual(ga1("baredir/file"), ("TA", "baredir/file"))
1011hunk ./src/allmydata/test/test_cli.py 386
1012         # default set to something that isn't in the aliases argument should
1013         # raise an UnknownAliasError.
1014         def ga4(path):
1015-            return get_alias(aliases, path, "badddefault:")
1016+            return get_alias(aliases, path, u"badddefault:")
1017         self.failUnlessRaises(common.UnknownAliasError, ga4, "afile")
1018         self.failUnlessRaises(common.UnknownAliasError, ga4, "a/dir/path/")
1019 
1020hunk ./src/allmydata/test/test_cli.py 394
1021             old = common.pretend_platform_uses_lettercolon
1022             try:
1023                 common.pretend_platform_uses_lettercolon = True
1024-                retval = get_alias(aliases, path, "baddefault:")
1025+                retval = get_alias(aliases, path, u"baddefault:")
1026             finally:
1027                 common.pretend_platform_uses_lettercolon = old
1028             return retval
1029hunk ./src/allmydata/test/test_cli.py 400
1030         self.failUnlessRaises(common.UnknownAliasError, ga5, "C:\\Windows")
1031 
1032+    def test_listdir_unicode_good(self):
1033+        basedir = u"cli/common/listdir_unicode_good"
1034+        fileutil.make_dirs(basedir)
1035+
1036+        files = (u'Lôzane', u'Bern', u'Genève')
1037+
1038+        for file in files:
1039+            open(os.path.join(basedir, file), "w").close()
1040+
1041+        for file in listdir_unicode(basedir):
1042+            self.failUnlessEqual(file in files, True)
1043+
1044+    def test_listdir_unicode_bad(self):
1045+        if unicode_platform():
1046+            raise unittest.SkipTest("This test doesn't make any sense on architecture which handle filenames natively as Unicode entities.")
1047+
1048+        basedir = u"cli/common/listdir_unicode_bad"
1049+        fileutil.make_dirs(basedir)
1050+
1051+        files = (u'Lôzane', u'Bern', u'Genève')
1052+
1053+        # We use a wrong encoding on purpose
1054+        if sys.getfilesystemencoding() == 'UTF-8':
1055+            encoding = 'latin1'
1056+        else:
1057+            encoding = 'UTF-8'
1058+
1059+        for file in files:
1060+            path = os.path.join(basedir, file).encode(encoding)
1061+            open(path, "w").close()
1062+
1063+        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, basedir)
1064 
1065 class Help(unittest.TestCase):
1066 
1067hunk ./src/allmydata/test/test_cli.py 631
1068             self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:"))
1069         d.addCallback(_check_not_corrupted)
1070 
1071-        return d
1072 
1073hunk ./src/allmydata/test/test_cli.py 632
1074+    def test_create_unicode(self):
1075+        if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
1076+            raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")
1077+
1078+        if sys.stdout.encoding not in ('UTF-8'):
1079+            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
1080+
1081+        self.basedir = "cli/CreateAlias/create_unicode"
1082+        self.set_up_grid()
1083+        aliasfile = os.path.join(self.get_clientdir(), "private", "aliases")
1084+
1085+        d = self.do_cli("create-alias", "études")
1086+        def _check_create_unicode((rc,stdout,stderr)):
1087+            self.failUnlessEqual(rc, 0)
1088+            self.failIf(stderr)
1089+
1090+            # If stdout only supports ASCII, accented characters are
1091+            # replaced by '?'
1092+            if sys.stdout.encoding == "ANSI_X3.4-1968":
1093+                self.failUnless("Alias '?tudes' created" in stdout)
1094+            else:
1095+                self.failUnless("Alias 'études' created" in stdout)
1096+
1097+            aliases = get_aliases(self.get_clientdir())
1098+            self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
1099+        d.addCallback(_check_create_unicode)
1100+
1101+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
1102+        def _check_ls1((rc, stdout, stderr)):
1103+            self.failUnlessEqual(rc, 0)
1104+            self.failIf(stderr)
1105+
1106+            self.failUnlessEqual(stdout, "")
1107+        d.addCallback(_check_ls1)
1108+
1109+        d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
1110+          stdin="Blah blah blah"))
1111+
1112+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
1113+        def _check_ls2((rc, stdout, stderr)):
1114+            self.failUnlessEqual(rc, 0)
1115+            self.failIf(stderr)
1116+
1117+            self.failUnlessEqual(stdout, "uploaded.txt\n")
1118+        d.addCallback(_check_ls2)
1119+
1120+        d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt"))
1121+        def _check_get((rc, stdout, stderr)):
1122+            self.failUnlessEqual(rc, 0)
1123+            self.failIf(stderr)
1124+            self.failUnlessEqual(stdout, "Blah blah blah")
1125+        d.addCallback(_check_get)
1126+
1127+        # Ensure that a Unicode filename in a Unicode alias works as expected
1128+        d.addCallback(lambda res: self.do_cli("put", "-", "études:lumière.txt",
1129+          stdin="Let the sunshine In!"))
1130+
1131+        d.addCallback(lambda res: self.do_cli("get",
1132+                      get_aliases(self.get_clientdir())[u"études"] + "/lumière.txt"))
1133+        def _check_get((rc, stdout, stderr)):
1134+            self.failUnlessEqual(rc, 0)
1135+            self.failIf(stderr)
1136+            self.failUnlessEqual(stdout, "Let the sunshine In!")
1137+        d.addCallback(_check_get)
1138+
1139+        return d
1140 
1141 class Ln(GridTestMixin, CLITestMixin, unittest.TestCase):
1142     def _create_test_file(self):
1143hunk ./src/allmydata/test/test_cli.py 969
1144         return d
1145 
1146 
1147+    def test_immutable_from_file_unicode(self):
1148+        if sys.stdout.encoding not in ('UTF-8'):
1149+            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
1150+     
1151+        # tahoe put file.txt "à trier.txt"
1152+        self.basedir = os.path.dirname(self.mktemp())
1153+        self.set_up_grid()
1154+
1155+        rel_fn = os.path.join(self.basedir, "DATAFILE")
1156+        abs_fn = os.path.abspath(rel_fn)
1157+        # we make the file small enough to fit in a LIT file, for speed
1158+        DATA = "short file"
1159+        f = open(rel_fn, "w")
1160+        f.write(DATA)
1161+        f.close()
1162+
1163+        d = self.do_cli("create-alias", "tahoe")
1164+
1165+        d.addCallback(lambda res:
1166+                      self.do_cli("put", rel_fn, "à trier.txt"))
1167+        def _uploaded((rc,stdout,stderr)):
1168+            readcap = stdout.strip()
1169+            self.failUnless(readcap.startswith("URI:LIT:"))
1170+            self.failUnless("201 Created" in stderr, stderr)
1171+            self.readcap = readcap
1172+        d.addCallback(_uploaded)
1173+
1174+        d.addCallback(lambda res:
1175+                      self.do_cli("get", "tahoe:à trier.txt"))
1176+        d.addCallback(lambda (rc,stdout,stderr):
1177+                      self.failUnlessEqual(stdout, DATA))
1178+
1179+        return d
1180+
1181 class List(GridTestMixin, CLITestMixin, unittest.TestCase):
1182     def test_list(self):
1183         self.basedir = "cli/List/list"
1184hunk ./src/allmydata/test/test_cli.py 1284
1185                               o.parseOptions, ["onearg"])
1186 
1187     def test_unicode_filename(self):
1188+        if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
1189+            raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")
1190+
1191+        if sys.stdout.encoding not in ('UTF-8'):
1192+            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
1193+
1194         self.basedir = "cli/Cp/unicode_filename"
1195         self.set_up_grid()
1196hunk ./src/allmydata/test/test_cli.py 1292
1197+        d = self.do_cli("create-alias", "tahoe")
1198 
1199hunk ./src/allmydata/test/test_cli.py 1294
1200-        fn1 = os.path.join(self.basedir, "Ärtonwall")
1201+        # Use unicode strings when calling os functions
1202+        fn1 = os.path.join(self.basedir, u"Ärtonwall")
1203         DATA1 = "unicode file content"
1204         fileutil.write(fn1, DATA1)
1205 
1206hunk ./src/allmydata/test/test_cli.py 1299
1207-        fn2 = os.path.join(self.basedir, "Metallica")
1208-        DATA2 = "non-unicode file content"
1209-        fileutil.write(fn2, DATA2)
1210-
1211-        # Bug #534
1212-        # Assure that uploading a file whose name contains unicode character
1213-        # doesn't prevent further uploads in the same directory
1214-        d = self.do_cli("create-alias", "tahoe")
1215-        d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
1216-        d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
1217+        d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:"))
1218 
1219         d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
1220         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
1221hunk ./src/allmydata/test/test_cli.py 1304
1222 
1223+        fn2 = os.path.join(self.basedir, u"Metallica")
1224+        DATA2 = "non-unicode file content"
1225+        fileutil.write(fn2, DATA2)
1226+
1227+        d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
1228+
1229         d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
1230         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
1231 
1232hunk ./src/allmydata/test/test_cli.py 1313
1233+        d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
1234+        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, "Metallica\nÄrtonwall\n"))
1235+
1236         return d
1237hunk ./src/allmydata/test/test_cli.py 1317
1238-    test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms.  See issue ticket #534."
1239 
1240     def test_dangling_symlink_vs_recursion(self):
1241         if not hasattr(os, 'symlink'):
1242hunk ./src/allmydata/test/test_cli.py 1423
1243         return d
1244 
1245 
1246+class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
1247+    def test_unicode_mkdir(self):
1248+        self.basedir = os.path.dirname(self.mktemp())
1249+        self.set_up_grid()
1250+
1251+        d = self.do_cli("create-alias", "tahoe")
1252+        d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))
1253+
1254+        return d
1255+
1256+
1257 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
1258 
1259     def writeto(self, path, data):
1260}
1261
1262Context:
1263
1264[docs: line-wrap README.txt
1265zooko@zooko.com**20100518174240
1266 Ignore-this: 670a02d360df7de51ebdcf4fae752577
1267] 
1268[Hush pyflakes warnings
1269Kevan Carstensen <kevan@isnotajoke.com>**20100515184344
1270 Ignore-this: fd602c3bba115057770715c36a87b400
1271] 
1272[setup: new improved misc/show-tool-versions.py
1273zooko@zooko.com**20100516050122
1274 Ignore-this: ce9b1de1b35b07d733e6cf823b66335a
1275] 
1276[Improve code coverage of the Tahoe2PeerSelector tests.
1277Kevan Carstensen <kevan@isnotajoke.com>**20100515032913
1278 Ignore-this: 793151b63ffa65fdae6915db22d9924a
1279] 
1280[Remove a comment that no longer makes sense.
1281Kevan Carstensen <kevan@isnotajoke.com>**20100514203516
1282 Ignore-this: 956983c7e7c7e4477215494dfce8f058
1283] 
1284[docs: update docs/architecture.txt to more fully and correctly explain the upload procedure
1285zooko@zooko.com**20100514043458
1286 Ignore-this: 538b6ea256a49fed837500342092efa3
1287] 
1288[Fix up the behavior of #778, per reviewers' comments
1289Kevan Carstensen <kevan@isnotajoke.com>**20100514004917
1290 Ignore-this: 9c20b60716125278b5456e8feb396bff
1291 
1292   - Make some important utility functions clearer and more thoroughly
1293     documented.
1294   - Assert in upload.servers_of_happiness that the buckets attributes
1295     of PeerTrackers passed to it are mutually disjoint.
1296   - Get rid of some silly non-Pythonisms that I didn't see when I first
1297     wrote these patches.
1298   - Make sure that should_add_server returns true when queried about a
1299     shnum that it doesn't know about yet.
1300   - Change Tahoe2PeerSelector.preexisting_shares to map a shareid to a set
1301     of peerids, alter dependencies to deal with that.
1302   - Remove upload.should_add_servers, because it is no longer necessary
1303   - Move upload.shares_of_happiness and upload.shares_by_server to a utility
1304     file.
1305   - Change some points in Tahoe2PeerSelector.
1306   - Compute servers_of_happiness using a bipartite matching algorithm that
1307     we know is optimal instead of an ad-hoc greedy algorithm that isn't.
1308   - Change servers_of_happiness to just take a sharemap as an argument,
1309     change its callers to merge existing_shares and used_peers before
1310     calling it.
1311   - Change an error message in the encoder to be more appropriate for
1312     servers of happiness.
1313   - Clarify the wording of an error message in immutable/upload.py
1314   - Refactor a happiness failure message to happinessutil.py, and make
1315     immutable/upload.py and immutable/encode.py use it.
1316   - Move the word "only" as far to the right as possible in failure
1317     messages.
1318   - Use a better definition of progress during peer selection.
1319   - Do read-only peer share detection queries in parallel, not sequentially.
1320   - Clean up logging semantics; print the query statistics whenever an
1321     upload is unsuccessful, not just in one case.
1322 
1323] 
1324[Alter the error message when an upload fails, per some comments in #778.
1325Kevan Carstensen <kevan@isnotajoke.com>**20091230210344
1326 Ignore-this: ba97422b2f9737c46abeb828727beb1
1327 
1328 When I first implemented #778, I just altered the error messages to refer to
1329 servers where they referred to shares. The resulting error messages weren't
1330 very good. These are a bit better.
1331] 
1332[Change "UploadHappinessError" to "UploadUnhappinessError"
1333Kevan Carstensen <kevan@isnotajoke.com>**20091205043037
1334 Ignore-this: 236b64ab19836854af4993bb5c1b221a
1335] 
1336[Alter the error message returned when peer selection fails
1337Kevan Carstensen <kevan@isnotajoke.com>**20091123002405
1338 Ignore-this: b2a7dc163edcab8d9613bfd6907e5166
1339 
1340 The Tahoe2PeerSelector returned either NoSharesError or NotEnoughSharesError
1341 for a variety of error conditions that weren't informatively described by them.
1342 This patch creates a new error, UploadHappinessError, replaces uses of
1343 NoSharesError and NotEnoughSharesError with it, and alters the error message
1344 raised with the errors to be more in line with the new servers_of_happiness
1345 behavior. See ticket #834 for more information.
1346] 
1347[Eliminate overcounting iof servers_of_happiness in Tahoe2PeerSelector; also reorganize some things.
1348Kevan Carstensen <kevan@isnotajoke.com>**20091118014542
1349 Ignore-this: a6cb032cbff74f4f9d4238faebd99868
1350] 
1351[Change stray "shares_of_happiness" to "servers_of_happiness"
1352Kevan Carstensen <kevan@isnotajoke.com>**20091116212459
1353 Ignore-this: 1c971ba8c3c4d2e7ba9f020577b28b73
1354] 
1355[Alter Tahoe2PeerSelector to make sure that it recognizes existing shares on readonly servers, fixing an issue in #778
1356Kevan Carstensen <kevan@isnotajoke.com>**20091116192805
1357 Ignore-this: 15289f4d709e03851ed0587b286fd955
1358] 
1359[Alter 'immutable/encode.py' and 'immutable/upload.py' to use servers_of_happiness instead of shares_of_happiness.
1360Kevan Carstensen <kevan@isnotajoke.com>**20091104111222
1361 Ignore-this: abb3283314820a8bbf9b5d0cbfbb57c8
1362] 
1363[Alter the signature of set_shareholders in IEncoder to add a 'servermap' parameter, which gives IEncoders enough information to perform a sane check for servers_of_happiness.
1364Kevan Carstensen <kevan@isnotajoke.com>**20091104033241
1365 Ignore-this: b3a6649a8ac66431beca1026a31fed94
1366] 
1367[Alter CiphertextDownloader to work with servers_of_happiness
1368Kevan Carstensen <kevan@isnotajoke.com>**20090924041932
1369 Ignore-this: e81edccf0308c2d3bedbc4cf217da197
1370] 
1371[Revisions of the #778 tests, per reviewers' comments
1372Kevan Carstensen <kevan@isnotajoke.com>**20100514012542
1373 Ignore-this: 735bbc7f663dce633caeb3b66a53cf6e
1374 
1375 - Fix comments and confusing naming.
1376 - Add tests for the new error messages suggested by David-Sarah
1377   and Zooko.
1378 - Alter existing tests for new error messages.
1379 - Make sure that the tests continue to work with the trunk.
1380 - Add a test for a mutual disjointedness assertion that I added to
1381   upload.servers_of_happiness.
1382 - Fix the comments to correctly reflect read-onlyness
1383 - Add a test for an edge case in should_add_server
1384 - Add an assertion to make sure that share redistribution works as it
1385   should
1386 - Alter tests to work with revised servers_of_happiness semantics
1387 - Remove tests for should_add_server, since that function no longer exists.
1388 - Alter tests to know about merge_peers, and to use it before calling
1389   servers_of_happiness.
1390 - Add tests for merge_peers.
1391 - Add Zooko's puzzles to the tests.
1392 - Edit encoding tests to expect the new kind of failure message.
1393 - Edit tests to expect error messages with the word "only" moved as far
1394   to the right as possible.
1395 - Extended and cleaned up some helper functions.
1396 - Changed some tests to call more appropriate helper functions.
1397 - Added a test for the failing redistribution algorithm
1398 - Added a test for the progress message
1399 - Added a test for the upper bound on readonly peer share discovery.
1400 
1401] 
1402[Alter various unit tests to work with the new happy behavior
1403Kevan Carstensen <kevan@isnotajoke.com>**20100107181325
1404 Ignore-this: 132032bbf865e63a079f869b663be34a
1405] 
1406[Replace "UploadHappinessError" with "UploadUnhappinessError" in tests.
1407Kevan Carstensen <kevan@isnotajoke.com>**20091205043453
1408 Ignore-this: 83f4bc50c697d21b5f4e2a4cd91862ca
1409] 
1410[Add tests for the behavior described in #834.
1411Kevan Carstensen <kevan@isnotajoke.com>**20091123012008
1412 Ignore-this: d8e0aa0f3f7965ce9b5cea843c6d6f9f
1413] 
1414[Re-work 'test_upload.py' to be more readable; add more tests for #778
1415Kevan Carstensen <kevan@isnotajoke.com>**20091116192334
1416 Ignore-this: 7e8565f92fe51dece5ae28daf442d659
1417] 
1418[Test Tahoe2PeerSelector to make sure that it recognizeses existing shares on readonly servers
1419Kevan Carstensen <kevan@isnotajoke.com>**20091109003735
1420 Ignore-this: 12f9b4cff5752fca7ed32a6ebcff6446
1421] 
1422[Add more tests for comment:53 in ticket #778
1423Kevan Carstensen <kevan@isnotajoke.com>**20091104112849
1424 Ignore-this: 3bb2edd299a944cc9586e14d5d83ec8c
1425] 
1426[Add a test for upload.shares_by_server
1427Kevan Carstensen <kevan@isnotajoke.com>**20091104111324
1428 Ignore-this: f9802e82d6982a93e00f92e0b276f018
1429] 
1430[Minor tweak to an existing test -- make the first server read-write, instead of read-only
1431Kevan Carstensen <kevan@isnotajoke.com>**20091104034232
1432 Ignore-this: a951a46c93f7f58dd44d93d8623b2aee
1433] 
1434[Alter tests to use the new form of set_shareholders
1435Kevan Carstensen <kevan@isnotajoke.com>**20091104033602
1436 Ignore-this: 3deac11fc831618d11441317463ef830
1437] 
1438[Refactor some behavior into a mixin, and add tests for the behavior described in #778
1439"Kevan Carstensen" <kevan@isnotajoke.com>**20091030091908
1440 Ignore-this: a6f9797057ca135579b249af3b2b66ac
1441] 
1442[Alter NoNetworkGrid to allow the creation of readonly servers for testing purposes.
1443Kevan Carstensen <kevan@isnotajoke.com>**20091018013013
1444 Ignore-this: e12cd7c4ddeb65305c5a7e08df57c754
1445] 
1446[Update 'docs/architecture.txt' to reflect readonly share discovery
1447kevan@isnotajoke.com**20100514003852
1448 Ignore-this: 7ead71b34df3b1ecfdcfd3cb2882e4f9
1449] 
1450[Alter the wording in docs/architecture.txt to more accurately describe the servers_of_happiness behavior.
1451Kevan Carstensen <kevan@isnotajoke.com>**20100428002455
1452 Ignore-this: 6eff7fa756858a1c6f73728d989544cc
1453] 
1454[Alter wording in 'interfaces.py' to be correct wrt #778
1455"Kevan Carstensen" <kevan@isnotajoke.com>**20091205034005
1456 Ignore-this: c9913c700ac14e7a63569458b06980e0
1457] 
1458[Update 'docs/configuration.txt' to reflect the servers_of_happiness behavior.
1459Kevan Carstensen <kevan@isnotajoke.com>**20091205033813
1460 Ignore-this: 5e1cb171f8239bfb5b565d73c75ac2b8
1461] 
1462[Clarify quickstart instructions for installing pywin32
1463david-sarah@jacaranda.org**20100511180300
1464 Ignore-this: d4668359673600d2acbc7cd8dd44b93c
1465] 
1466[web: add a simple test that you can load directory.xhtml
1467zooko@zooko.com**20100510063729
1468 Ignore-this: e49b25fa3c67b3c7a56c8b1ae01bb463
1469] 
1470[setup: fix typos in misc/show-tool-versions.py
1471zooko@zooko.com**20100510063615
1472 Ignore-this: 2181b1303a0e288e7a9ebd4c4855628
1473] 
1474[setup: show code-coverage tool versions in show-tools-versions.py
1475zooko@zooko.com**20100510062955
1476 Ignore-this: 4b4c68eb3780b762c8dbbd22b39df7cf
1477] 
1478[docs: update README, mv it to README.txt, update setup.py
1479zooko@zooko.com**20100504094340
1480 Ignore-this: 40e28ca36c299ea1fd12d3b91e5b421c
1481] 
1482[Dependency on Windmill test framework is not needed yet.
1483david-sarah@jacaranda.org**20100504161043
1484 Ignore-this: be088712bec650d4ef24766c0026ebc8
1485] 
1486[tests: pass z to tar so that BSD tar will know to ungzip
1487zooko@zooko.com**20100504090628
1488 Ignore-this: 1339e493f255e8fc0b01b70478f23a09
1489] 
1490[setup: update comments and URLs in setup.cfg
1491zooko@zooko.com**20100504061653
1492 Ignore-this: f97692807c74bcab56d33100c899f829
1493] 
1494[setup: reorder and extend the show-tool-versions script, the better to glean information about our new buildslaves
1495zooko@zooko.com**20100504045643
1496 Ignore-this: 836084b56b8d4ee8f1de1f4efb706d36
1497] 
1498[CLI: Support for https url in option --node-url
1499Francois Deppierraz <francois@ctrlaltdel.ch>**20100430185609
1500 Ignore-this: 1717176b4d27c877e6bc67a944d9bf34
1501 
1502 This patch modifies the regular expression used for verifying of '--node-url'
1503 parameter.  Support for accessing a Tahoe gateway over HTTPS was already
1504 present, thanks to Python's urllib.
1505 
1506] 
1507[backupdb.did_create_directory: use REPLACE INTO, not INSERT INTO + ignore error
1508Brian Warner <warner@lothar.com>**20100428050803
1509 Ignore-this: 1fca7b8f364a21ae413be8767161e32f
1510 
1511 This handles the case where we upload a new tahoe directory for a
1512 previously-processed local directory, possibly creating a new dircap (if the
1513 metadata had changed). Now we replace the old dirhash->dircap record. The
1514 previous behavior left the old record in place (with the old dircap and
1515 timestamps), so we'd never stop creating new directories and never converge
1516 on a null backup.
1517] 
1518["tahoe webopen": add --info flag, to get ?t=info
1519Brian Warner <warner@lothar.com>**20100424233003
1520 Ignore-this: 126b0bb6db340fabacb623d295eb45fa
1521 
1522 Also fix some trailing whitespace.
1523] 
1524[docs: install.html http-equiv refresh to quickstart.html
1525zooko@zooko.com**20100421165708
1526 Ignore-this: 52b4b619f9dde5886ae2cd7f1f3b734b
1527] 
1528[docs: install.html -> quickstart.html
1529zooko@zooko.com**20100421155757
1530 Ignore-this: 6084e203909306bed93efb09d0e6181d
1531 It is not called "installing" because that implies that it is going to change the configuration of your operating system. It is not called "building" because that implies that you need developer tools like a compiler. Also I added a stern warning against looking at the "InstallDetails" wiki page, which I have renamed to "AdvancedInstall".
1532] 
1533[Fix another typo in tahoe_storagespace munin plugin
1534david-sarah@jacaranda.org**20100416220935
1535 Ignore-this: ad1f7aa66b554174f91dfb2b7a3ea5f3
1536] 
1537[Add dependency on windmill >= 1.3
1538david-sarah@jacaranda.org**20100416190404
1539 Ignore-this: 4437a7a464e92d6c9012926b18676211
1540] 
1541[licensing: phrase the OpenSSL-exemption in the vocabulary of copyright instead of computer technology, and replicate the exemption from the GPL to the TGPPL
1542zooko@zooko.com**20100414232521
1543 Ignore-this: a5494b2f582a295544c6cad3f245e91
1544] 
1545[munin-tahoe_storagespace
1546freestorm77@gmail.com**20100221203626
1547 Ignore-this: 14d6d6a587afe1f8883152bf2e46b4aa
1548 
1549 Plugin configuration rename
1550 
1551] 
1552[setup: add licensing declaration for setuptools (noticed by the FSF compliance folks)
1553zooko@zooko.com**20100309184415
1554 Ignore-this: 2dfa7d812d65fec7c72ddbf0de609ccb
1555] 
1556[setup: fix error in licensing declaration from Shawn Willden, as noted by the FSF compliance division
1557zooko@zooko.com**20100309163736
1558 Ignore-this: c0623d27e469799d86cabf67921a13f8
1559] 
1560[CREDITS to Jacob Appelbaum
1561zooko@zooko.com**20100304015616
1562 Ignore-this: 70db493abbc23968fcc8db93f386ea54
1563] 
1564[desert-island-build-with-proper-versions
1565jacob@appelbaum.net**20100304013858] 
1566[docs: a few small edits to try to guide newcomers through the docs
1567zooko@zooko.com**20100303231902
1568 Ignore-this: a6aab44f5bf5ad97ea73e6976bc4042d
1569 These edits were suggested by my watching over Jake Appelbaum's shoulder as he completely ignored/skipped/missed install.html and also as he decided that debian.txt wouldn't help him with basic installation. Then I threw in a few docs edits that have been sitting around in my sandbox asking to be committed for months.
1570] 
1571[TAG allmydata-tahoe-1.6.1
1572david-sarah@jacaranda.org**20100228062314
1573 Ignore-this: eb5f03ada8ea953ee7780e7fe068539
1574] 
1575Patch bundle hash:
15761cac164f9367a123cd5f7968971916e3bcdffebb