Ticket #534: unicode-v3-minus-the-stdout-and-aliases-parts.patch.txt

File unicode-v3-minus-the-stdout-and-aliases-parts.patch.txt, 28.7 KB (added by zooko, at 2009-04-09T04:02:14Z)
Line 
1diff -rN -u old-unicode/docs/frontends/CLI.txt new-unicode/docs/frontends/CLI.txt
2--- old-unicode/docs/frontends/CLI.txt  2009-04-08 22:00:03.000000000 -0600
3+++ new-unicode/docs/frontends/CLI.txt  2009-04-08 22:00:07.000000000 -0600
4@@ -91,9 +91,21 @@
5 These commands also use a table of "aliases" to figure out which directory
6 they ought to use a starting point. This is explained in more detail below.
7 
8-In Tahoe v1.3.0, passing non-ascii characters to the cli is not guaranteed to
9-work, although it might work on your platform, especially if your platform
10-uses utf-8 encoding.
11+As of Tahoe v1.3.1, filenames containing non-ascii characters are
12+supported on the command line if your terminal is correctly configured
13+for UTF-8 support. This is usually the case on modern GNU/Linux
14+distributions.
15+
16+If your terminal doesn't support UTF-8, you will still be able to list
17+directories but non-ascii characters will be replaced by a question mark
18+(?) on display.
19+
20+Reading from and writing to files whose names contain non-ascii
21+characters is also supported when your system correctly understands them.
22+Under Unix, this is usually handled by locale settings. If Tahoe cannot
23+correctly decode a filename, it will raise an error. In such a case,
24+you'll need to correct the name of your file, possibly with help from
25+tools such as convmv.
26 
27 === Starting Directories ===
28 
29diff -rN -u old-unicode/src/allmydata/scripts/cli.py new-unicode/src/allmydata/scripts/cli.py
30--- old-unicode/src/allmydata/scripts/cli.py    2009-04-08 22:00:05.000000000 -0600
31+++ new-unicode/src/allmydata/scripts/cli.py    2009-04-08 22:00:08.000000000 -0600
32@@ -1,6 +1,7 @@
33 import os.path, re, sys, fnmatch
34 from twisted.python import usage
35 from allmydata.scripts.common import BaseOptions, get_aliases
36+from allmydata.util.stringutils import argv_to_unicode
37 
38 NODEURL_RE=re.compile("http://([^:]*)(:([1-9][0-9]*))?")
39 
40@@ -49,7 +50,7 @@
41 
42 class MakeDirectoryOptions(VDriveOptions):
43     def parseArgs(self, where=""):
44-        self.where = where
45+        self.where = argv_to_unicode(where)
46     longdesc = """Create a new directory, either unlinked or as a subdirectory."""
47 
48 class AddAliasOptions(VDriveOptions):
49@@ -83,7 +84,7 @@
50         ("json", None, "Show the raw JSON output"),
51         ]
52     def parseArgs(self, where=""):
53-        self.where = where
54+        self.where = argv_to_unicode(where)
55 
56     longdesc = """List the contents of some portion of the virtual drive."""
57 
58@@ -94,8 +95,13 @@
59         # tahoe get FOO bar              # write to local file
60         # tahoe get tahoe:FOO bar        # same
61 
62-        self.from_file = arg1
63-        self.to_file = arg2
64+        self.from_file = argv_to_unicode(arg1)
65+
66+        if arg2:
67+            self.to_file = argv_to_unicode(arg2)
68+        else:
69+            self.to_file = None
70+
71         if self.to_file == "-":
72             self.to_file = None
73 
74@@ -131,15 +137,15 @@
75         # tahoe put bar tahoe:FOO        # same
76 
77         if arg1 is not None and arg2 is not None:
78-            self.from_file = arg1
79-            self.to_file = arg2
80+            self.from_file = argv_to_unicode(arg1)
81+            self.to_file =  argv_to_unicode(arg2)
82         elif arg1 is not None and arg2 is None:
83-            self.from_file = arg1 # might be "-"
84+            self.from_file = argv_to_unicode(arg1) # might be "-"
85             self.to_file = None
86         else:
87             self.from_file = None
88             self.to_file = None
89-        if self.from_file == "-":
90+        if self.from_file == u"-":
91             self.from_file = None
92 
93     def getSynopsis(self):
94@@ -176,28 +182,28 @@
95     def parseArgs(self, *args):
96         if len(args) < 2:
97             raise usage.UsageError("cp requires at least two arguments")
98-        self.sources = args[:-1]
99-        self.destination = args[-1]
100+        self.sources = map(argv_to_unicode, args[:-1])
101+        self.destination = argv_to_unicode(args[-1])
102 
103 class RmOptions(VDriveOptions):
104     def parseArgs(self, where):
105-        self.where = where
106+        self.where = argv_to_unicode(where)
107 
108     def getSynopsis(self):
109         return "%s rm VDRIVE_FILE" % (os.path.basename(sys.argv[0]),)
110 
111 class MvOptions(VDriveOptions):
112     def parseArgs(self, frompath, topath):
113-        self.from_file = frompath
114-        self.to_file = topath
115+        self.from_file = argv_to_unicode(frompath)
116+        self.to_file = argv_to_unicode(topath)
117 
118     def getSynopsis(self):
119         return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
120 
121 class LnOptions(VDriveOptions):
122     def parseArgs(self, frompath, topath):
123-        self.from_file = frompath
124-        self.to_file = topath
125+        self.from_file = argv_to_unicode(frompath)
126+        self.to_file = argv_to_unicode(topath)
127 
128     def getSynopsis(self):
129         return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
130@@ -221,8 +227,8 @@
131         self['exclude'] = set()
132 
133     def parseArgs(self, localdir, topath):
134-        self.from_dir = localdir
135-        self.to_dir = topath
136+        self.from_dir = argv_to_unicode(localdir)
137+        self.to_dir = argv_to_unicode(topath)
138 
139     def getSynopsis(Self):
140         return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
141@@ -270,7 +276,7 @@
142 
143 class WebopenOptions(VDriveOptions):
144     def parseArgs(self, where=''):
145-        self.where = where
146+        self.where = argv_to_unicode(where)
147 
148     def getSynopsis(self):
149         return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
150@@ -285,7 +291,7 @@
151         ("raw", "r", "Display raw JSON data instead of parsed"),
152         ]
153     def parseArgs(self, where=''):
154-        self.where = where
155+        self.where = argv_to_unicode(where)
156 
157     def getSynopsis(self):
158         return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
159@@ -297,7 +303,7 @@
160         ("raw", "r", "Display raw JSON data instead of parsed"),
161         ]
162     def parseArgs(self, where=''):
163-        self.where = where
164+        self.where = argv_to_unicode(where)
165 
166     def getSynopsis(self):
167         return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
168@@ -312,7 +318,7 @@
169         ("add-lease", None, "Add/renew lease on all shares"),
170         ]
171     def parseArgs(self, where=''):
172-        self.where = where
173+        self.where = argv_to_unicode(where)
174 
175     def getSynopsis(self):
176         return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
177@@ -328,7 +334,7 @@
178         ("verbose", "v", "Be noisy about what is happening."),
179         ]
180     def parseArgs(self, where=''):
181-        self.where = where
182+        self.where = argv_to_unicode(where)
183 
184     def getSynopsis(self):
185         return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
186diff -rN -u old-unicode/src/allmydata/scripts/common.py new-unicode/src/allmydata/scripts/common.py
187--- old-unicode/src/allmydata/scripts/common.py 2009-04-08 22:00:05.000000000 -0600
188+++ new-unicode/src/allmydata/scripts/common.py 2009-04-08 22:00:08.000000000 -0600
189@@ -1,7 +1,8 @@
190 
191 import os, sys, urllib
192+import codecs
193 from twisted.python import usage
194-
195+from allmydata.util.stringutils import unicode_to_url
196 
197 class BaseOptions:
198     # unit tests can override these to point at StringIO instances
199@@ -107,7 +108,7 @@
200                 continue
201             name, cap = line.split(":", 1)
202             # normalize it: remove http: prefix, urldecode
203-            cap = cap.strip()
204+            cap = cap.strip().encode('ascii')
205             aliases[name] = uri.from_string_dirnode(cap).to_string()
206     except EnvironmentError:
207         pass
208@@ -163,4 +164,4 @@
209 
210 def escape_path(path):
211     segments = path.split("/")
212-    return "/".join([urllib.quote(s) for s in segments])
213+    return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])
214diff -rN -u old-unicode/src/allmydata/scripts/tahoe_backup.py new-unicode/src/allmydata/scripts/tahoe_backup.py
215--- old-unicode/src/allmydata/scripts/tahoe_backup.py   2009-04-08 22:00:05.000000000 -0600
216+++ new-unicode/src/allmydata/scripts/tahoe_backup.py   2009-04-08 22:00:09.000000000 -0600
217@@ -4,11 +4,15 @@
218 import urllib
219 import simplejson
220 import datetime
221+import sys
222 from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
223 from allmydata.scripts.common_http import do_http
224 from allmydata import uri
225 from allmydata.util import time_format
226 from allmydata.scripts import backupdb
227+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs
228+from allmydata.util.assertutil import precondition
229+from twisted.python import usage
230 
231 class HTTPError(Exception):
232     pass
233@@ -248,6 +252,7 @@
234             print >>self.options.stdout, msg
235 
236     def process(self, localpath, olddircap):
237+        precondition(isinstance(localpath, unicode), localpath)
238         # returns newdircap
239 
240         self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap))
241@@ -256,7 +261,8 @@
242             olddircontents = self.readdir(olddircap)
243 
244         newdircontents = {} # childname -> (type, rocap, metadata)
245-        for child in self.options.filter_listdir(os.listdir(localpath)):
246+        for child in self.options.filter_listdir(os.listdir(unicode_to_fs(localpath))):
247+            child = fs_to_unicode(child)
248             childpath = os.path.join(localpath, child)
249             if os.path.isdir(childpath):
250                 metadata = get_local_metadata(childpath)
251@@ -342,6 +348,8 @@
252         return contents
253 
254     def upload(self, childpath):
255+        precondition(isinstance(childpath, unicode), childpath)
256+
257         #self.verboseprint("uploading %s.." % childpath)
258         metadata = get_local_metadata(childpath)
259 
260@@ -350,7 +358,7 @@
261 
262         if must_upload:
263             self.verboseprint("uploading %s.." % childpath)
264-            infileobj = open(os.path.expanduser(childpath), "rb")
265+            infileobj = open(unicode_to_fs(os.path.expanduser(childpath)), "rb")
266             url = self.options['node-url'] + "uri"
267             resp = do_http("PUT", url, infileobj)
268             if resp.status not in (200, 201):
269diff -rN -u old-unicode/src/allmydata/scripts/tahoe_cp.py new-unicode/src/allmydata/scripts/tahoe_cp.py
270--- old-unicode/src/allmydata/scripts/tahoe_cp.py       2009-04-08 22:00:05.000000000 -0600
271+++ new-unicode/src/allmydata/scripts/tahoe_cp.py       2009-04-08 22:00:09.000000000 -0600
272@@ -4,9 +4,13 @@
273 import simplejson
274 from cStringIO import StringIO
275 from twisted.python.failure import Failure
276+import sys
277 from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker
278 from allmydata.scripts.common_http import do_http
279 from allmydata import uri
280+from twisted.python import usage
281+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs, unicode_to_url
282+from allmydata.util.assertutil import precondition
283 
284 def ascii_or_none(s):
285     if s is None:
286@@ -69,6 +73,7 @@
287 
288 class LocalFileSource:
289     def __init__(self, pathname):
290+        precondition(isinstance(pathname, unicode), pathname)
291         self.pathname = pathname
292 
293     def need_to_copy_bytes(self):
294@@ -79,6 +84,7 @@
295 
296 class LocalFileTarget:
297     def __init__(self, pathname):
298+        precondition(isinstance(pathname, unicode), pathname)
299         self.pathname = pathname
300     def put_file(self, inf):
301         outf = open(self.pathname, "wb")
302@@ -91,6 +97,7 @@
303 
304 class LocalMissingTarget:
305     def __init__(self, pathname):
306+        precondition(isinstance(pathname, unicode), pathname)
307         self.pathname = pathname
308 
309     def put_file(self, inf):
310@@ -104,6 +111,8 @@
311 
312 class LocalDirectorySource:
313     def __init__(self, progressfunc, pathname):
314+        precondition(isinstance(pathname, unicode), pathname)
315+
316         self.progressfunc = progressfunc
317         self.pathname = pathname
318         self.children = None
319@@ -112,8 +121,9 @@
320         if self.children is not None:
321             return
322         self.children = {}
323-        children = os.listdir(self.pathname)
324+        children = os.listdir(unicode_to_fs(self.pathname))
325         for i,n in enumerate(children):
326+            n = fs_to_unicode(n)
327             self.progressfunc("examining %d of %d" % (i, len(children)))
328             pn = os.path.join(self.pathname, n)
329             if os.path.isdir(pn):
330@@ -129,6 +139,8 @@
331 
332 class LocalDirectoryTarget:
333     def __init__(self, progressfunc, pathname):
334+        precondition(isinstance(pathname, unicode), pathname)
335+
336         self.progressfunc = progressfunc
337         self.pathname = pathname
338         self.children = None
339@@ -137,8 +149,9 @@
340         if self.children is not None:
341             return
342         self.children = {}
343-        children = os.listdir(self.pathname)
344+        children = os.listdir(unicode_to_fs(self.pathname))
345         for i,n in enumerate(children):
346+            n = fs_to_unicode(n)
347             self.progressfunc("examining %d of %d" % (i, len(children)))
348             pn = os.path.join(self.pathname, n)
349             if os.path.isdir(pn):
350@@ -160,8 +173,9 @@
351         return LocalDirectoryTarget(self.progressfunc, pathname)
352 
353     def put_file(self, name, inf):
354+        precondition(isinstance(name, unicode), name)
355         pathname = os.path.join(self.pathname, name)
356-        outf = open(pathname, "wb")
357+        outf = open(unicode_to_fs(pathname), "wb")
358         while True:
359             data = inf.read(32768)
360             if not data:
361@@ -350,7 +364,7 @@
362                 if self.writecap:
363                     url = self.nodeurl + "/".join(["uri",
364                                                    urllib.quote(self.writecap),
365-                                                   urllib.quote(name.encode('utf-8'))])
366+                                                   urllib.quote(unicode_to_url(name))])
367                 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
368                                                       writecap, readcap, url)
369             else:
370diff -rN -u old-unicode/src/allmydata/scripts/tahoe_ls.py new-unicode/src/allmydata/scripts/tahoe_ls.py
371--- old-unicode/src/allmydata/scripts/tahoe_ls.py       2009-04-08 22:00:05.000000000 -0600
372+++ new-unicode/src/allmydata/scripts/tahoe_ls.py       2009-04-08 22:00:09.000000000 -0600
373@@ -82,17 +82,17 @@
374         if childtype == "dirnode":
375             t0 = "d"
376             size = "-"
377-            classify = "/"
378+            classify = u"/"
379         elif childtype == "filenode":
380             t0 = "-"
381             size = str(child[1]['size'])
382-            classify = ""
383+            classify = u""
384             if rw_uri:
385-                classify = "*"
386+                classify = u"*"
387         else:
388             t0 = "?"
389             size = "?"
390-            classify = "?"
391+            classify = u"?"
392         t1 = "-"
393         if ro_uri:
394             t1 = "r"
395@@ -111,7 +111,7 @@
396             line.append(size)
397             line.append(ctime_s)
398         if not options["classify"]:
399-            classify = ""
400+            classify = u""
401         line.append(name + classify)
402         if options["uri"]:
403             line.append(uri)
404@@ -135,13 +135,13 @@
405         left_justifys[0] = True
406     fmt_pieces = []
407     for i in range(len(max_widths)):
408-        piece = "%"
409+        piece = u"%"
410         if left_justifys[i]:
411-            piece += "-"
412+            piece += u"-"
413         piece += str(max_widths[i])
414-        piece += "s"
415+        piece += u"s"
416         fmt_pieces.append(piece)
417-    fmt = " ".join(fmt_pieces)
418+    fmt = u" ".join(fmt_pieces)
419     for row in rows:
420         print >>stdout, (fmt % tuple(row)).rstrip()
421 
422diff -rN -u old-unicode/src/allmydata/scripts/tahoe_manifest.py new-unicode/src/allmydata/scripts/tahoe_manifest.py
423--- old-unicode/src/allmydata/scripts/tahoe_manifest.py 2009-04-08 22:00:05.000000000 -0600
424+++ new-unicode/src/allmydata/scripts/tahoe_manifest.py 2009-04-08 22:00:09.000000000 -0600
425@@ -78,10 +78,15 @@
426                     print >>stdout, vc
427             else:
428                 try:
429-                    print >>stdout, d["cap"], "/".join(d["path"])
430+                    print >>stdout, d["cap"], u"/".join(d["path"])
431                 except UnicodeEncodeError:
432-                    print >>stdout, d["cap"], "/".join([p.encode("utf-8")
433-                                                        for p in d["path"]])
434+                    # Perhaps python and/or the local system is misconfigured
435+                    # and actually it should have used utf-8.  See ticket #534
436+                    # about the questionable practice of second-guessing
437+                    # python+system-config like this.  (And how 'utf-16le'
438+                    # might be a better second-guess on Windows.)
439+                    print >>stdout, d["cap"].encode('utf-8'), \
440+                        "/".join([p.encode('utf-8') for p in d["path"]])
441 
442 def manifest(options):
443     return ManifestStreamer().run(options)
444diff -rN -u old-unicode/src/allmydata/scripts/tahoe_mkdir.py new-unicode/src/allmydata/scripts/tahoe_mkdir.py
445--- old-unicode/src/allmydata/scripts/tahoe_mkdir.py    2009-04-08 22:00:05.000000000 -0600
446+++ new-unicode/src/allmydata/scripts/tahoe_mkdir.py    2009-04-08 22:00:09.000000000 -0600
447@@ -2,6 +2,7 @@
448 import urllib
449 from allmydata.scripts.common_http import do_http, check_http_error
450 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS
451+from allmydata.util.stringutils import unicode_to_url
452 
453 def mkdir(options):
454     nodeurl = options['node-url']
455@@ -31,7 +32,7 @@
456         path = path[:-1]
457     # path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
458     url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
459-                                           urllib.quote(path))
460+                                           urllib.quote(unicode_to_url(path)))
461     resp = do_http("POST", url)
462     check_http_error(resp, stderr)
463     new_uri = resp.read().strip()
464diff -rN -u old-unicode/src/allmydata/test/test_cli.py new-unicode/src/allmydata/test/test_cli.py
465--- old-unicode/src/allmydata/test/test_cli.py  2009-04-08 22:00:05.000000000 -0600
466+++ new-unicode/src/allmydata/test/test_cli.py  2009-04-08 22:00:09.000000000 -0600
467@@ -1,5 +1,6 @@
468 # coding=utf-8
469 
470+import sys
471 import os.path
472 from twisted.trial import unittest
473 from cStringIO import StringIO
474@@ -518,6 +519,41 @@
475             self._test_webopen(["two:"], self.two_url)
476         d.addCallback(_test_urls)
477 
478+        d.addCallback(lambda res: self.do_cli("create-alias", "études"))
479+        def _check_create_unicode((rc,stdout,stderr)):
480+            self.failUnlessEqual(rc, 0)
481+            self.failIf(stderr)
482+
483+            # If stdout only supports ascii, accented characters are
484+            # replaced by '?'
485+            if sys.stdout.encoding == "ANSI_X3.4-1968":
486+                self.failUnless("Alias '?tudes' created" in stdout)
487+            else:
488+                self.failUnless("Alias 'études' created" in stdout)
489+
490+            aliases = get_aliases(self.get_clientdir())
491+            self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
492+        d.addCallback(_check_create_unicode)
493+
494+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
495+        def _check_ls1((rc, stdout, stderr)):
496+            self.failUnlessEqual(rc, 0)
497+            self.failIf(stderr)
498+
499+            self.failUnlessEqual(stdout, "")
500+        d.addCallback(_check_ls1)
501+
502+        d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
503+          stdin="Blah blah blah"))
504+
505+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
506+        def _check_ls2((rc, stdout, stderr)):
507+            self.failUnlessEqual(rc, 0)
508+            self.failIf(stderr)
509+
510+            self.failUnlessEqual(stdout, "uploaded.txt\n")
511+        d.addCallback(_check_ls2)
512+
513         return d
514 
515 class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
516@@ -739,6 +775,37 @@
517         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
518         return d
519 
520+    def test_immutable_from_file_unicode(self):
521+        # tahoe put file.txt "à trier.txt"
522+        self.basedir = os.path.dirname(self.mktemp())
523+        self.set_up_grid()
524+
525+        rel_fn = os.path.join(self.basedir, "DATAFILE")
526+        abs_fn = os.path.abspath(rel_fn)
527+        # we make the file small enough to fit in a LIT file, for speed
528+        DATA = "short file"
529+        f = open(rel_fn, "w")
530+        f.write(DATA)
531+        f.close()
532+
533+        d = self.do_cli("create-alias", "tahoe")
534+
535+        d.addCallback(lambda res:
536+                      self.do_cli("put", rel_fn, "à trier.txt"))
537+        def _uploaded((rc,stdout,stderr)):
538+            readcap = stdout.strip()
539+            self.failUnless(readcap.startswith("URI:LIT:"))
540+            self.failUnless("201 Created" in stderr, stderr)
541+            self.readcap = readcap
542+        d.addCallback(_uploaded)
543+
544+        d.addCallback(lambda res:
545+                      self.do_cli("get", "tahoe:à trier.txt"))
546+        d.addCallback(lambda (rc,stdout,stderr):
547+                      self.failUnlessEqual(stdout, DATA))
548+
549+        return d
550+
551 class List(GridTestMixin, CLITestMixin, unittest.TestCase):
552     def test_list(self):
553         self.basedir = "cli/List/list"
554@@ -795,30 +862,37 @@
555     def test_unicode_filename(self):
556         self.basedir = "cli/Cp/unicode_filename"
557         self.set_up_grid()
558+        d = self.do_cli("create-alias", "tahoe")
559+
560+        # Use unicode strings when calling os functions
561+        if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
562+            fn1 = os.path.join(self.basedir, u"Artonwall")
563+        else:
564+            fn1 = os.path.join(self.basedir, u"Ärtonwall")
565 
566-        fn1 = os.path.join(self.basedir, "Ärtonwall")
567         DATA1 = "unicode file content"
568         open(fn1, "wb").write(DATA1)
569+        d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:Ärtonwall"))
570+
571+        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
572+        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
573 
574-        fn2 = os.path.join(self.basedir, "Metallica")
575+
576+        fn2 = os.path.join(self.basedir, u"Metallica")
577         DATA2 = "non-unicode file content"
578         open(fn2, "wb").write(DATA2)
579 
580         # Bug #534
581         # Assure that uploading a file whose name contains unicode character doesn't
582         # prevent further uploads in the same directory
583-        d = self.do_cli("create-alias", "tahoe")
584-        d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
585-        d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
586-
587-        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
588-        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
589+        d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
590 
591         d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
592         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
593 
594+        d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
595+
596         return d
597-    test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms.  See issue ticket #534."
598 
599     def test_dangling_symlink_vs_recursion(self):
600         if not hasattr(os, 'symlink'):
601@@ -837,6 +911,17 @@
602                                               dn, "tahoe:"))
603         return d
604 
605+class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
606+    def test_unicode_mkdir(self):
607+        self.basedir = os.path.dirname(self.mktemp())
608+        self.set_up_grid()
609+
610+        d = self.do_cli("create-alias", "tahoe")
611+        d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))
612+
613+        return d
614+
615+
616 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
617 
618     def writeto(self, path, data):
619@@ -871,6 +956,11 @@
620         self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
621         self.writeto("parent/blah.txt", "blah")
622 
623+        if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
624+            self.writeto(u"parent/artonwall.txt", "Marmelade Jacuzzi")
625+        else:
626+            self.writeto(u"parent/ärtonwall.txt", "Marmelade Jacuzzi")
627+
628         def do_backup(use_backupdb=True, verbose=False):
629             cmd = ["backup"]
630             if not have_bdb or not use_backupdb:
631@@ -895,8 +985,8 @@
632             self.failUnlessEqual(err, "")
633             self.failUnlessEqual(rc, 0)
634             fu, fr, dc, dr = self.count_output(out)
635-            # foo.txt, bar.txt, blah.txt
636-            self.failUnlessEqual(fu, 3)
637+            # foo.txt, bar.txt, blah.txt, ärtonwall.txt
638+            self.failUnlessEqual(fu, 4)
639             self.failUnlessEqual(fr, 0)
640             # empty, home, home/parent, home/parent/subdir
641             self.failUnlessEqual(dc, 4)
642@@ -945,9 +1035,9 @@
643             self.failUnlessEqual(rc, 0)
644             if have_bdb:
645                 fu, fr, dc, dr = self.count_output(out)
646-                # foo.txt, bar.txt, blah.txt
647+                # foo.txt, bar.txt, blah.txt, ärtonwall.txt
648                 self.failUnlessEqual(fu, 0)
649-                self.failUnlessEqual(fr, 3)
650+                self.failUnlessEqual(fr, 4)
651                 # empty, home, home/parent, home/parent/subdir
652                 self.failUnlessEqual(dc, 0)
653                 self.failUnlessEqual(dr, 4)
654@@ -975,9 +1065,9 @@
655                 self.failUnlessEqual(rc, 0)
656                 fu, fr, dc, dr = self.count_output(out)
657                 fchecked, dchecked, dread = self.count_output2(out)
658-                self.failUnlessEqual(fchecked, 3)
659+                self.failUnlessEqual(fchecked, 4)
660                 self.failUnlessEqual(fu, 0)
661-                self.failUnlessEqual(fr, 3)
662+                self.failUnlessEqual(fr, 4)
663                 # TODO: backupdb doesn't do dirs yet; when it does, this will
664                 # change to dchecked=4, and maybe dread=0
665                 self.failUnlessEqual(dchecked, 0)
666@@ -1023,8 +1113,8 @@
667                 fu, fr, dc, dr = self.count_output(out)
668                 # new foo.txt, surprise file, subfile, empty
669                 self.failUnlessEqual(fu, 4)
670-                # old bar.txt
671-                self.failUnlessEqual(fr, 1)
672+                # old bar.txt, ärtonwall.txt
673+                self.failUnlessEqual(fr, 2)
674                 # home, parent, subdir, blah.txt, surprisedir
675                 self.failUnlessEqual(dc, 5)
676                 self.failUnlessEqual(dr, 0)
677@@ -1063,7 +1153,7 @@
678             self.failUnlessEqual(err, "")
679             self.failUnlessEqual(rc, 0)
680             fu, fr, dc, dr = self.count_output(out)
681-            self.failUnlessEqual(fu, 5)
682+            self.failUnlessEqual(fu, 6)
683             self.failUnlessEqual(fr, 0)
684             self.failUnlessEqual(dc, 0)
685             self.failUnlessEqual(dr, 5)
686diff -rN -u old-unicode/src/allmydata/util/stringutils.py new-unicode/src/allmydata/util/stringutils.py
687--- old-unicode/src/allmydata/util/stringutils.py       1969-12-31 17:00:00.000000000 -0700
688+++ new-unicode/src/allmydata/util/stringutils.py       2009-04-08 22:00:09.000000000 -0600
689@@ -0,0 +1,62 @@
690+"""
691+Functions used to convert inputs from whatever encoding used in the system to
692+unicode and back.
693+
694+TODO:
695+  * Accept two cli arguments --argv-encoding and --filesystem-encoding
696+"""
697+
698+import sys
699+from allmydata.util.assertutil import precondition
700+from twisted.python import usage
701+
702+def argv_to_unicode(s):
703+    """
704+    Decode a UTF-8 argv element (str) to unicode, or raise usage.UsageError.
705+    """
706+    # sys.argv encoding detection in Python is not trivial so utf-8 is
707+    # currently used by default and an informative error message is given if
708+    # the argument cannot be correctly decoded.
709+
710+    precondition(isinstance(s, str), s)
711+    try:
712+        return unicode(s, 'utf-8')
713+    except UnicodeDecodeError:
714+        raise usage.UsageError("Argument '%s' cannot be decoded as UTF-8." % s)
715+
716+def fs_to_unicode(s):
717+    """
718+    Decode a filename (or a directory name) to unicode using the same encoding
719+    as the filesystem.
720+    """
721+    # Filename encoding detection is a little bit better thanks to
722+    # getfilesystemencoding() in the sys module. However, filenames can be
723+    # encoded using another encoding than the one used on the filesystem.
724+
725+    precondition(isinstance(s, str), s)
726+    encoding = sys.getfilesystemencoding()
727+    try:
728+        return unicode(s, encoding)
729+    except UnicodeDecodeError:
730+        raise usage.UsageError("Filename '%s' cannot be decoded using the current encoding of your filesystem (%s). Please rename this file." % (s, encoding))
731+
732+def unicode_to_fs(s):
733+    """
734+    Encode a unicode object used in a file or directory name.
735+    """
736+
737+    precondition(isinstance(s, unicode), s)
738+    encoding = sys.getfilesystemencoding()
739+    try:
740+        return s.encode(encoding)
741+    except UnicodeEncodeError:
742+        raise usage.UsageError("Filename '%s' cannot be encoded using the current encoding of your filesystem (%s). Please configure your locale correctly or rename this file." % (s, encoding))
743+
744+def unicode_to_url(s):
745+    """
746+    Encode a unicode object used in a URL.
747+    """
748+    # According to RFC 2718, non-ascii characters in URLs must be UTF-8 encoded.
749+
750+    precondition(isinstance(s, unicode), s)
751+    return s.encode('utf-8')
752