Ticket #534: unicode-v3-minus-the-stdout-parts.patch.txt

File unicode-v3-minus-the-stdout-parts.patch.txt, 30.7 KB (added by zooko, at 2009-04-09T03:59:02Z)
Line 
1diff -rN -u old-unicode/docs/frontends/CLI.txt new-unicode/docs/frontends/CLI.txt
2--- old-unicode/docs/frontends/CLI.txt  2009-04-08 21:57:57.000000000 -0600
3+++ new-unicode/docs/frontends/CLI.txt  2009-04-08 21:58:07.000000000 -0600
4@@ -91,9 +91,21 @@
5 These commands also use a table of "aliases" to figure out which directory
6 they ought to use a starting point. This is explained in more detail below.
7 
8-In Tahoe v1.3.0, passing non-ascii characters to the cli is not guaranteed to
9-work, although it might work on your platform, especially if your platform
10-uses utf-8 encoding.
11+As of Tahoe v1.3.1, filenames containing non-ascii characters are
12+supported on the command line if your terminal is correctly configured
13+for UTF-8 support. This is usually the case on modern GNU/Linux
14+distributions.
15+
16+If your terminal doesn't support UTF-8, you will still be able to list
17+directories but non-ascii characters will be replaced by a question mark
18+(?) on display.
19+
20+Reading from and writing to files whose names contain non-ascii
21+characters is also supported when your system correctly understands them.
22+Under Unix, this is usually handled by locale settings. If Tahoe cannot
23+correctly decode a filename, it will raise an error. In such a case,
24+you'll need to correct the name of your file, possibly with help from
25+tools such as convmv.
26 
27 === Starting Directories ===
28 
29diff -rN -u old-unicode/src/allmydata/scripts/cli.py new-unicode/src/allmydata/scripts/cli.py
30--- old-unicode/src/allmydata/scripts/cli.py    2009-04-08 21:58:02.000000000 -0600
31+++ new-unicode/src/allmydata/scripts/cli.py    2009-04-08 21:58:07.000000000 -0600
32@@ -1,6 +1,7 @@
33 import os.path, re, sys, fnmatch
34 from twisted.python import usage
35 from allmydata.scripts.common import BaseOptions, get_aliases
36+from allmydata.util.stringutils import argv_to_unicode
37 
38 NODEURL_RE=re.compile("http://([^:]*)(:([1-9][0-9]*))?")
39 
40@@ -49,12 +50,12 @@
41 
42 class MakeDirectoryOptions(VDriveOptions):
43     def parseArgs(self, where=""):
44-        self.where = where
45+        self.where = argv_to_unicode(where)
46     longdesc = """Create a new directory, either unlinked or as a subdirectory."""
47 
48 class AddAliasOptions(VDriveOptions):
49     def parseArgs(self, alias, cap):
50-        self.alias = alias
51+        self.alias = argv_to_unicode(alias)
52         self.cap = cap
53 
54     def getSynopsis(self):
55@@ -64,7 +65,7 @@
56 
57 class CreateAliasOptions(VDriveOptions):
58     def parseArgs(self, alias):
59-        self.alias = alias
60+        self.alias = argv_to_unicode(alias)
61 
62     def getSynopsis(self):
63         return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)
64@@ -83,7 +84,7 @@
65         ("json", None, "Show the raw JSON output"),
66         ]
67     def parseArgs(self, where=""):
68-        self.where = where
69+        self.where = argv_to_unicode(where)
70 
71     longdesc = """List the contents of some portion of the virtual drive."""
72 
73@@ -94,8 +95,13 @@
74         # tahoe get FOO bar              # write to local file
75         # tahoe get tahoe:FOO bar        # same
76 
77-        self.from_file = arg1
78-        self.to_file = arg2
79+        self.from_file = argv_to_unicode(arg1)
80+
81+        if arg2:
82+            self.to_file = argv_to_unicode(arg2)
83+        else:
84+            self.to_file = None
85+
86         if self.to_file == "-":
87             self.to_file = None
88 
89@@ -131,15 +137,15 @@
90         # tahoe put bar tahoe:FOO        # same
91 
92         if arg1 is not None and arg2 is not None:
93-            self.from_file = arg1
94-            self.to_file = arg2
95+            self.from_file = argv_to_unicode(arg1)
96+            self.to_file =  argv_to_unicode(arg2)
97         elif arg1 is not None and arg2 is None:
98-            self.from_file = arg1 # might be "-"
99+            self.from_file = argv_to_unicode(arg1) # might be "-"
100             self.to_file = None
101         else:
102             self.from_file = None
103             self.to_file = None
104-        if self.from_file == "-":
105+        if self.from_file == u"-":
106             self.from_file = None
107 
108     def getSynopsis(self):
109@@ -176,28 +182,28 @@
110     def parseArgs(self, *args):
111         if len(args) < 2:
112             raise usage.UsageError("cp requires at least two arguments")
113-        self.sources = args[:-1]
114-        self.destination = args[-1]
115+        self.sources = map(argv_to_unicode, args[:-1])
116+        self.destination = argv_to_unicode(args[-1])
117 
118 class RmOptions(VDriveOptions):
119     def parseArgs(self, where):
120-        self.where = where
121+        self.where = argv_to_unicode(where)
122 
123     def getSynopsis(self):
124         return "%s rm VDRIVE_FILE" % (os.path.basename(sys.argv[0]),)
125 
126 class MvOptions(VDriveOptions):
127     def parseArgs(self, frompath, topath):
128-        self.from_file = frompath
129-        self.to_file = topath
130+        self.from_file = argv_to_unicode(frompath)
131+        self.to_file = argv_to_unicode(topath)
132 
133     def getSynopsis(self):
134         return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
135 
136 class LnOptions(VDriveOptions):
137     def parseArgs(self, frompath, topath):
138-        self.from_file = frompath
139-        self.to_file = topath
140+        self.from_file = argv_to_unicode(frompath)
141+        self.to_file = argv_to_unicode(topath)
142 
143     def getSynopsis(self):
144         return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
145@@ -221,8 +227,8 @@
146         self['exclude'] = set()
147 
148     def parseArgs(self, localdir, topath):
149-        self.from_dir = localdir
150-        self.to_dir = topath
151+        self.from_dir = argv_to_unicode(localdir)
152+        self.to_dir = argv_to_unicode(topath)
153 
154     def getSynopsis(Self):
155         return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
156@@ -270,7 +276,7 @@
157 
158 class WebopenOptions(VDriveOptions):
159     def parseArgs(self, where=''):
160-        self.where = where
161+        self.where = argv_to_unicode(where)
162 
163     def getSynopsis(self):
164         return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
165@@ -285,7 +291,7 @@
166         ("raw", "r", "Display raw JSON data instead of parsed"),
167         ]
168     def parseArgs(self, where=''):
169-        self.where = where
170+        self.where = argv_to_unicode(where)
171 
172     def getSynopsis(self):
173         return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
174@@ -297,7 +303,7 @@
175         ("raw", "r", "Display raw JSON data instead of parsed"),
176         ]
177     def parseArgs(self, where=''):
178-        self.where = where
179+        self.where = argv_to_unicode(where)
180 
181     def getSynopsis(self):
182         return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
183@@ -312,7 +318,7 @@
184         ("add-lease", None, "Add/renew lease on all shares"),
185         ]
186     def parseArgs(self, where=''):
187-        self.where = where
188+        self.where = argv_to_unicode(where)
189 
190     def getSynopsis(self):
191         return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
192@@ -328,7 +334,7 @@
193         ("verbose", "v", "Be noisy about what is happening."),
194         ]
195     def parseArgs(self, where=''):
196-        self.where = where
197+        self.where = argv_to_unicode(where)
198 
199     def getSynopsis(self):
200         return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
201diff -rN -u old-unicode/src/allmydata/scripts/common.py new-unicode/src/allmydata/scripts/common.py
202--- old-unicode/src/allmydata/scripts/common.py 2009-04-08 21:58:02.000000000 -0600
203+++ new-unicode/src/allmydata/scripts/common.py 2009-04-08 21:58:07.000000000 -0600
204@@ -1,7 +1,8 @@
205 
206 import os, sys, urllib
207+import codecs
208 from twisted.python import usage
209-
210+from allmydata.util.stringutils import unicode_to_url
211 
212 class BaseOptions:
213     # unit tests can override these to point at StringIO instances
214@@ -100,14 +101,14 @@
215     except EnvironmentError:
216         pass
217     try:
218-        f = open(aliasfile, "r")
219+        f = codecs.open(aliasfile, "r", "utf-8")
220         for line in f.readlines():
221             line = line.strip()
222             if line.startswith("#") or not line:
223                 continue
224             name, cap = line.split(":", 1)
225             # normalize it: remove http: prefix, urldecode
226-            cap = cap.strip()
227+            cap = cap.strip().encode('ascii')
228             aliases[name] = uri.from_string_dirnode(cap).to_string()
229     except EnvironmentError:
230         pass
231@@ -163,4 +164,4 @@
232 
233 def escape_path(path):
234     segments = path.split("/")
235-    return "/".join([urllib.quote(s) for s in segments])
236+    return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])
237diff -rN -u old-unicode/src/allmydata/scripts/tahoe_add_alias.py new-unicode/src/allmydata/scripts/tahoe_add_alias.py
238--- old-unicode/src/allmydata/scripts/tahoe_add_alias.py        2009-04-08 21:58:02.000000000 -0600
239+++ new-unicode/src/allmydata/scripts/tahoe_add_alias.py        2009-04-08 21:58:07.000000000 -0600
240@@ -1,8 +1,11 @@
241 
242 import os.path
243+import codecs
244+import sys
245 from allmydata import uri
246 from allmydata.scripts.common_http import do_http, check_http_error
247 from allmydata.scripts.common import get_aliases
248+from allmydata.util.stringutils import argv_to_unicode
249 
250 def add_alias(options):
251     nodedir = options['node-directory']
252@@ -28,6 +31,7 @@
253 def create_alias(options):
254     # mkdir+add_alias
255     nodedir = options['node-directory']
256+    assert isinstance(options.alias, unicode), options.alias
257     alias = options.alias
258     stdout = options.stdout
259     stderr = options.stderr
260@@ -52,10 +56,11 @@
261     new_uri = resp.read().strip()
262 
263     # probably check for others..
264-    f = open(aliasfile, "a")
265-    f.write("%s: %s\n" % (alias, new_uri))
266+    f = codecs.open(aliasfile, "a", "utf-8")
267+    f.write(u"%s: %s\n" % (alias, new_uri))
268     f.close()
269-    print >>stdout, "Alias '%s' created" % (alias,)
270+    assert isinstance(alias, unicode), alias
271+    print >>stdout, u"Alias '"+alias+u"' created"
272     return 0
273 
274 def list_aliases(options):
275diff -rN -u old-unicode/src/allmydata/scripts/tahoe_backup.py new-unicode/src/allmydata/scripts/tahoe_backup.py
276--- old-unicode/src/allmydata/scripts/tahoe_backup.py   2009-04-08 21:58:02.000000000 -0600
277+++ new-unicode/src/allmydata/scripts/tahoe_backup.py   2009-04-08 21:58:07.000000000 -0600
278@@ -4,11 +4,15 @@
279 import urllib
280 import simplejson
281 import datetime
282+import sys
283 from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
284 from allmydata.scripts.common_http import do_http
285 from allmydata import uri
286 from allmydata.util import time_format
287 from allmydata.scripts import backupdb
288+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs
289+from allmydata.util.assertutil import precondition
290+from twisted.python import usage
291 
292 class HTTPError(Exception):
293     pass
294@@ -248,6 +252,7 @@
295             print >>self.options.stdout, msg
296 
297     def process(self, localpath, olddircap):
298+        precondition(isinstance(localpath, unicode), localpath)
299         # returns newdircap
300 
301         self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap))
302@@ -256,7 +261,8 @@
303             olddircontents = self.readdir(olddircap)
304 
305         newdircontents = {} # childname -> (type, rocap, metadata)
306-        for child in self.options.filter_listdir(os.listdir(localpath)):
307+        for child in self.options.filter_listdir(os.listdir(unicode_to_fs(localpath))):
308+            child = fs_to_unicode(child)
309             childpath = os.path.join(localpath, child)
310             if os.path.isdir(childpath):
311                 metadata = get_local_metadata(childpath)
312@@ -342,6 +348,8 @@
313         return contents
314 
315     def upload(self, childpath):
316+        precondition(isinstance(childpath, unicode), childpath)
317+
318         #self.verboseprint("uploading %s.." % childpath)
319         metadata = get_local_metadata(childpath)
320 
321@@ -350,7 +358,7 @@
322 
323         if must_upload:
324             self.verboseprint("uploading %s.." % childpath)
325-            infileobj = open(os.path.expanduser(childpath), "rb")
326+            infileobj = open(unicode_to_fs(os.path.expanduser(childpath)), "rb")
327             url = self.options['node-url'] + "uri"
328             resp = do_http("PUT", url, infileobj)
329             if resp.status not in (200, 201):
330diff -rN -u old-unicode/src/allmydata/scripts/tahoe_cp.py new-unicode/src/allmydata/scripts/tahoe_cp.py
331--- old-unicode/src/allmydata/scripts/tahoe_cp.py       2009-04-08 21:58:02.000000000 -0600
332+++ new-unicode/src/allmydata/scripts/tahoe_cp.py       2009-04-08 21:58:08.000000000 -0600
333@@ -4,9 +4,13 @@
334 import simplejson
335 from cStringIO import StringIO
336 from twisted.python.failure import Failure
337+import sys
338 from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker
339 from allmydata.scripts.common_http import do_http
340 from allmydata import uri
341+from twisted.python import usage
342+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs, unicode_to_url
343+from allmydata.util.assertutil import precondition
344 
345 def ascii_or_none(s):
346     if s is None:
347@@ -69,6 +73,7 @@
348 
349 class LocalFileSource:
350     def __init__(self, pathname):
351+        precondition(isinstance(pathname, unicode), pathname)
352         self.pathname = pathname
353 
354     def need_to_copy_bytes(self):
355@@ -79,6 +84,7 @@
356 
357 class LocalFileTarget:
358     def __init__(self, pathname):
359+        precondition(isinstance(pathname, unicode), pathname)
360         self.pathname = pathname
361     def put_file(self, inf):
362         outf = open(self.pathname, "wb")
363@@ -91,6 +97,7 @@
364 
365 class LocalMissingTarget:
366     def __init__(self, pathname):
367+        precondition(isinstance(pathname, unicode), pathname)
368         self.pathname = pathname
369 
370     def put_file(self, inf):
371@@ -104,6 +111,8 @@
372 
373 class LocalDirectorySource:
374     def __init__(self, progressfunc, pathname):
375+        precondition(isinstance(pathname, unicode), pathname)
376+
377         self.progressfunc = progressfunc
378         self.pathname = pathname
379         self.children = None
380@@ -112,8 +121,9 @@
381         if self.children is not None:
382             return
383         self.children = {}
384-        children = os.listdir(self.pathname)
385+        children = os.listdir(unicode_to_fs(self.pathname))
386         for i,n in enumerate(children):
387+            n = fs_to_unicode(n)
388             self.progressfunc("examining %d of %d" % (i, len(children)))
389             pn = os.path.join(self.pathname, n)
390             if os.path.isdir(pn):
391@@ -129,6 +139,8 @@
392 
393 class LocalDirectoryTarget:
394     def __init__(self, progressfunc, pathname):
395+        precondition(isinstance(pathname, unicode), pathname)
396+
397         self.progressfunc = progressfunc
398         self.pathname = pathname
399         self.children = None
400@@ -137,8 +149,9 @@
401         if self.children is not None:
402             return
403         self.children = {}
404-        children = os.listdir(self.pathname)
405+        children = os.listdir(unicode_to_fs(self.pathname))
406         for i,n in enumerate(children):
407+            n = fs_to_unicode(n)
408             self.progressfunc("examining %d of %d" % (i, len(children)))
409             pn = os.path.join(self.pathname, n)
410             if os.path.isdir(pn):
411@@ -160,8 +173,9 @@
412         return LocalDirectoryTarget(self.progressfunc, pathname)
413 
414     def put_file(self, name, inf):
415+        precondition(isinstance(name, unicode), name)
416         pathname = os.path.join(self.pathname, name)
417-        outf = open(pathname, "wb")
418+        outf = open(unicode_to_fs(pathname), "wb")
419         while True:
420             data = inf.read(32768)
421             if not data:
422@@ -350,7 +364,7 @@
423                 if self.writecap:
424                     url = self.nodeurl + "/".join(["uri",
425                                                    urllib.quote(self.writecap),
426-                                                   urllib.quote(name.encode('utf-8'))])
427+                                                   urllib.quote(unicode_to_url(name))])
428                 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
429                                                       writecap, readcap, url)
430             else:
431diff -rN -u old-unicode/src/allmydata/scripts/tahoe_ls.py new-unicode/src/allmydata/scripts/tahoe_ls.py
432--- old-unicode/src/allmydata/scripts/tahoe_ls.py       2009-04-08 21:58:02.000000000 -0600
433+++ new-unicode/src/allmydata/scripts/tahoe_ls.py       2009-04-08 21:58:08.000000000 -0600
434@@ -82,17 +82,17 @@
435         if childtype == "dirnode":
436             t0 = "d"
437             size = "-"
438-            classify = "/"
439+            classify = u"/"
440         elif childtype == "filenode":
441             t0 = "-"
442             size = str(child[1]['size'])
443-            classify = ""
444+            classify = u""
445             if rw_uri:
446-                classify = "*"
447+                classify = u"*"
448         else:
449             t0 = "?"
450             size = "?"
451-            classify = "?"
452+            classify = u"?"
453         t1 = "-"
454         if ro_uri:
455             t1 = "r"
456@@ -111,7 +111,7 @@
457             line.append(size)
458             line.append(ctime_s)
459         if not options["classify"]:
460-            classify = ""
461+            classify = u""
462         line.append(name + classify)
463         if options["uri"]:
464             line.append(uri)
465@@ -135,13 +135,13 @@
466         left_justifys[0] = True
467     fmt_pieces = []
468     for i in range(len(max_widths)):
469-        piece = "%"
470+        piece = u"%"
471         if left_justifys[i]:
472-            piece += "-"
473+            piece += u"-"
474         piece += str(max_widths[i])
475-        piece += "s"
476+        piece += u"s"
477         fmt_pieces.append(piece)
478-    fmt = " ".join(fmt_pieces)
479+    fmt = u" ".join(fmt_pieces)
480     for row in rows:
481         print >>stdout, (fmt % tuple(row)).rstrip()
482 
483diff -rN -u old-unicode/src/allmydata/scripts/tahoe_manifest.py new-unicode/src/allmydata/scripts/tahoe_manifest.py
484--- old-unicode/src/allmydata/scripts/tahoe_manifest.py 2009-04-08 21:58:02.000000000 -0600
485+++ new-unicode/src/allmydata/scripts/tahoe_manifest.py 2009-04-08 21:58:08.000000000 -0600
486@@ -78,10 +78,15 @@
487                     print >>stdout, vc
488             else:
489                 try:
490-                    print >>stdout, d["cap"], "/".join(d["path"])
491+                    print >>stdout, d["cap"], u"/".join(d["path"])
492                 except UnicodeEncodeError:
493-                    print >>stdout, d["cap"], "/".join([p.encode("utf-8")
494-                                                        for p in d["path"]])
495+                    # Perhaps python and/or the local system is misconfigured
496+                    # and actually it should have used utf-8.  See ticket #534
497+                    # about the questionable practice of second-guessing
498+                    # python+system-config like this.  (And how 'utf-16le'
499+                    # might be a better second-guess on Windows.)
500+                    print >>stdout, d["cap"].encode('utf-8'), \
501+                        "/".join([p.encode('utf-8') for p in d["path"]])
502 
503 def manifest(options):
504     return ManifestStreamer().run(options)
505diff -rN -u old-unicode/src/allmydata/scripts/tahoe_mkdir.py new-unicode/src/allmydata/scripts/tahoe_mkdir.py
506--- old-unicode/src/allmydata/scripts/tahoe_mkdir.py    2009-04-08 21:58:02.000000000 -0600
507+++ new-unicode/src/allmydata/scripts/tahoe_mkdir.py    2009-04-08 21:58:08.000000000 -0600
508@@ -2,6 +2,7 @@
509 import urllib
510 from allmydata.scripts.common_http import do_http, check_http_error
511 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS
512+from allmydata.util.stringutils import unicode_to_url
513 
514 def mkdir(options):
515     nodeurl = options['node-url']
516@@ -31,7 +32,7 @@
517         path = path[:-1]
518     # path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
519     url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
520-                                           urllib.quote(path))
521+                                           urllib.quote(unicode_to_url(path)))
522     resp = do_http("POST", url)
523     check_http_error(resp, stderr)
524     new_uri = resp.read().strip()
525diff -rN -u old-unicode/src/allmydata/test/test_cli.py new-unicode/src/allmydata/test/test_cli.py
526--- old-unicode/src/allmydata/test/test_cli.py  2009-04-08 21:58:03.000000000 -0600
527+++ new-unicode/src/allmydata/test/test_cli.py  2009-04-08 21:58:08.000000000 -0600
528@@ -1,5 +1,6 @@
529 # coding=utf-8
530 
531+import sys
532 import os.path
533 from twisted.trial import unittest
534 from cStringIO import StringIO
535@@ -518,6 +519,41 @@
536             self._test_webopen(["two:"], self.two_url)
537         d.addCallback(_test_urls)
538 
539+        d.addCallback(lambda res: self.do_cli("create-alias", "études"))
540+        def _check_create_unicode((rc,stdout,stderr)):
541+            self.failUnlessEqual(rc, 0)
542+            self.failIf(stderr)
543+
544+            # If stdout only supports ascii, accented characters are
545+            # replaced by '?'
546+            if sys.stdout.encoding == "ANSI_X3.4-1968":
547+                self.failUnless("Alias '?tudes' created" in stdout)
548+            else:
549+                self.failUnless("Alias 'études' created" in stdout)
550+
551+            aliases = get_aliases(self.get_clientdir())
552+            self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
553+        d.addCallback(_check_create_unicode)
554+
555+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
556+        def _check_ls1((rc, stdout, stderr)):
557+            self.failUnlessEqual(rc, 0)
558+            self.failIf(stderr)
559+
560+            self.failUnlessEqual(stdout, "")
561+        d.addCallback(_check_ls1)
562+
563+        d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
564+          stdin="Blah blah blah"))
565+
566+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
567+        def _check_ls2((rc, stdout, stderr)):
568+            self.failUnlessEqual(rc, 0)
569+            self.failIf(stderr)
570+
571+            self.failUnlessEqual(stdout, "uploaded.txt\n")
572+        d.addCallback(_check_ls2)
573+
574         return d
575 
576 class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
577@@ -739,6 +775,37 @@
578         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
579         return d
580 
581+    def test_immutable_from_file_unicode(self):
582+        # tahoe put file.txt "à trier.txt"
583+        self.basedir = os.path.dirname(self.mktemp())
584+        self.set_up_grid()
585+
586+        rel_fn = os.path.join(self.basedir, "DATAFILE")
587+        abs_fn = os.path.abspath(rel_fn)
588+        # we make the file small enough to fit in a LIT file, for speed
589+        DATA = "short file"
590+        f = open(rel_fn, "w")
591+        f.write(DATA)
592+        f.close()
593+
594+        d = self.do_cli("create-alias", "tahoe")
595+
596+        d.addCallback(lambda res:
597+                      self.do_cli("put", rel_fn, "à trier.txt"))
598+        def _uploaded((rc,stdout,stderr)):
599+            readcap = stdout.strip()
600+            self.failUnless(readcap.startswith("URI:LIT:"))
601+            self.failUnless("201 Created" in stderr, stderr)
602+            self.readcap = readcap
603+        d.addCallback(_uploaded)
604+
605+        d.addCallback(lambda res:
606+                      self.do_cli("get", "tahoe:à trier.txt"))
607+        d.addCallback(lambda (rc,stdout,stderr):
608+                      self.failUnlessEqual(stdout, DATA))
609+
610+        return d
611+
612 class List(GridTestMixin, CLITestMixin, unittest.TestCase):
613     def test_list(self):
614         self.basedir = "cli/List/list"
615@@ -795,30 +862,37 @@
616     def test_unicode_filename(self):
617         self.basedir = "cli/Cp/unicode_filename"
618         self.set_up_grid()
619+        d = self.do_cli("create-alias", "tahoe")
620+
621+        # Use unicode strings when calling os functions
622+        if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
623+            fn1 = os.path.join(self.basedir, u"Artonwall")
624+        else:
625+            fn1 = os.path.join(self.basedir, u"Ärtonwall")
626 
627-        fn1 = os.path.join(self.basedir, "Ärtonwall")
628         DATA1 = "unicode file content"
629         open(fn1, "wb").write(DATA1)
630+        d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:Ärtonwall"))
631+
632+        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
633+        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
634 
635-        fn2 = os.path.join(self.basedir, "Metallica")
636+
637+        fn2 = os.path.join(self.basedir, u"Metallica")
638         DATA2 = "non-unicode file content"
639         open(fn2, "wb").write(DATA2)
640 
641         # Bug #534
642         # Assure that uploading a file whose name contains unicode character doesn't
643         # prevent further uploads in the same directory
644-        d = self.do_cli("create-alias", "tahoe")
645-        d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
646-        d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
647-
648-        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
649-        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
650+        d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
651 
652         d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
653         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
654 
655+        d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
656+
657         return d
658-    test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms.  See issue ticket #534."
659 
660     def test_dangling_symlink_vs_recursion(self):
661         if not hasattr(os, 'symlink'):
662@@ -837,6 +911,17 @@
663                                               dn, "tahoe:"))
664         return d
665 
666+class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
667+    def test_unicode_mkdir(self):
668+        self.basedir = os.path.dirname(self.mktemp())
669+        self.set_up_grid()
670+
671+        d = self.do_cli("create-alias", "tahoe")
672+        d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))
673+
674+        return d
675+
676+
677 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
678 
679     def writeto(self, path, data):
680@@ -871,6 +956,11 @@
681         self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
682         self.writeto("parent/blah.txt", "blah")
683 
684+        if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
685+            self.writeto(u"parent/artonwall.txt", "Marmelade Jacuzzi")
686+        else:
687+            self.writeto(u"parent/ärtonwall.txt", "Marmelade Jacuzzi")
688+
689         def do_backup(use_backupdb=True, verbose=False):
690             cmd = ["backup"]
691             if not have_bdb or not use_backupdb:
692@@ -895,8 +985,8 @@
693             self.failUnlessEqual(err, "")
694             self.failUnlessEqual(rc, 0)
695             fu, fr, dc, dr = self.count_output(out)
696-            # foo.txt, bar.txt, blah.txt
697-            self.failUnlessEqual(fu, 3)
698+            # foo.txt, bar.txt, blah.txt, ärtonwall.txt
699+            self.failUnlessEqual(fu, 4)
700             self.failUnlessEqual(fr, 0)
701             # empty, home, home/parent, home/parent/subdir
702             self.failUnlessEqual(dc, 4)
703@@ -945,9 +1035,9 @@
704             self.failUnlessEqual(rc, 0)
705             if have_bdb:
706                 fu, fr, dc, dr = self.count_output(out)
707-                # foo.txt, bar.txt, blah.txt
708+                # foo.txt, bar.txt, blah.txt, ärtonwall.txt
709                 self.failUnlessEqual(fu, 0)
710-                self.failUnlessEqual(fr, 3)
711+                self.failUnlessEqual(fr, 4)
712                 # empty, home, home/parent, home/parent/subdir
713                 self.failUnlessEqual(dc, 0)
714                 self.failUnlessEqual(dr, 4)
715@@ -975,9 +1065,9 @@
716                 self.failUnlessEqual(rc, 0)
717                 fu, fr, dc, dr = self.count_output(out)
718                 fchecked, dchecked, dread = self.count_output2(out)
719-                self.failUnlessEqual(fchecked, 3)
720+                self.failUnlessEqual(fchecked, 4)
721                 self.failUnlessEqual(fu, 0)
722-                self.failUnlessEqual(fr, 3)
723+                self.failUnlessEqual(fr, 4)
724                 # TODO: backupdb doesn't do dirs yet; when it does, this will
725                 # change to dchecked=4, and maybe dread=0
726                 self.failUnlessEqual(dchecked, 0)
727@@ -1023,8 +1113,8 @@
728                 fu, fr, dc, dr = self.count_output(out)
729                 # new foo.txt, surprise file, subfile, empty
730                 self.failUnlessEqual(fu, 4)
731-                # old bar.txt
732-                self.failUnlessEqual(fr, 1)
733+                # old bar.txt, ärtonwall.txt
734+                self.failUnlessEqual(fr, 2)
735                 # home, parent, subdir, blah.txt, surprisedir
736                 self.failUnlessEqual(dc, 5)
737                 self.failUnlessEqual(dr, 0)
738@@ -1063,7 +1153,7 @@
739             self.failUnlessEqual(err, "")
740             self.failUnlessEqual(rc, 0)
741             fu, fr, dc, dr = self.count_output(out)
742-            self.failUnlessEqual(fu, 5)
743+            self.failUnlessEqual(fu, 6)
744             self.failUnlessEqual(fr, 0)
745             self.failUnlessEqual(dc, 0)
746             self.failUnlessEqual(dr, 5)
747diff -rN -u old-unicode/src/allmydata/util/stringutils.py new-unicode/src/allmydata/util/stringutils.py
748--- old-unicode/src/allmydata/util/stringutils.py       1969-12-31 17:00:00.000000000 -0700
749+++ new-unicode/src/allmydata/util/stringutils.py       2009-04-08 21:58:08.000000000 -0600
750@@ -0,0 +1,62 @@
751+"""
752+Functions used to convert inputs from whatever encoding is used by the system
753+to unicode and back.
754+
755+TODO:
756+  * Accept two cli arguments --argv-encoding and --filesystem-encoding
757+"""
758+
759+import sys
760+from allmydata.util.assertutil import precondition
761+from twisted.python import usage
762+
763+def argv_to_unicode(s):
764+    """
765+    Decode an argv element from UTF-8 to unicode, or raise usage.UsageError.
766+    """
767+    # sys.argv encoding detection in Python is not trivial so utf-8 is
768+    # currently used by default and an informative error message is given if
769+    # the argument cannot be correctly decoded.
770+
771+    precondition(isinstance(s, str), s)
772+    try:
773+        return unicode(s, 'utf-8')
774+    except UnicodeDecodeError:
775+        raise usage.UsageError("Argument '%s' cannot be decoded as UTF-8." % s)
776+
777+def fs_to_unicode(s):
778+    """
779+    Decode a filename (or a directory name) to unicode using the encoding
780+    reported for the filesystem; raise usage.UsageError if decoding fails.
781+    """
782+    # Filename encoding detection is somewhat more reliable thanks to
783+    # sys.getfilesystemencoding(). However, an individual filename may still
784+    # have been written in a different encoding than the one reported.
785+
786+    precondition(isinstance(s, str), s)
787+    encoding = sys.getfilesystemencoding()
788+    try:
789+        return unicode(s, encoding)
790+    except UnicodeDecodeError:
791+        raise usage.UsageError("Filename '%s' cannot be decoded using the current encoding of your filesystem (%s). Please rename this file." % (s, encoding))
792+
793+def unicode_to_fs(s):
794+    """
795+    Encode a unicode file or directory name using the filesystem encoding.
796+    """
797+
798+    precondition(isinstance(s, unicode), s)
799+    encoding = sys.getfilesystemencoding()
800+    try:
801+        return s.encode(encoding)
802+    except UnicodeEncodeError:
803+        raise usage.UsageError("Filename '%s' cannot be encoded using the current encoding of your filesystem (%s). Please configure your locale correctly or rename this file." % (s, encoding))
804+
805+def unicode_to_url(s):
806+    """
807+    Encode a unicode object to the UTF-8 byte string used in a URL.
808+    """
809+    # According to RFC 2718, non-ascii characters in URLs should be UTF-8 encoded.
810+
811+    precondition(isinstance(s, unicode), s)
812+    return s.encode('utf-8')
813