Ticket #534: unicode-v3-minus-the-stdout-and-aliases-and-argv-and-url-parts.patch.txt

File unicode-v3-minus-the-stdout-and-aliases-and-argv-and-url-parts.patch.txt, 16.5 KB (added by zooko, at 2009-04-09T04:56:05Z)
Line 
1diff -rN -u old-unicode/src/allmydata/scripts/tahoe_backup.py new-unicode/src/allmydata/scripts/tahoe_backup.py
2--- old-unicode/src/allmydata/scripts/tahoe_backup.py   2009-04-08 22:55:52.000000000 -0600
3+++ new-unicode/src/allmydata/scripts/tahoe_backup.py   2009-04-08 22:55:55.000000000 -0600
4@@ -4,11 +4,15 @@
5 import urllib
6 import simplejson
7 import datetime
8+import sys
9 from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
10 from allmydata.scripts.common_http import do_http
11 from allmydata import uri
12 from allmydata.util import time_format
13 from allmydata.scripts import backupdb
14+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs
15+from allmydata.util.assertutil import precondition
16+from twisted.python import usage
17 
18 class HTTPError(Exception):
19     pass
20@@ -248,6 +252,7 @@
21             print >>self.options.stdout, msg
22 
23     def process(self, localpath, olddircap):
24+        precondition(isinstance(localpath, unicode), localpath)
25         # returns newdircap
26 
27         self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap))
28@@ -256,7 +261,8 @@
29             olddircontents = self.readdir(olddircap)
30 
31         newdircontents = {} # childname -> (type, rocap, metadata)
32-        for child in self.options.filter_listdir(os.listdir(localpath)):
33+        for child in self.options.filter_listdir(os.listdir(unicode_to_fs(localpath))):
34+            child = fs_to_unicode(child)
35             childpath = os.path.join(localpath, child)
36             if os.path.isdir(childpath):
37                 metadata = get_local_metadata(childpath)
38@@ -342,6 +348,8 @@
39         return contents
40 
41     def upload(self, childpath):
42+        precondition(isinstance(childpath, unicode), childpath)
43+
44         #self.verboseprint("uploading %s.." % childpath)
45         metadata = get_local_metadata(childpath)
46 
47@@ -350,7 +358,7 @@
48 
49         if must_upload:
50             self.verboseprint("uploading %s.." % childpath)
51-            infileobj = open(os.path.expanduser(childpath), "rb")
52+            infileobj = open(unicode_to_fs(os.path.expanduser(childpath)), "rb")
53             url = self.options['node-url'] + "uri"
54             resp = do_http("PUT", url, infileobj)
55             if resp.status not in (200, 201):
56diff -rN -u old-unicode/src/allmydata/scripts/tahoe_cp.py new-unicode/src/allmydata/scripts/tahoe_cp.py
57--- old-unicode/src/allmydata/scripts/tahoe_cp.py       2009-04-08 22:55:53.000000000 -0600
58+++ new-unicode/src/allmydata/scripts/tahoe_cp.py       2009-04-08 22:55:55.000000000 -0600
59@@ -4,9 +4,13 @@
60 import simplejson
61 from cStringIO import StringIO
62 from twisted.python.failure import Failure
63+import sys
64 from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker
65 from allmydata.scripts.common_http import do_http
66 from allmydata import uri
67+from twisted.python import usage
68+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs
69+from allmydata.util.assertutil import precondition
70 
71 def ascii_or_none(s):
72     if s is None:
73@@ -69,6 +73,7 @@
74 
75 class LocalFileSource:
76     def __init__(self, pathname):
77+        precondition(isinstance(pathname, unicode), pathname)
78         self.pathname = pathname
79 
80     def need_to_copy_bytes(self):
81@@ -79,6 +84,7 @@
82 
83 class LocalFileTarget:
84     def __init__(self, pathname):
85+        precondition(isinstance(pathname, unicode), pathname)
86         self.pathname = pathname
87     def put_file(self, inf):
88         outf = open(self.pathname, "wb")
89@@ -91,6 +97,7 @@
90 
91 class LocalMissingTarget:
92     def __init__(self, pathname):
93+        precondition(isinstance(pathname, unicode), pathname)
94         self.pathname = pathname
95 
96     def put_file(self, inf):
97@@ -104,6 +111,8 @@
98 
99 class LocalDirectorySource:
100     def __init__(self, progressfunc, pathname):
101+        precondition(isinstance(pathname, unicode), pathname)
102+
103         self.progressfunc = progressfunc
104         self.pathname = pathname
105         self.children = None
106@@ -112,8 +121,9 @@
107         if self.children is not None:
108             return
109         self.children = {}
110-        children = os.listdir(self.pathname)
111+        children = os.listdir(unicode_to_fs(self.pathname))
112         for i,n in enumerate(children):
113+            n = fs_to_unicode(n)
114             self.progressfunc("examining %d of %d" % (i, len(children)))
115             pn = os.path.join(self.pathname, n)
116             if os.path.isdir(pn):
117@@ -129,6 +139,8 @@
118 
119 class LocalDirectoryTarget:
120     def __init__(self, progressfunc, pathname):
121+        precondition(isinstance(pathname, unicode), pathname)
122+
123         self.progressfunc = progressfunc
124         self.pathname = pathname
125         self.children = None
126@@ -137,8 +149,9 @@
127         if self.children is not None:
128             return
129         self.children = {}
130-        children = os.listdir(self.pathname)
131+        children = os.listdir(unicode_to_fs(self.pathname))
132         for i,n in enumerate(children):
133+            n = fs_to_unicode(n)
134             self.progressfunc("examining %d of %d" % (i, len(children)))
135             pn = os.path.join(self.pathname, n)
136             if os.path.isdir(pn):
137@@ -160,8 +173,9 @@
138         return LocalDirectoryTarget(self.progressfunc, pathname)
139 
140     def put_file(self, name, inf):
141+        precondition(isinstance(name, unicode), name)
142         pathname = os.path.join(self.pathname, name)
143-        outf = open(pathname, "wb")
144+        outf = open(unicode_to_fs(pathname), "wb")
145         while True:
146             data = inf.read(32768)
147             if not data:
148diff -rN -u old-unicode/src/allmydata/scripts/tahoe_ls.py new-unicode/src/allmydata/scripts/tahoe_ls.py
149--- old-unicode/src/allmydata/scripts/tahoe_ls.py       2009-04-08 22:55:53.000000000 -0600
150+++ new-unicode/src/allmydata/scripts/tahoe_ls.py       2009-04-08 22:55:55.000000000 -0600
151@@ -86,9 +86,9 @@
152         elif childtype == "filenode":
153             t0 = "-"
154             size = str(child[1]['size'])
155-            classify = ""
156+            classify = u""
157             if rw_uri:
158-                classify = "*"
159+                classify = u"*"
160         else:
161             t0 = "?"
162             size = "?"
163diff -rN -u old-unicode/src/allmydata/test/test_cli.py new-unicode/src/allmydata/test/test_cli.py
164--- old-unicode/src/allmydata/test/test_cli.py  2009-04-08 22:55:53.000000000 -0600
165+++ new-unicode/src/allmydata/test/test_cli.py  2009-04-08 22:55:56.000000000 -0600
166@@ -1,5 +1,6 @@
167 # coding=utf-8
168 
169+import sys
170 import os.path
171 from twisted.trial import unittest
172 from cStringIO import StringIO
173@@ -518,6 +519,41 @@
174             self._test_webopen(["two:"], self.two_url)
175         d.addCallback(_test_urls)
176 
177+        d.addCallback(lambda res: self.do_cli("create-alias", "études"))
178+        def _check_create_unicode((rc,stdout,stderr)):
179+            self.failUnlessEqual(rc, 0)
180+            self.failIf(stderr)
181+
182+            # If stdout only supports ASCII, accented characters are
183+            # replaced by '?'
184+            if sys.stdout.encoding == "ANSI_X3.4-1968":
185+                self.failUnless("Alias '?tudes' created" in stdout)
186+            else:
187+                self.failUnless("Alias 'études' created" in stdout)
188+
189+            aliases = get_aliases(self.get_clientdir())
190+            self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
191+        d.addCallback(_check_create_unicode)
192+
193+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
194+        def _check_ls1((rc, stdout, stderr)):
195+            self.failUnlessEqual(rc, 0)
196+            self.failIf(stderr)
197+
198+            self.failUnlessEqual(stdout, "")
199+        d.addCallback(_check_ls1)
200+
201+        d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
202+          stdin="Blah blah blah"))
203+
204+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
205+        def _check_ls2((rc, stdout, stderr)):
206+            self.failUnlessEqual(rc, 0)
207+            self.failIf(stderr)
208+
209+            self.failUnlessEqual(stdout, "uploaded.txt\n")
210+        d.addCallback(_check_ls2)
211+
212         return d
213 
214 class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
215@@ -739,6 +775,37 @@
216         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
217         return d
218 
219+    def test_immutable_from_file_unicode(self):
220+        # tahoe put file.txt "à trier.txt"
221+        self.basedir = os.path.dirname(self.mktemp())
222+        self.set_up_grid()
223+
224+        rel_fn = os.path.join(self.basedir, "DATAFILE")
225+        abs_fn = os.path.abspath(rel_fn)
226+        # we make the file small enough to fit in a LIT file, for speed
227+        DATA = "short file"
228+        f = open(rel_fn, "w")
229+        f.write(DATA)
230+        f.close()
231+
232+        d = self.do_cli("create-alias", "tahoe")
233+
234+        d.addCallback(lambda res:
235+                      self.do_cli("put", rel_fn, "à trier.txt"))
236+        def _uploaded((rc,stdout,stderr)):
237+            readcap = stdout.strip()
238+            self.failUnless(readcap.startswith("URI:LIT:"))
239+            self.failUnless("201 Created" in stderr, stderr)
240+            self.readcap = readcap
241+        d.addCallback(_uploaded)
242+
243+        d.addCallback(lambda res:
244+                      self.do_cli("get", "tahoe:à trier.txt"))
245+        d.addCallback(lambda (rc,stdout,stderr):
246+                      self.failUnlessEqual(stdout, DATA))
247+
248+        return d
249+
250 class List(GridTestMixin, CLITestMixin, unittest.TestCase):
251     def test_list(self):
252         self.basedir = "cli/List/list"
253@@ -795,30 +862,37 @@
254     def test_unicode_filename(self):
255         self.basedir = "cli/Cp/unicode_filename"
256         self.set_up_grid()
257+        d = self.do_cli("create-alias", "tahoe")
258+
259+        # Use unicode strings when calling os functions
260+        if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
261+            fn1 = os.path.join(self.basedir, u"Artonwall")
262+        else:
263+            fn1 = os.path.join(self.basedir, u"Ärtonwall")
264 
265-        fn1 = os.path.join(self.basedir, "Ärtonwall")
266         DATA1 = "unicode file content"
267         open(fn1, "wb").write(DATA1)
268+        d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:Ärtonwall"))
269+
270+        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
271+        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
272 
273-        fn2 = os.path.join(self.basedir, "Metallica")
274+
275+        fn2 = os.path.join(self.basedir, u"Metallica")
276         DATA2 = "non-unicode file content"
277         open(fn2, "wb").write(DATA2)
278 
279         # Bug #534
280         # Assure that uploading a file whose name contains unicode character doesn't
281         # prevent further uploads in the same directory
282-        d = self.do_cli("create-alias", "tahoe")
283-        d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
284-        d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
285-
286-        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
287-        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
288+        d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
289 
290         d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
291         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
292 
293+        d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
294+
295         return d
296-    test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms.  See issue ticket #534."
297 
298     def test_dangling_symlink_vs_recursion(self):
299         if not hasattr(os, 'symlink'):
300@@ -837,6 +911,17 @@
301                                               dn, "tahoe:"))
302         return d
303 
304+class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
305+    def test_unicode_mkdir(self):
306+        self.basedir = os.path.dirname(self.mktemp())
307+        self.set_up_grid()
308+
309+        d = self.do_cli("create-alias", "tahoe")
310+        d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))
311+
312+        return d
313+
314+
315 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
316 
317     def writeto(self, path, data):
318@@ -871,6 +956,11 @@
319         self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
320         self.writeto("parent/blah.txt", "blah")
321 
322+        if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
323+            self.writeto(u"parent/artonwall.txt", "Marmelade Jacuzzi")
324+        else:
325+            self.writeto(u"parent/ärtonwall.txt", "Marmelade Jacuzzi")
326+
327         def do_backup(use_backupdb=True, verbose=False):
328             cmd = ["backup"]
329             if not have_bdb or not use_backupdb:
330@@ -895,8 +985,8 @@
331             self.failUnlessEqual(err, "")
332             self.failUnlessEqual(rc, 0)
333             fu, fr, dc, dr = self.count_output(out)
334-            # foo.txt, bar.txt, blah.txt
335-            self.failUnlessEqual(fu, 3)
336+            # foo.txt, bar.txt, blah.txt, ärtonwall.txt
337+            self.failUnlessEqual(fu, 4)
338             self.failUnlessEqual(fr, 0)
339             # empty, home, home/parent, home/parent/subdir
340             self.failUnlessEqual(dc, 4)
341@@ -945,9 +1035,9 @@
342             self.failUnlessEqual(rc, 0)
343             if have_bdb:
344                 fu, fr, dc, dr = self.count_output(out)
345-                # foo.txt, bar.txt, blah.txt
346+                # foo.txt, bar.txt, blah.txt, ärtonwall.txt
347                 self.failUnlessEqual(fu, 0)
348-                self.failUnlessEqual(fr, 3)
349+                self.failUnlessEqual(fr, 4)
350                 # empty, home, home/parent, home/parent/subdir
351                 self.failUnlessEqual(dc, 0)
352                 self.failUnlessEqual(dr, 4)
353@@ -975,9 +1065,9 @@
354                 self.failUnlessEqual(rc, 0)
355                 fu, fr, dc, dr = self.count_output(out)
356                 fchecked, dchecked, dread = self.count_output2(out)
357-                self.failUnlessEqual(fchecked, 3)
358+                self.failUnlessEqual(fchecked, 4)
359                 self.failUnlessEqual(fu, 0)
360-                self.failUnlessEqual(fr, 3)
361+                self.failUnlessEqual(fr, 4)
362                 # TODO: backupdb doesn't do dirs yet; when it does, this will
363                 # change to dchecked=4, and maybe dread=0
364                 self.failUnlessEqual(dchecked, 0)
365@@ -1023,8 +1113,8 @@
366                 fu, fr, dc, dr = self.count_output(out)
367                 # new foo.txt, surprise file, subfile, empty
368                 self.failUnlessEqual(fu, 4)
369-                # old bar.txt
370-                self.failUnlessEqual(fr, 1)
371+                # old bar.txt, ärtonwall.txt
372+                self.failUnlessEqual(fr, 2)
373                 # home, parent, subdir, blah.txt, surprisedir
374                 self.failUnlessEqual(dc, 5)
375                 self.failUnlessEqual(dr, 0)
376@@ -1063,7 +1153,7 @@
377             self.failUnlessEqual(err, "")
378             self.failUnlessEqual(rc, 0)
379             fu, fr, dc, dr = self.count_output(out)
380-            self.failUnlessEqual(fu, 5)
381+            self.failUnlessEqual(fu, 6)
382             self.failUnlessEqual(fr, 0)
383             self.failUnlessEqual(dc, 0)
384             self.failUnlessEqual(dr, 5)
385diff -rN -u old-unicode/src/allmydata/util/stringutils.py new-unicode/src/allmydata/util/stringutils.py
386--- old-unicode/src/allmydata/util/stringutils.py       1969-12-31 17:00:00.000000000 -0700
387+++ new-unicode/src/allmydata/util/stringutils.py       2009-04-08 22:55:56.000000000 -0600
388@@ -0,0 +1,39 @@
389+"""
390+Functions used to convert inputs from whatever encoding is used on the system
391+to unicode and back.
392+
393+TODO:
394+  * Accept two cli arguments --argv-encoding and --filesystem-encoding
395+"""
396+
397+import sys
398+from allmydata.util.assertutil import precondition
399+from twisted.python import usage
400+
401+def fs_to_unicode(s):
402+    """
403+    Decode a filename (or a directory name) to unicode using the same encoding
404+    as the filesystem.
405+    """
406+    # Filename encoding detection is somewhat more reliable thanks to
407+    # sys.getfilesystemencoding(). However, individual filenames may still be
408+    # encoded using an encoding other than the one reported for the filesystem.
409+
410+    precondition(isinstance(s, str), s)
411+    encoding = sys.getfilesystemencoding()
412+    try:
413+        return unicode(s, encoding)
414+    except UnicodeDecodeError:
415+        raise usage.UsageError("Filename '%s' cannot be decoded using the current encoding of your filesystem (%s). Please rename this file." % (s, encoding))
416+
417+def unicode_to_fs(s):
418+    """
419+    Encode a unicode object used as a file or directory name.
420+    """
421+
422+    precondition(isinstance(s, unicode), s)
423+    encoding = sys.getfilesystemencoding()
424+    try:
425+        return s.encode(encoding)
426+    except UnicodeEncodeError:
427+        raise usage.UsageError("Filename '%s' cannot be encoded using the current encoding of your filesystem (%s). Please configure your locale correctly or rename this file." % (s, encoding))
428