diff -rN -u old-unicode/src/allmydata/scripts/tahoe_backup.py new-unicode/src/allmydata/scripts/tahoe_backup.py --- old-unicode/src/allmydata/scripts/tahoe_backup.py 2009-04-08 22:55:52.000000000 -0600 +++ new-unicode/src/allmydata/scripts/tahoe_backup.py 2009-04-08 22:55:55.000000000 -0600 @@ -4,11 +4,15 @@ import urllib import simplejson import datetime +import sys from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS from allmydata.scripts.common_http import do_http from allmydata import uri from allmydata.util import time_format from allmydata.scripts import backupdb +from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs +from allmydata.util.assertutil import precondition +from twisted.python import usage class HTTPError(Exception): pass @@ -248,6 +252,7 @@ print >>self.options.stdout, msg def process(self, localpath, olddircap): + precondition(isinstance(localpath, unicode), localpath) # returns newdircap self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap)) @@ -256,7 +261,8 @@ olddircontents = self.readdir(olddircap) newdircontents = {} # childname -> (type, rocap, metadata) - for child in self.options.filter_listdir(os.listdir(localpath)): + for child in self.options.filter_listdir(os.listdir(unicode_to_fs(localpath))): + child = fs_to_unicode(child) childpath = os.path.join(localpath, child) if os.path.isdir(childpath): metadata = get_local_metadata(childpath) @@ -342,6 +348,8 @@ return contents def upload(self, childpath): + precondition(isinstance(childpath, unicode), childpath) + #self.verboseprint("uploading %s.." % childpath) metadata = get_local_metadata(childpath) @@ -350,7 +358,7 @@ if must_upload: self.verboseprint("uploading %s.." 
% childpath) - infileobj = open(os.path.expanduser(childpath), "rb") + infileobj = open(unicode_to_fs(os.path.expanduser(childpath)), "rb") url = self.options['node-url'] + "uri" resp = do_http("PUT", url, infileobj) if resp.status not in (200, 201): diff -rN -u old-unicode/src/allmydata/scripts/tahoe_cp.py new-unicode/src/allmydata/scripts/tahoe_cp.py --- old-unicode/src/allmydata/scripts/tahoe_cp.py 2009-04-08 22:55:53.000000000 -0600 +++ new-unicode/src/allmydata/scripts/tahoe_cp.py 2009-04-08 22:55:55.000000000 -0600 @@ -4,9 +4,13 @@ import simplejson from cStringIO import StringIO from twisted.python.failure import Failure +import sys from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker from allmydata.scripts.common_http import do_http from allmydata import uri +from twisted.python import usage +from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs +from allmydata.util.assertutil import precondition def ascii_or_none(s): if s is None: @@ -69,6 +73,7 @@ class LocalFileSource: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def need_to_copy_bytes(self): @@ -79,6 +84,7 @@ class LocalFileTarget: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): outf = open(self.pathname, "wb") @@ -91,6 +97,7 @@ class LocalMissingTarget: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): @@ -104,6 +111,8 @@ class LocalDirectorySource: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -112,8 +121,9 @@ if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = os.listdir(unicode_to_fs(self.pathname)) for i,n in enumerate(children): + 
n = fs_to_unicode(n) self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) if os.path.isdir(pn): @@ -129,6 +139,8 @@ class LocalDirectoryTarget: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -137,8 +149,9 @@ if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = os.listdir(unicode_to_fs(self.pathname)) for i,n in enumerate(children): + n = fs_to_unicode(n) self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) if os.path.isdir(pn): @@ -160,8 +173,9 @@ return LocalDirectoryTarget(self.progressfunc, pathname) def put_file(self, name, inf): + precondition(isinstance(name, unicode), name) pathname = os.path.join(self.pathname, name) - outf = open(pathname, "wb") + outf = open(unicode_to_fs(pathname), "wb") while True: data = inf.read(32768) if not data: diff -rN -u old-unicode/src/allmydata/scripts/tahoe_ls.py new-unicode/src/allmydata/scripts/tahoe_ls.py --- old-unicode/src/allmydata/scripts/tahoe_ls.py 2009-04-08 22:55:53.000000000 -0600 +++ new-unicode/src/allmydata/scripts/tahoe_ls.py 2009-04-08 22:55:55.000000000 -0600 @@ -86,9 +86,9 @@ elif childtype == "filenode": t0 = "-" size = str(child[1]['size']) - classify = "" + classify = u"" if rw_uri: - classify = "*" + classify = u"*" else: t0 = "?" size = "?" 
diff -rN -u old-unicode/src/allmydata/test/test_cli.py new-unicode/src/allmydata/test/test_cli.py --- old-unicode/src/allmydata/test/test_cli.py 2009-04-08 22:55:53.000000000 -0600 +++ new-unicode/src/allmydata/test/test_cli.py 2009-04-08 22:55:56.000000000 -0600 @@ -1,5 +1,6 @@ # coding=utf-8 +import sys import os.path from twisted.trial import unittest from cStringIO import StringIO @@ -518,6 +519,41 @@ self._test_webopen(["two:"], self.two_url) d.addCallback(_test_urls) + d.addCallback(lambda res: self.do_cli("create-alias", "études")) + def _check_create_unicode((rc,stdout,stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + # If stdout only supports ascii, accentuated characters are + # being replaced by '?' + if sys.stdout.encoding == "ANSI_X3.4-1968": + self.failUnless("Alias '?tudes' created" in stdout) + else: + self.failUnless("Alias 'études' created" in stdout) + + aliases = get_aliases(self.get_clientdir()) + self.failUnless(aliases[u"études"].startswith("URI:DIR2:")) + d.addCallback(_check_create_unicode) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls1((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "") + d.addCallback(_check_ls1) + + d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt", + stdin="Blah blah blah")) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls2((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "uploaded.txt\n") + d.addCallback(_check_ls2) + return d class Put(GridTestMixin, CLITestMixin, unittest.TestCase): @@ -739,6 +775,37 @@ d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2)) return d + def test_immutable_from_file_unicode(self): + # tahoe put file.txt "à trier.txt" + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + rel_fn = os.path.join(self.basedir, "DATAFILE") + abs_fn = 
os.path.abspath(rel_fn) + # we make the file small enough to fit in a LIT file, for speed + DATA = "short file" + f = open(rel_fn, "w") + f.write(DATA) + f.close() + + d = self.do_cli("create-alias", "tahoe") + + d.addCallback(lambda res: + self.do_cli("put", rel_fn, "à trier.txt")) + def _uploaded((rc,stdout,stderr)): + readcap = stdout.strip() + self.failUnless(readcap.startswith("URI:LIT:")) + self.failUnless("201 Created" in stderr, stderr) + self.readcap = readcap + d.addCallback(_uploaded) + + d.addCallback(lambda res: + self.do_cli("get", "tahoe:à trier.txt")) + d.addCallback(lambda (rc,stdout,stderr): + self.failUnlessEqual(stdout, DATA)) + + return d + class List(GridTestMixin, CLITestMixin, unittest.TestCase): def test_list(self): self.basedir = "cli/List/list" @@ -795,30 +862,37 @@ def test_unicode_filename(self): self.basedir = "cli/Cp/unicode_filename" self.set_up_grid() + d = self.do_cli("create-alias", "tahoe") + + # Use unicode strings when calling os functions + if sys.getfilesystemencoding() == "ANSI_X3.4-1968": + fn1 = os.path.join(self.basedir, u"Artonwall") + else: + fn1 = os.path.join(self.basedir, u"Ärtonwall") - fn1 = os.path.join(self.basedir, "Ärtonwall") DATA1 = "unicode file content" open(fn1, "wb").write(DATA1) + d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:Ärtonwall")) + + d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall")) + d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1)) - fn2 = os.path.join(self.basedir, "Metallica") + + fn2 = os.path.join(self.basedir, u"Metallica") DATA2 = "non-unicode file content" open(fn2, "wb").write(DATA2) # Bug #534 # Assure that uploading a file whose name contains unicode character doesn't # prevent further uploads in the same directory - d = self.do_cli("create-alias", "tahoe") - d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:")) - d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:")) - - d.addCallback(lambda res: 
self.do_cli("get", "tahoe:Ärtonwall")) - d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1)) + d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:")) d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica")) d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2)) + d.addCallback(lambda res: self.do_cli("ls", "tahoe:")) + return d - test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms. See issue ticket #534." def test_dangling_symlink_vs_recursion(self): if not hasattr(os, 'symlink'): @@ -837,6 +911,17 @@ dn, "tahoe:")) return d +class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase): + def test_unicode_mkdir(self): + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + d = self.do_cli("create-alias", "tahoe") + d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead")) + + return d + + class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): def writeto(self, path, data): @@ -871,6 +956,11 @@ self.writeto("parent/subdir/bar.txt", "bar\n" * 1000) self.writeto("parent/blah.txt", "blah") + if sys.getfilesystemencoding() == "ANSI_X3.4-1968": + self.writeto(u"parent/artonwall.txt", "Marmelade Jacuzzi") + else: + self.writeto(u"parent/ärtonwall.txt", "Marmelade Jacuzzi") + def do_backup(use_backupdb=True, verbose=False): cmd = ["backup"] if not have_bdb or not use_backupdb: @@ -895,8 +985,8 @@ self.failUnlessEqual(err, "") self.failUnlessEqual(rc, 0) fu, fr, dc, dr = self.count_output(out) - # foo.txt, bar.txt, blah.txt - self.failUnlessEqual(fu, 3) + # foo.txt, bar.txt, blah.txt, ärtonwall.txt + self.failUnlessEqual(fu, 4) self.failUnlessEqual(fr, 0) # empty, home, home/parent, home/parent/subdir self.failUnlessEqual(dc, 4) @@ -945,9 +1035,9 @@ self.failUnlessEqual(rc, 0) if have_bdb: fu, fr, dc, dr = self.count_output(out) - # foo.txt, bar.txt, blah.txt + # foo.txt, 
bar.txt, blah.txt, ärtonwall.txt self.failUnlessEqual(fu, 0) - self.failUnlessEqual(fr, 3) + self.failUnlessEqual(fr, 4) # empty, home, home/parent, home/parent/subdir self.failUnlessEqual(dc, 0) self.failUnlessEqual(dr, 4) @@ -975,9 +1065,9 @@ self.failUnlessEqual(rc, 0) fu, fr, dc, dr = self.count_output(out) fchecked, dchecked, dread = self.count_output2(out) - self.failUnlessEqual(fchecked, 3) + self.failUnlessEqual(fchecked, 4) self.failUnlessEqual(fu, 0) - self.failUnlessEqual(fr, 3) + self.failUnlessEqual(fr, 4) # TODO: backupdb doesn't do dirs yet; when it does, this will # change to dchecked=4, and maybe dread=0 self.failUnlessEqual(dchecked, 0) @@ -1023,8 +1113,8 @@ fu, fr, dc, dr = self.count_output(out) # new foo.txt, surprise file, subfile, empty self.failUnlessEqual(fu, 4) - # old bar.txt - self.failUnlessEqual(fr, 1) + # old bar.txt, ärtonwall.txt + self.failUnlessEqual(fr, 2) # home, parent, subdir, blah.txt, surprisedir self.failUnlessEqual(dc, 5) self.failUnlessEqual(dr, 0) @@ -1063,7 +1153,7 @@ self.failUnlessEqual(err, "") self.failUnlessEqual(rc, 0) fu, fr, dc, dr = self.count_output(out) - self.failUnlessEqual(fu, 5) + self.failUnlessEqual(fu, 6) self.failUnlessEqual(fr, 0) self.failUnlessEqual(dc, 0) self.failUnlessEqual(dr, 5) diff -rN -u old-unicode/src/allmydata/util/stringutils.py new-unicode/src/allmydata/util/stringutils.py --- old-unicode/src/allmydata/util/stringutils.py 1969-12-31 17:00:00.000000000 -0700 +++ new-unicode/src/allmydata/util/stringutils.py 2009-04-08 22:55:56.000000000 -0600 @@ -0,0 +1,42 @@ +""" +Functions used to convert inputs from whatever encoding used in the system to +unicode and back. + +TODO: + * Accept two cli arguments --argv-encoding and --filesystem-encoding +""" + +import sys +from allmydata.util.assertutil import precondition +from twisted.python import usage + +def fs_to_unicode(s): + """ + Decode a filename (or a directory name) to unicode using the same encoding + as the filesystem. 
+ """ + # Filename encoding detection is a little bit better thanks to + # getfilesystemencoding() in the sys module. However, filenames can be + # encoded using another encoding than the one used on the filesystem. + + precondition(isinstance(s, str), s) + encoding = sys.getfilesystemencoding() + try: + return unicode(s, encoding) + except UnicodeDecodeError: + raise usage.UsageError("Filename '%s' cannot be decoded using the current encoding of your filesystem (%s). Please rename this file." % (s, encoding)) + +def unicode_to_fs(s): + """ + Encode a unicode file or directory name to a byte string using the + encoding of the filesystem. Raise usage.UsageError if it cannot be encoded. + """ + + precondition(isinstance(s, unicode), s) + encoding = sys.getfilesystemencoding() + try: + return s.encode(encoding) + except UnicodeEncodeError: + # %r keeps the message a pure-ASCII str, so printing the error cannot + # itself fail with UnicodeEncodeError on the unencodable name. + raise usage.UsageError("Filename %r cannot be encoded using the current encoding of your filesystem (%s). Please configure your locale correctly or rename this file." % (s, encoding))