[tahoe-dev] Reviewers wanted !
Francois Deppierraz
francois at ctrlaltdel.ch
Fri Jun 5 13:03:40 PDT 2009
Hi Folks,
Tahoe's 1.5.0 release is approaching and IMHO support for accented
characters in filenames is a pretty important feature (see bug #534 for
details).
Because this is a somewhat large change, I need *your* help reviewing
it. As far as I can tell, the proposed changes add support for the basic
functionality without compromising further enhancements or introducing
compatibility problems.
The current state of the code only tries to decode filenames according
to Python's getfilesystemencoding() without any clever handling of badly
encoded filenames. In such a case, an error message is displayed to the
user and tahoe exits.
Advanced heuristics such as the ones that were thoroughly discussed on
this mailing list are not yet implemented.
I've included the complete patch at the end of this mail to collect your
comments inline, but you can also download a darcs patch bundle
containing all the discrete patches from [1] for easier review.
Non-Linux users are also wholeheartedly welcome because all this was
developed under Ubuntu Linux.
Thank you very much for your time !
François
Bug #534: http://allmydata.org/trac/tahoe/ticket/534
[1]
http://allmydata.org/trac/tahoe/attachment/ticket/534/tahoe-534-bundle.dpatch
diff -rN -u old-tahoe-534/docs/frontends/CLI.txt
new-tahoe-534/docs/frontends/CLI.txt
--- old-tahoe-534/docs/frontends/CLI.txt 2009-06-05 21:14:40.000000000 +0200
+++ new-tahoe-534/docs/frontends/CLI.txt 2009-06-05 21:14:40.000000000 +0200
@@ -91,9 +91,21 @@
These commands also use a table of "aliases" to figure out which directory
they ought to use a starting point. This is explained in more detail below.
-In Tahoe v1.3.0, passing non-ascii characters to the cli is not
guaranteed to
-work, although it might work on your platform, especially if your platform
-uses utf-8 encoding.
+As of Tahoe v1.3.1, filenames containing non-ascii characters are
+supported on the command line if your terminal is correctly configured
+for UTF-8 support. This is usually the case on modern GNU/Linux
+distributions.
+
+If your terminal doesn't support UTF-8, you will still be able to list
+directories but non-ascii characters will be replaced by a question mark
+(?) on display.
+
+Reading from and writing to files whose names contain non-ascii
+characters is also supported when your system correctly understands them.
+Under Unix, this is usually handled by locale settings. If Tahoe cannot
+correctly decode a filename, it will raise an error. In such a case,
+you'll need to correct the name of your file, possibly with help from
+tools such as convmv.
=== Starting Directories ===
diff -rN -u old-tahoe-534/src/allmydata/scripts/cli.py
new-tahoe-534/src/allmydata/scripts/cli.py
--- old-tahoe-534/src/allmydata/scripts/cli.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/cli.py 2009-06-05
21:14:40.000000000 +0200
@@ -1,6 +1,7 @@
import os.path, re, sys, fnmatch
from twisted.python import usage
from allmydata.scripts.common import BaseOptions, get_aliases
+from allmydata.util.stringutils import argv_to_unicode
NODEURL_RE=re.compile("http://([^:]*)(:([1-9][0-9]*))?")
@@ -49,12 +50,12 @@
class MakeDirectoryOptions(VDriveOptions):
def parseArgs(self, where=""):
- self.where = where
+ self.where = argv_to_unicode(where)
longdesc = """Create a new directory, either unlinked or as a
subdirectory."""
class AddAliasOptions(VDriveOptions):
def parseArgs(self, alias, cap):
- self.alias = alias
+ self.alias = argv_to_unicode(alias)
self.cap = cap
def getSynopsis(self):
@@ -64,7 +65,7 @@
class CreateAliasOptions(VDriveOptions):
def parseArgs(self, alias):
- self.alias = alias
+ self.alias = argv_to_unicode(alias)
def getSynopsis(self):
return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)
@@ -83,7 +84,7 @@
("json", None, "Show the raw JSON output"),
]
def parseArgs(self, where=""):
- self.where = where
+ self.where = argv_to_unicode(where)
longdesc = """List the contents of some portion of the virtual
drive."""
@@ -94,8 +95,13 @@
# tahoe get FOO bar # write to local file
# tahoe get tahoe:FOO bar # same
- self.from_file = arg1
- self.to_file = arg2
+ self.from_file = argv_to_unicode(arg1)
+
+ if arg2:
+ self.to_file = argv_to_unicode(arg2)
+ else:
+ self.to_file = None
+
if self.to_file == "-":
self.to_file = None
@@ -131,15 +137,15 @@
# tahoe put bar tahoe:FOO # same
if arg1 is not None and arg2 is not None:
- self.from_file = arg1
- self.to_file = arg2
+ self.from_file = argv_to_unicode(arg1)
+ self.to_file = argv_to_unicode(arg2)
elif arg1 is not None and arg2 is None:
- self.from_file = arg1 # might be "-"
+ self.from_file = argv_to_unicode(arg1) # might be "-"
self.to_file = None
else:
self.from_file = None
self.to_file = None
- if self.from_file == "-":
+ if self.from_file == u"-":
self.from_file = None
def getSynopsis(self):
@@ -176,28 +182,28 @@
def parseArgs(self, *args):
if len(args) < 2:
raise usage.UsageError("cp requires at least two arguments")
- self.sources = args[:-1]
- self.destination = args[-1]
+ self.sources = map(argv_to_unicode, args[:-1])
+ self.destination = argv_to_unicode(args[-1])
class RmOptions(VDriveOptions):
def parseArgs(self, where):
- self.where = where
+ self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s rm VDRIVE_FILE" % (os.path.basename(sys.argv[0]),)
class MvOptions(VDriveOptions):
def parseArgs(self, frompath, topath):
- self.from_file = frompath
- self.to_file = topath
+ self.from_file = argv_to_unicode(frompath)
+ self.to_file = argv_to_unicode(topath)
def getSynopsis(self):
return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
class LnOptions(VDriveOptions):
def parseArgs(self, frompath, topath):
- self.from_file = frompath
- self.to_file = topath
+ self.from_file = argv_to_unicode(frompath)
+ self.to_file = argv_to_unicode(topath)
def getSynopsis(self):
return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
@@ -221,8 +227,8 @@
self['exclude'] = set()
def parseArgs(self, localdir, topath):
- self.from_dir = localdir
- self.to_dir = topath
+ self.from_dir = argv_to_unicode(localdir)
+ self.to_dir = argv_to_unicode(topath)
def getSynopsis(Self):
return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
@@ -270,7 +276,7 @@
class WebopenOptions(VDriveOptions):
def parseArgs(self, where=''):
- self.where = where
+ self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
@@ -285,7 +291,7 @@
("raw", "r", "Display raw JSON data instead of parsed"),
]
def parseArgs(self, where=''):
- self.where = where
+ self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s manifest [ALIAS:PATH]" %
(os.path.basename(sys.argv[0]),)
@@ -297,7 +303,7 @@
("raw", "r", "Display raw JSON data instead of parsed"),
]
def parseArgs(self, where=''):
- self.where = where
+ self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
@@ -312,7 +318,7 @@
("add-lease", None, "Add/renew lease on all shares"),
]
def parseArgs(self, where=''):
- self.where = where
+ self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
@@ -328,7 +334,7 @@
("verbose", "v", "Be noisy about what is happening."),
]
def parseArgs(self, where=''):
- self.where = where
+ self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s deep-check [ALIAS:PATH]" %
(os.path.basename(sys.argv[0]),)
diff -rN -u old-tahoe-534/src/allmydata/scripts/common.py
new-tahoe-534/src/allmydata/scripts/common.py
--- old-tahoe-534/src/allmydata/scripts/common.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/common.py 2009-06-05
21:14:40.000000000 +0200
@@ -1,7 +1,8 @@
import os, sys, urllib
+import codecs
from twisted.python import usage
-
+from allmydata.util.stringutils import unicode_to_url
class BaseOptions:
# unit tests can override these to point at StringIO instances
@@ -100,14 +101,14 @@
except EnvironmentError:
pass
try:
- f = open(aliasfile, "r")
+ f = codecs.open(aliasfile, "r", "utf-8")
for line in f.readlines():
line = line.strip()
if line.startswith("#") or not line:
continue
name, cap = line.split(":", 1)
# normalize it: remove http: prefix, urldecode
- cap = cap.strip()
+ cap = cap.strip().encode('ascii')
aliases[name] = uri.from_string_dirnode(cap).to_string()
except EnvironmentError:
pass
@@ -163,4 +164,4 @@
def escape_path(path):
segments = path.split("/")
- return "/".join([urllib.quote(s) for s in segments])
+ return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])
diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py
new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py
--- old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py 2009-06-05
21:14:40.000000000 +0200
@@ -1,8 +1,11 @@
import os.path
+import codecs
+import sys
from allmydata import uri
from allmydata.scripts.common_http import do_http, check_http_error
from allmydata.scripts.common import get_aliases
+from allmydata.util.stringutils import unicode_to_stdout
def add_alias(options):
nodedir = options['node-directory']
@@ -52,10 +55,10 @@
new_uri = resp.read().strip()
# probably check for others..
- f = open(aliasfile, "a")
+ f = codecs.open(aliasfile, "a", "utf-8")
f.write("%s: %s\n" % (alias, new_uri))
f.close()
- print >>stdout, "Alias '%s' created" % (alias,)
+ print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),)
return 0
def list_aliases(options):
diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_backup.py
new-tahoe-534/src/allmydata/scripts/tahoe_backup.py
--- old-tahoe-534/src/allmydata/scripts/tahoe_backup.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/tahoe_backup.py 2009-06-05
21:14:40.000000000 +0200
@@ -4,11 +4,15 @@
import urllib
import simplejson
import datetime
+import sys
from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
from allmydata.scripts.common_http import do_http
from allmydata import uri
from allmydata.util import time_format
from allmydata.scripts import backupdb
+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs,
unicode_to_stdout
+from allmydata.util.assertutil import precondition
+from twisted.python import usage
class HTTPError(Exception):
pass
@@ -245,9 +249,10 @@
def verboseprint(self, msg):
if self.verbosity >= 2:
- print >>self.options.stdout, msg
+ print >>self.options.stdout, unicode_to_stdout(msg)
def process(self, localpath, olddircap):
+ precondition(isinstance(localpath, unicode), localpath)
# returns newdircap
self.verboseprint("processing %s, olddircap %s" % (localpath,
olddircap))
@@ -256,7 +261,8 @@
olddircontents = self.readdir(olddircap)
newdircontents = {} # childname -> (type, rocap, metadata)
- for child in self.options.filter_listdir(os.listdir(localpath)):
+ for child in
self.options.filter_listdir(os.listdir(unicode_to_fs(localpath))):
+ child = fs_to_unicode(child)
childpath = os.path.join(localpath, child)
if os.path.isdir(childpath):
metadata = get_local_metadata(childpath)
@@ -342,6 +348,8 @@
return contents
def upload(self, childpath):
+ precondition(isinstance(childpath, unicode), childpath)
+
#self.verboseprint("uploading %s.." % childpath)
metadata = get_local_metadata(childpath)
@@ -350,7 +358,7 @@
if must_upload:
self.verboseprint("uploading %s.." % childpath)
- infileobj = open(os.path.expanduser(childpath), "rb")
+ infileobj =
open(unicode_to_fs(os.path.expanduser(childpath)), "rb")
url = self.options['node-url'] + "uri"
resp = do_http("PUT", url, infileobj)
if resp.status not in (200, 201):
diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_cp.py
new-tahoe-534/src/allmydata/scripts/tahoe_cp.py
--- old-tahoe-534/src/allmydata/scripts/tahoe_cp.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/tahoe_cp.py 2009-06-05
21:14:40.000000000 +0200
@@ -4,9 +4,13 @@
import simplejson
from cStringIO import StringIO
from twisted.python.failure import Failure
+import sys
from allmydata.scripts.common import get_alias, escape_path,
DefaultAliasMarker
from allmydata.scripts.common_http import do_http
from allmydata import uri
+from twisted.python import usage
+from allmydata.util.stringutils import fs_to_unicode, unicode_to_fs,
unicode_to_url
+from allmydata.util.assertutil import precondition
def ascii_or_none(s):
if s is None:
@@ -69,6 +73,7 @@
class LocalFileSource:
def __init__(self, pathname):
+ precondition(isinstance(pathname, unicode), pathname)
self.pathname = pathname
def need_to_copy_bytes(self):
@@ -79,6 +84,7 @@
class LocalFileTarget:
def __init__(self, pathname):
+ precondition(isinstance(pathname, unicode), pathname)
self.pathname = pathname
def put_file(self, inf):
outf = open(self.pathname, "wb")
@@ -91,6 +97,7 @@
class LocalMissingTarget:
def __init__(self, pathname):
+ precondition(isinstance(pathname, unicode), pathname)
self.pathname = pathname
def put_file(self, inf):
@@ -104,6 +111,8 @@
class LocalDirectorySource:
def __init__(self, progressfunc, pathname):
+ precondition(isinstance(pathname, unicode), pathname)
+
self.progressfunc = progressfunc
self.pathname = pathname
self.children = None
@@ -112,8 +121,9 @@
if self.children is not None:
return
self.children = {}
- children = os.listdir(self.pathname)
+ children = os.listdir(unicode_to_fs(self.pathname))
for i,n in enumerate(children):
+ n = fs_to_unicode(n)
self.progressfunc("examining %d of %d" % (i, len(children)))
pn = os.path.join(self.pathname, n)
if os.path.isdir(pn):
@@ -129,6 +139,8 @@
class LocalDirectoryTarget:
def __init__(self, progressfunc, pathname):
+ precondition(isinstance(pathname, unicode), pathname)
+
self.progressfunc = progressfunc
self.pathname = pathname
self.children = None
@@ -137,8 +149,9 @@
if self.children is not None:
return
self.children = {}
- children = os.listdir(self.pathname)
+ children = os.listdir(unicode_to_fs(self.pathname))
for i,n in enumerate(children):
+ n = fs_to_unicode(n)
self.progressfunc("examining %d of %d" % (i, len(children)))
pn = os.path.join(self.pathname, n)
if os.path.isdir(pn):
@@ -160,8 +173,9 @@
return LocalDirectoryTarget(self.progressfunc, pathname)
def put_file(self, name, inf):
+ precondition(isinstance(name, unicode), name)
pathname = os.path.join(self.pathname, name)
- outf = open(pathname, "wb")
+ outf = open(unicode_to_fs(pathname), "wb")
while True:
data = inf.read(32768)
if not data:
@@ -350,7 +364,7 @@
if self.writecap:
url = self.nodeurl + "/".join(["uri",
urllib.quote(self.writecap),
-
urllib.quote(name.encode('utf-8'))])
+
urllib.quote(unicode_to_url(name))])
self.children[name] = TahoeFileTarget(self.nodeurl,
mutable,
writecap,
readcap, url)
else:
diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_ls.py
new-tahoe-534/src/allmydata/scripts/tahoe_ls.py
--- old-tahoe-534/src/allmydata/scripts/tahoe_ls.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/tahoe_ls.py 2009-06-05
21:14:40.000000000 +0200
@@ -3,6 +3,7 @@
import simplejson
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path
from allmydata.scripts.common_http import do_http
+from allmydata.util.stringutils import unicode_to_stdout
def list(options):
nodeurl = options['node-url']
@@ -112,7 +113,7 @@
line.append(ctime_s)
if not options["classify"]:
classify = ""
- line.append(name + classify)
+ line.append(unicode_to_stdout(name) + classify)
if options["uri"]:
line.append(uri)
if options["readonly-uri"]:
diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py
new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py
--- old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py 2009-06-05
21:14:40.000000000 +0200
@@ -80,7 +80,7 @@
try:
print >>stdout, d["cap"], "/".join(d["path"])
except UnicodeEncodeError:
- print >>stdout, d["cap"], "/".join([p.encode("utf-8")
+ print >>stdout, d["cap"],
"/".join([unicode_to_stdout(p)
for p in
d["path"]])
def manifest(options):
diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py
new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py
--- old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py 2009-06-05
21:14:40.000000000 +0200
@@ -2,6 +2,7 @@
import urllib
from allmydata.scripts.common_http import do_http, check_http_error
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS
+from allmydata.util.stringutils import unicode_to_url
def mkdir(options):
nodeurl = options['node-url']
@@ -31,7 +32,7 @@
path = path[:-1]
# path (in argv) must be "/".join([s.encode("utf-8") for s in
segments])
url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
- urllib.quote(path))
+
urllib.quote(unicode_to_url(path)))
resp = do_http("POST", url)
check_http_error(resp, stderr)
new_uri = resp.read().strip()
diff -rN -u old-tahoe-534/src/allmydata/test/test_cli.py
new-tahoe-534/src/allmydata/test/test_cli.py
--- old-tahoe-534/src/allmydata/test/test_cli.py 2009-06-05
21:14:40.000000000 +0200
+++ new-tahoe-534/src/allmydata/test/test_cli.py 2009-06-05
21:14:40.000000000 +0200
@@ -6,6 +6,7 @@
import urllib
import re
import simplejson
+import sys
from allmydata.util import fileutil, hashutil, base32
from allmydata import uri
@@ -518,6 +519,48 @@
self._test_webopen(["two:"], self.two_url)
d.addCallback(_test_urls)
+ d.addCallback(lambda res: self.do_cli("create-alias", "études"))
+ def _check_create_unicode((rc,stdout,stderr)):
+ self.failUnlessEqual(rc, 0)
+ self.failIf(stderr)
+
+ # If stdout only supports ascii, accentuated characters are
+ # being replaced by '?'
+ if sys.stdout.encoding == "ANSI_X3.4-1968":
+ self.failUnless("Alias '?tudes' created" in stdout)
+ else:
+ self.failUnless("Alias 'études' created" in stdout)
+
+ aliases = get_aliases(self.get_clientdir())
+ self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
+ d.addCallback(_check_create_unicode)
+
+ d.addCallback(lambda res: self.do_cli("ls", "études:"))
+ def _check_ls1((rc, stdout, stderr)):
+ self.failUnlessEqual(rc, 0)
+ self.failIf(stderr)
+
+ self.failUnlessEqual(stdout, "")
+ d.addCallback(_check_ls1)
+
+ d.addCallback(lambda res: self.do_cli("put", "-",
"études:uploaded.txt",
+ stdin="Blah blah blah"))
+
+ d.addCallback(lambda res: self.do_cli("ls", "études:"))
+ def _check_ls2((rc, stdout, stderr)):
+ self.failUnlessEqual(rc, 0)
+ self.failIf(stderr)
+
+ self.failUnlessEqual(stdout, "uploaded.txt\n")
+ d.addCallback(_check_ls2)
+
+ d.addCallback(lambda res: self.do_cli("get",
"études:uploaded.txt"))
+ def _check_get((rc, stdout, stderr)):
+ self.failUnlessEqual(rc, 0)
+ self.failIf(stderr)
+ self.failUnlessEqual(stdout, "Blah blah blah")
+ d.addCallback(_check_get)
+
return d
class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
@@ -739,6 +782,37 @@
d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out,
DATA2))
return d
+ def test_immutable_from_file_unicode(self):
+ # tahoe put file.txt "à trier.txt"
+ self.basedir = os.path.dirname(self.mktemp())
+ self.set_up_grid()
+
+ rel_fn = os.path.join(self.basedir, "DATAFILE")
+ abs_fn = os.path.abspath(rel_fn)
+ # we make the file small enough to fit in a LIT file, for speed
+ DATA = "short file"
+ f = open(rel_fn, "w")
+ f.write(DATA)
+ f.close()
+
+ d = self.do_cli("create-alias", "tahoe")
+
+ d.addCallback(lambda res:
+ self.do_cli("put", rel_fn, "à trier.txt"))
+ def _uploaded((rc,stdout,stderr)):
+ readcap = stdout.strip()
+ self.failUnless(readcap.startswith("URI:LIT:"))
+ self.failUnless("201 Created" in stderr, stderr)
+ self.readcap = readcap
+ d.addCallback(_uploaded)
+
+ d.addCallback(lambda res:
+ self.do_cli("get", "tahoe:à trier.txt"))
+ d.addCallback(lambda (rc,stdout,stderr):
+ self.failUnlessEqual(stdout, DATA))
+
+ return d
+
class List(GridTestMixin, CLITestMixin, unittest.TestCase):
def test_list(self):
self.basedir = "cli/List/list"
@@ -795,30 +869,37 @@
def test_unicode_filename(self):
self.basedir = "cli/Cp/unicode_filename"
self.set_up_grid()
+ d = self.do_cli("create-alias", "tahoe")
+
+ # Use unicode strings when calling os functions
+ if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
+ fn1 = os.path.join(self.basedir, u"Artonwall")
+ else:
+ fn1 = os.path.join(self.basedir, u"Ärtonwall")
- fn1 = os.path.join(self.basedir, "Ärtonwall")
DATA1 = "unicode file content"
open(fn1, "wb").write(DATA1)
+ d.addCallback(lambda res: self.do_cli("cp",
fn1.encode('utf-8'), "tahoe:Ärtonwall"))
+
+ d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
+ d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out,
DATA1))
- fn2 = os.path.join(self.basedir, "Metallica")
+
+ fn2 = os.path.join(self.basedir, u"Metallica")
DATA2 = "non-unicode file content"
open(fn2, "wb").write(DATA2)
# Bug #534
# Assure that uploading a file whose name contains unicode
character doesn't
# prevent further uploads in the same directory
- d = self.do_cli("create-alias", "tahoe")
- d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
- d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
-
- d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
- d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out,
DATA1))
+ d.addCallback(lambda res: self.do_cli("cp",
fn2.encode('utf-8'), "tahoe:"))
d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out,
DATA2))
+ d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
+
return d
- test_unicode_filename.todo = "This behavior is not yet supported,
although it does happen to work (for reasons that are ill-understood) on
many platforms. See issue ticket #534."
def test_dangling_symlink_vs_recursion(self):
if not hasattr(os, 'symlink'):
@@ -837,6 +918,17 @@
dn, "tahoe:"))
return d
+class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
+ def test_unicode_mkdir(self):
+ self.basedir = os.path.dirname(self.mktemp())
+ self.set_up_grid()
+
+ d = self.do_cli("create-alias", "tahoe")
+ d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))
+
+ return d
+
+
class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
def writeto(self, path, data):
@@ -871,6 +963,11 @@
self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
self.writeto("parent/blah.txt", "blah")
+ if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
+ self.writeto(u"parent/artonwall.txt", "Marmelade Jacuzzi")
+ else:
+ self.writeto(u"parent/ärtonwall.txt", "Marmelade Jacuzzi")
+
def do_backup(use_backupdb=True, verbose=False):
cmd = ["backup"]
if not have_bdb or not use_backupdb:
@@ -895,8 +992,8 @@
self.failUnlessEqual(err, "")
self.failUnlessEqual(rc, 0)
fu, fr, dc, dr = self.count_output(out)
- # foo.txt, bar.txt, blah.txt
- self.failUnlessEqual(fu, 3)
+ # foo.txt, bar.txt, blah.txt, ärtonwall.txt
+ self.failUnlessEqual(fu, 4)
self.failUnlessEqual(fr, 0)
# empty, home, home/parent, home/parent/subdir
self.failUnlessEqual(dc, 4)
@@ -945,9 +1042,9 @@
self.failUnlessEqual(rc, 0)
if have_bdb:
fu, fr, dc, dr = self.count_output(out)
- # foo.txt, bar.txt, blah.txt
+ # foo.txt, bar.txt, blah.txt, ärtonwall.txt
self.failUnlessEqual(fu, 0)
- self.failUnlessEqual(fr, 3)
+ self.failUnlessEqual(fr, 4)
# empty, home, home/parent, home/parent/subdir
self.failUnlessEqual(dc, 0)
self.failUnlessEqual(dr, 4)
@@ -975,9 +1072,9 @@
self.failUnlessEqual(rc, 0)
fu, fr, dc, dr = self.count_output(out)
fchecked, dchecked, dread = self.count_output2(out)
- self.failUnlessEqual(fchecked, 3)
+ self.failUnlessEqual(fchecked, 4)
self.failUnlessEqual(fu, 0)
- self.failUnlessEqual(fr, 3)
+ self.failUnlessEqual(fr, 4)
# TODO: backupdb doesn't do dirs yet; when it does,
this will
# change to dchecked=4, and maybe dread=0
self.failUnlessEqual(dchecked, 0)
@@ -1023,8 +1120,8 @@
fu, fr, dc, dr = self.count_output(out)
# new foo.txt, surprise file, subfile, empty
self.failUnlessEqual(fu, 4)
- # old bar.txt
- self.failUnlessEqual(fr, 1)
+ # old bar.txt, ärtonwall.txt
+ self.failUnlessEqual(fr, 2)
# home, parent, subdir, blah.txt, surprisedir
self.failUnlessEqual(dc, 5)
self.failUnlessEqual(dr, 0)
@@ -1063,7 +1160,7 @@
self.failUnlessEqual(err, "")
self.failUnlessEqual(rc, 0)
fu, fr, dc, dr = self.count_output(out)
- self.failUnlessEqual(fu, 5)
+ self.failUnlessEqual(fu, 6)
self.failUnlessEqual(fr, 0)
self.failUnlessEqual(dc, 0)
self.failUnlessEqual(dr, 5)
diff -rN -u old-tahoe-534/src/allmydata/util/stringutils.py
new-tahoe-534/src/allmydata/util/stringutils.py
--- old-tahoe-534/src/allmydata/util/stringutils.py 1970-01-01
01:00:00.000000000 +0100
+++ new-tahoe-534/src/allmydata/util/stringutils.py 2009-06-05
21:14:40.000000000 +0200
@@ -0,0 +1,70 @@
+"""
+Functions used to convert inputs from whatever encoding used in the
system to
+unicode and back.
+
+TODO:
+ * Accept two cli arguments --argv-encoding and --filesystem-encoding
+"""
+
+import sys
+from allmydata.util.assertutil import precondition
+from twisted.python import usage
+
+def argv_to_unicode(s):
+ """
+ Decode given argv element to unicode.
+ """
+ # sys.argv encoding detection in Python is not trivial so utf-8 is
+ # currently used by default and an informative error message is
given if
+ # the argument cannot be correctly decoded.
+
+ precondition(isinstance(s, str), s)
+ try:
+ return unicode(s, 'utf-8')
+ except UnicodeEncodeError:
+ raise usageError("Argument '%s' cannot be decoded as UTF-8." % s)
+
+def fs_to_unicode(s):
+ """
+ Decode a filename (or a directory name) to unicode using the same
encoding
+ as the filesystem.
+ """
+ # Filename encoding detection is a little bit better thanks to
+ # getfilesystemencoding() in the sys module. However, filenames can be
+ # encoded using another encoding than the one used on the filesystem.
+
+ precondition(isinstance(s, str), s)
+ encoding = sys.getfilesystemencoding()
+ try:
+ return unicode(s, encoding)
+ except UnicodeDecodeError:
+ raise usage.UsageError("Filename '%s' cannot be decoded using
the current encoding of your filesystem (%s). Please rename this file."
% (s, encoding))
+
+def unicode_to_fs(s):
+ """
+ Encode an unicode object used in file or directoy name.
+ """
+
+ precondition(isinstance(s, unicode), s)
+ encoding = sys.getfilesystemencoding()
+ try:
+ return s.encode(encoding)
+ except UnicodeEncodeError:
+ raise usage.UsageError("Filename '%s' cannot be encoded using
the current encoding of your filesystem (%s). Please configure your
locale correctly or rename this file." % (s, encoding))
+
+def unicode_to_url(s):
+ """
+ Encode an unicode object used in an URL.
+ """
+ # According to RFC 2718, non-ascii characters in url's must be
UTF-8 encoded.
+
+ precondition(isinstance(s, unicode), s)
+ return s.encode('utf-8')
+
+def unicode_to_stdout(s):
+ """
+ Encode an unicode object for representation on stdout.
+ """
+
+ precondition(isinstance(s, unicode), s)
+ return s.encode(sys.stdout.encoding, 'replace')
More information about the tahoe-dev
mailing list