Ticket #534: unicode-filenames-handling-v3.2.diff

File unicode-filenames-handling-v3.2.diff, 30.2 KB (added by francois, at 2010-05-18T23:26:26Z)
  • docs/frontends/CLI.txt

    Wed May 19 01:14:13 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
      * Fix handling of correctly encoded unicode filenames (#534)
      
      Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
      backup', have been improved to correctly handle filenames containing non-ASCII
      characters.
      
      In the case where Tahoe encounters a filename which cannot be decoded using the
      system encoding, an error will be returned and the operation will fail.  Under
      Linux, this typically happens when the filesystem contains filenames encoded
      with a different encoding (for instance latin1) than that of the system
      locale (for instance UTF-8).  In such a case, you'll need to fix your system
      with tools such as 'convmv' before using the Tahoe CLI.
      
      All CLI commands have been improved to support non-ASCII parameters such as
      filenames and aliases on all supported operating systems except Windows, for
      now.
        ***END OF DESCRIPTION***
      
      Place the long patch description above the ***END OF DESCRIPTION*** marker.
      The first line of this file will be the patch name.
      
      
      This patch contains the following changes:
      
    diff -rN -u old-tahoe-534/docs/frontends/CLI.txt new-tahoe-534/docs/frontends/CLI.txt
    old new  
    123123perspective on the graph of files and directories.
    124124
    125125Each tahoe node remembers a list of starting points, named "aliases",
    126 in a file named ~/.tahoe/private/aliases . These aliases are short
    127 strings that stand in for a directory read- or write- cap. If you use
    128 the command line "ls" without any "[STARTING_DIR]:" argument, then it
    129 will use the default alias, which is "tahoe", therefore "tahoe ls" has
    130 the same effect as "tahoe ls tahoe:".  The same goes for the other
    131 commands which can reasonably use a default alias: get, put, mkdir,
    132 mv, and rm.
     126in a file named ~/.tahoe/private/aliases . These aliases are short UTF-8
     127encoded strings that stand in for a directory read- or write- cap. If
     128you use the command line "ls" without any "[STARTING_DIR]:" argument,
     129then it will use the default alias, which is "tahoe", therefore "tahoe
     130ls" has the same effect as "tahoe ls tahoe:".  The same goes for the
     131other commands which can reasonably use a default alias: get, put,
     132mkdir, mv, and rm.
    133133
    134134For backwards compatibility with Tahoe-1.0, if the "tahoe": alias is not
    135135found in ~/.tahoe/private/aliases, the CLI will use the contents of
  • NEWS

    diff -rN -u old-tahoe-534/NEWS new-tahoe-534/NEWS
    old new  
    11User visible changes in Tahoe-LAFS.  -*- outline -*-
    22
     3* Release 1.7.0
     4
     5** Bugfixes
     6
     7*** Unicode filenames handling
     8
     9Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
     10backup', have been improved to correctly handle filenames containing non-ASCII
     11characters.
     12
     13In the case where Tahoe encounters a filename which cannot be decoded using the
     14system encoding, an error will be returned and the operation will fail.  Under
     15Linux, this typically happens when the filesystem contains filenames encoded
     16with a different encoding (for instance latin1) than that of the system
     17locale (for instance UTF-8).  In such a case, you'll need to fix your system
     18with tools such as 'convmv' before using the Tahoe CLI.
     19
     20All CLI commands have been improved to support non-ASCII parameters such as
     21filenames and aliases on all supported operating systems except Windows, for
     22now.
     23
    324* Release 1.6.1 (2010-02-27)
    425
    526** Bugfixes
  • src/allmydata/scripts/cli.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/cli.py new-tahoe-534/src/allmydata/scripts/cli.py
    old new  
    11import os.path, re, sys, fnmatch
    22from twisted.python import usage
    33from allmydata.scripts.common import BaseOptions, get_aliases
     4from allmydata.util.stringutils import argv_to_unicode
    45
    56NODEURL_RE=re.compile("http://([^:]*)(:([1-9][0-9]*))?")
    67
     
    4950
    5051class MakeDirectoryOptions(VDriveOptions):
    5152    def parseArgs(self, where=""):
    52         self.where = where
     53        self.where = argv_to_unicode(where)
    5354    longdesc = """Create a new directory, either unlinked or as a subdirectory."""
    5455
    5556class AddAliasOptions(VDriveOptions):
    5657    def parseArgs(self, alias, cap):
    57         self.alias = alias
     58        self.alias = argv_to_unicode(alias)
    5859        self.cap = cap
    5960
    6061    def getSynopsis(self):
     
    6465
    6566class CreateAliasOptions(VDriveOptions):
    6667    def parseArgs(self, alias):
    67         self.alias = alias
     68        self.alias = argv_to_unicode(alias)
    6869
    6970    def getSynopsis(self):
    7071        return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)
     
    8384        ("json", None, "Show the raw JSON output"),
    8485        ]
    8586    def parseArgs(self, where=""):
    86         self.where = where
     87        self.where = argv_to_unicode(where)
    8788
    8889    longdesc = """
    8990    List the contents of some portion of the grid.
     
    118119        # tahoe get FOO bar              # write to local file
    119120        # tahoe get tahoe:FOO bar        # same
    120121
    121         self.from_file = arg1
    122         self.to_file = arg2
     122        self.from_file = argv_to_unicode(arg1)
     123
     124        if arg2:
     125            self.to_file = argv_to_unicode(arg2)
     126        else:
     127            self.to_file = None
     128
    123129        if self.to_file == "-":
    124130            self.to_file = None
    125131
     
    151157        # see Examples below
    152158
    153159        if arg1 is not None and arg2 is not None:
    154             self.from_file = arg1
    155             self.to_file = arg2
     160            self.from_file = argv_to_unicode(arg1)
     161            self.to_file =  argv_to_unicode(arg2)
    156162        elif arg1 is not None and arg2 is None:
    157             self.from_file = arg1 # might be "-"
     163            self.from_file = argv_to_unicode(arg1) # might be "-"
    158164            self.to_file = None
    159165        else:
    160166            self.from_file = None
    161167            self.to_file = None
    162         if self.from_file == "-":
     168        if self.from_file == u"-":
    163169            self.from_file = None
    164170
    165171    def getSynopsis(self):
     
    197203    def parseArgs(self, *args):
    198204        if len(args) < 2:
    199205            raise usage.UsageError("cp requires at least two arguments")
    200         self.sources = args[:-1]
    201         self.destination = args[-1]
     206        self.sources = map(argv_to_unicode, args[:-1])
     207        self.destination = argv_to_unicode(args[-1])
    202208    def getSynopsis(self):
    203209        return "Usage: tahoe [options] cp FROM.. TO"
    204210    longdesc = """
     
    228234
    229235class RmOptions(VDriveOptions):
    230236    def parseArgs(self, where):
    231         self.where = where
     237        self.where = argv_to_unicode(where)
    232238
    233239    def getSynopsis(self):
    234240        return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),)
    235241
    236242class MvOptions(VDriveOptions):
    237243    def parseArgs(self, frompath, topath):
    238         self.from_file = frompath
    239         self.to_file = topath
     244        self.from_file = argv_to_unicode(frompath)
     245        self.to_file = argv_to_unicode(topath)
    240246
    241247    def getSynopsis(self):
    242248        return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
     
    254260
    255261class LnOptions(VDriveOptions):
    256262    def parseArgs(self, frompath, topath):
    257         self.from_file = frompath
    258         self.to_file = topath
     263        self.from_file = argv_to_unicode(frompath)
     264        self.to_file = argv_to_unicode(topath)
    259265
    260266    def getSynopsis(self):
    261267        return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
     
    279285        self['exclude'] = set()
    280286
    281287    def parseArgs(self, localdir, topath):
    282         self.from_dir = localdir
    283         self.to_dir = topath
     288        self.from_dir = argv_to_unicode(localdir)
     289        self.to_dir = argv_to_unicode(topath)
    284290
    285291    def getSynopsis(Self):
    286292        return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
     
    334340
    335341class WebopenOptions(VDriveOptions):
    336342    def parseArgs(self, where=''):
    337         self.where = where
     343        self.where = argv_to_unicode(where)
    338344
    339345    def getSynopsis(self):
    340346        return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    350356        ("raw", "r", "Display raw JSON data instead of parsed"),
    351357        ]
    352358    def parseArgs(self, where=''):
    353         self.where = where
     359        self.where = argv_to_unicode(where)
    354360
    355361    def getSynopsis(self):
    356362        return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    363369        ("raw", "r", "Display raw JSON data instead of parsed"),
    364370        ]
    365371    def parseArgs(self, where=''):
    366         self.where = where
     372        self.where = argv_to_unicode(where)
    367373
    368374    def getSynopsis(self):
    369375        return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    379385        ("add-lease", None, "Add/renew lease on all shares"),
    380386        ]
    381387    def parseArgs(self, where=''):
    382         self.where = where
     388        self.where = argv_to_unicode(where)
    383389
    384390    def getSynopsis(self):
    385391        return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    398404        ("verbose", "v", "Be noisy about what is happening."),
    399405        ]
    400406    def parseArgs(self, where=''):
    401         self.where = where
     407        self.where = argv_to_unicode(where)
    402408
    403409    def getSynopsis(self):
    404410        return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
  • src/allmydata/scripts/common.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/common.py new-tahoe-534/src/allmydata/scripts/common.py
    old new  
    11
    22import os, sys, urllib
     3import codecs
    34from twisted.python import usage
    4 
     5from allmydata.util.stringutils import unicode_to_url
     6from allmydata.util.assertutil import precondition
    57
    68class BaseOptions:
    79    # unit tests can override these to point at StringIO instances
     
    100102    except EnvironmentError:
    101103        pass
    102104    try:
    103         f = open(aliasfile, "r")
     105        f = codecs.open(aliasfile, "r", "utf-8")
    104106        for line in f.readlines():
    105107            line = line.strip()
    106108            if line.startswith("#") or not line:
    107109                continue
    108110            name, cap = line.split(":", 1)
    109111            # normalize it: remove http: prefix, urldecode
    110             cap = cap.strip()
     112            cap = cap.strip().encode('utf-8')
    111113            aliases[name] = uri.from_string_dirnode(cap).to_string()
    112114    except EnvironmentError:
    113115        pass
     
    138140    # and default is not found in aliases, an UnknownAliasError is
    139141    # raised.
    140142    path = path.strip()
    141     if uri.has_uri_prefix(path):
     143    if uri.has_uri_prefix(path.encode('utf-8')):
    142144        # We used to require "URI:blah:./foo" in order to get a subpath,
    143145        # stripping out the ":./" sequence. We still allow that for compatibility,
    144146        # but now also allow just "URI:blah/foo".
     
    180182
    181183def escape_path(path):
    182184    segments = path.split("/")
    183     return "/".join([urllib.quote(s) for s in segments])
     185    return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])
  • src/allmydata/scripts/tahoe_add_alias.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py
    old new  
    11
    22import os.path
     3import codecs
     4import sys
    35from allmydata import uri
    46from allmydata.scripts.common_http import do_http, check_http_error
    57from allmydata.scripts.common import get_aliases
    68from allmydata.util.fileutil import move_into_place
     9from allmydata.util.stringutils import unicode_to_stdout
     10
    711
    812def add_line_to_aliasfile(aliasfile, alias, cap):
    913    # we use os.path.exists, rather than catching EnvironmentError, to avoid
    1014    # clobbering the valuable alias file in case of spurious or transient
    1115    # filesystem errors.
    1216    if os.path.exists(aliasfile):
    13         f = open(aliasfile, "r")
     17        f = codecs.open(aliasfile, "r", "utf-8")
    1418        aliases = f.read()
    1519        f.close()
    1620        if not aliases.endswith("\n"):
     
    1822    else:
    1923        aliases = ""
    2024    aliases += "%s: %s\n" % (alias, cap)
    21     f = open(aliasfile+".tmp", "w")
     25    f = codecs.open(aliasfile+".tmp", "w", "utf-8")
    2226    f.write(aliases)
    2327    f.close()
    2428    move_into_place(aliasfile+".tmp", aliasfile)
     
    4145
    4246    add_line_to_aliasfile(aliasfile, alias, cap)
    4347
    44     print >>stdout, "Alias '%s' added" % (alias,)
     48    print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),)
    4549    return 0
    4650
    4751def create_alias(options):
     
    7478
    7579    add_line_to_aliasfile(aliasfile, alias, new_uri)
    7680
    77     print >>stdout, "Alias '%s' created" % (alias,)
     81    print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),)
    7882    return 0
    7983
    8084def list_aliases(options):
  • src/allmydata/scripts/tahoe_backup.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_backup.py new-tahoe-534/src/allmydata/scripts/tahoe_backup.py
    old new  
    99from allmydata.scripts.common_http import do_http
    1010from allmydata.util import time_format
    1111from allmydata.scripts import backupdb
     12import sys
     13from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode
     14from allmydata.util.assertutil import precondition
     15from twisted.python import usage
     16
    1217
    1318class HTTPError(Exception):
    1419    pass
     
    154159
    155160    def verboseprint(self, msg):
    156161        if self.verbosity >= 2:
     162            if isinstance(msg, unicode):
     163                msg = unicode_to_stdout(msg)
     164
    157165            print >>self.options.stdout, msg
    158166
    159167    def warn(self, msg):
    160168        print >>self.options.stderr, msg
    161169
    162170    def process(self, localpath):
     171        precondition(isinstance(localpath, unicode), localpath)
    163172        # returns newdircap
    164173
    165174        self.verboseprint("processing %s" % localpath)
     
    167176        compare_contents = {} # childname -> rocap
    168177
    169178        try:
    170             children = os.listdir(localpath)
     179            children = listdir_unicode(localpath)
    171180        except EnvironmentError:
    172181            self.directories_skipped += 1
    173182            self.warn("WARNING: permission denied on directory %s" % localpath)
     
    283292
    284293    # This function will raise an IOError exception when called on an unreadable file
    285294    def upload(self, childpath):
     295        precondition(isinstance(childpath, unicode), childpath)
     296
    286297        #self.verboseprint("uploading %s.." % childpath)
    287298        metadata = get_local_metadata(childpath)
    288299
     
    291302
    292303        if must_upload:
    293304            self.verboseprint("uploading %s.." % childpath)
    294             infileobj = open(os.path.expanduser(childpath), "rb")
     305            infileobj = open_unicode(os.path.expanduser(childpath), "rb")
    295306            url = self.options['node-url'] + "uri"
    296307            resp = do_http("PUT", url, infileobj)
    297308            if resp.status not in (200, 201):
  • src/allmydata/scripts/tahoe_cp.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_cp.py new-tahoe-534/src/allmydata/scripts/tahoe_cp.py
    old new  
    22import os.path
    33import urllib
    44import simplejson
     5import sys
    56from cStringIO import StringIO
    67from twisted.python.failure import Failure
    78from allmydata.scripts.common import get_alias, escape_path, \
    89                                     DefaultAliasMarker, UnknownAliasError
    910from allmydata.scripts.common_http import do_http
    1011from allmydata import uri
     12from twisted.python import usage
     13from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
     14from allmydata.util.assertutil import precondition
     15
    1116
    1217def ascii_or_none(s):
    1318    if s is None:
     
    7075
    7176class LocalFileSource:
    7277    def __init__(self, pathname):
     78        precondition(isinstance(pathname, unicode), pathname)
    7379        self.pathname = pathname
    7480
    7581    def need_to_copy_bytes(self):
     
    8086
    8187class LocalFileTarget:
    8288    def __init__(self, pathname):
     89        precondition(isinstance(pathname, unicode), pathname)
    8390        self.pathname = pathname
    8491    def put_file(self, inf):
    8592        outf = open(self.pathname, "wb")
     
    9299
    93100class LocalMissingTarget:
    94101    def __init__(self, pathname):
     102        precondition(isinstance(pathname, unicode), pathname)
    95103        self.pathname = pathname
    96104
    97105    def put_file(self, inf):
     
    105113
    106114class LocalDirectorySource:
    107115    def __init__(self, progressfunc, pathname):
     116        precondition(isinstance(pathname, unicode), pathname)
     117
    108118        self.progressfunc = progressfunc
    109119        self.pathname = pathname
    110120        self.children = None
     
    113123        if self.children is not None:
    114124            return
    115125        self.children = {}
    116         children = os.listdir(self.pathname)
     126        children = listdir_unicode(self.pathname)
    117127        for i,n in enumerate(children):
    118128            self.progressfunc("examining %d of %d" % (i, len(children)))
    119129            pn = os.path.join(self.pathname, n)
     
    130140
    131141class LocalDirectoryTarget:
    132142    def __init__(self, progressfunc, pathname):
     143        precondition(isinstance(pathname, unicode), pathname)
     144
    133145        self.progressfunc = progressfunc
    134146        self.pathname = pathname
    135147        self.children = None
     
    138150        if self.children is not None:
    139151            return
    140152        self.children = {}
    141         children = os.listdir(self.pathname)
     153        children = listdir_unicode(self.pathname)
    142154        for i,n in enumerate(children):
    143155            self.progressfunc("examining %d of %d" % (i, len(children)))
    144156            pn = os.path.join(self.pathname, n)
     
    161173        return LocalDirectoryTarget(self.progressfunc, pathname)
    162174
    163175    def put_file(self, name, inf):
     176        precondition(isinstance(name, unicode), name)
    164177        pathname = os.path.join(self.pathname, name)
    165         outf = open(pathname, "wb")
     178        outf = open_unicode(pathname, "wb")
    166179        while True:
    167180            data = inf.read(32768)
    168181            if not data:
     
    355368                if self.writecap:
    356369                    url = self.nodeurl + "/".join(["uri",
    357370                                                   urllib.quote(self.writecap),
    358                                                    urllib.quote(name.encode('utf-8'))])
     371                                                   urllib.quote(unicode_to_url(name))])
    359372                self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
    360373                                                      writecap, readcap, url)
    361374            elif data[0] == "dirnode":
  • src/allmydata/scripts/tahoe_ls.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_ls.py new-tahoe-534/src/allmydata/scripts/tahoe_ls.py
    old new  
    44from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
    55                                     UnknownAliasError
    66from allmydata.scripts.common_http import do_http
     7from allmydata.util.stringutils import unicode_to_stdout
    78
    89def list(options):
    910    nodeurl = options['node-url']
     
    130131            line.append(ctime_s)
    131132        if not options["classify"]:
    132133            classify = ""
    133         line.append(name + classify)
     134        line.append(unicode_to_stdout(name) + classify)
    134135        if options["uri"]:
    135136            line.append(uri)
    136137        if options["readonly-uri"]:
  • src/allmydata/scripts/tahoe_manifest.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py
    old new  
    8585                try:
    8686                    print >>stdout, d["cap"], "/".join(d["path"])
    8787                except UnicodeEncodeError:
    88                     print >>stdout, d["cap"], "/".join([p.encode("utf-8")
     88                    print >>stdout, d["cap"], "/".join([unicode_to_stdout(p)
    8989                                                        for p in d["path"]])
    9090
    9191def manifest(options):
  • src/allmydata/scripts/tahoe_mkdir.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py
    old new  
    22import urllib
    33from allmydata.scripts.common_http import do_http, check_http_error
    44from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
     5from allmydata.util.stringutils import unicode_to_url
    56
    67def mkdir(options):
    78    nodeurl = options['node-url']
     
    3536        path = path[:-1]
    3637    # path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
    3738    url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
    38                                            urllib.quote(path))
     39                                           urllib.quote(unicode_to_url(path)))
    3940    resp = do_http("POST", url)
    4041    check_http_error(resp, stderr)
    4142    new_uri = resp.read().strip()
  • src/allmydata/test/test_cli.py

    diff -rN -u old-tahoe-534/src/allmydata/test/test_cli.py new-tahoe-534/src/allmydata/test/test_cli.py
    old new  
    66import urllib
    77import re
    88import simplejson
     9import sys
    910
    1011from allmydata.util import fileutil, hashutil, base32
    1112from allmydata import uri
     
    2627from twisted.internet import threads # CLI tests use deferToThread
    2728from twisted.python import usage
    2829
     30from allmydata.util.stringutils import listdir_unicode, open_unicode, \
     31     unicode_platform, FilenameEncodingError
     32
    2933timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
    3034
    3135
     
    279283                   "work": "WA",
    280284                   "c": "CA"}
    281285        def ga1(path):
    282             return get_alias(aliases, path, "tahoe")
     286            return get_alias(aliases, path, u"tahoe")
    283287        uses_lettercolon = common.platform_uses_lettercolon_drivename()
    284288        self.failUnlessEqual(ga1("bare"), ("TA", "bare"))
    285289        self.failUnlessEqual(ga1("baredir/file"), ("TA", "baredir/file"))
     
    374378        # default set to something that isn't in the aliases argument should
    375379        # raise an UnknownAliasError.
    376380        def ga4(path):
    377             return get_alias(aliases, path, "badddefault:")
     381            return get_alias(aliases, path, u"badddefault:")
    378382        self.failUnlessRaises(common.UnknownAliasError, ga4, "afile")
    379383        self.failUnlessRaises(common.UnknownAliasError, ga4, "a/dir/path/")
    380384
     
    382386            old = common.pretend_platform_uses_lettercolon
    383387            try:
    384388                common.pretend_platform_uses_lettercolon = True
    385                 retval = get_alias(aliases, path, "baddefault:")
     389                retval = get_alias(aliases, path, u"baddefault:")
    386390            finally:
    387391                common.pretend_platform_uses_lettercolon = old
    388392            return retval
    389393        self.failUnlessRaises(common.UnknownAliasError, ga5, "C:\\Windows")
    390394
     395    def test_listdir_unicode_good(self):
     396        basedir = u"cli/common/listdir_unicode_good"
     397        fileutil.make_dirs(basedir)
     398
     399        files = (u'Lôzane', u'Bern', u'Genève')
     400
     401        for file in files:
     402            open(os.path.join(basedir, file), "w").close()
     403
     404        for file in listdir_unicode(basedir):
     405            self.failUnlessEqual(file in files, True)
     406
     407    def test_listdir_unicode_bad(self):
     408        if unicode_platform():
     409            raise unittest.SkipTest("This test doesn't make any sense on architecture which handle filenames natively as Unicode entities.")
     410
     411        basedir = u"cli/common/listdir_unicode_bad"
     412        fileutil.make_dirs(basedir)
     413
     414        files = (u'Lôzane', u'Bern', u'Genève')
     415
     416        # We use a wrong encoding on purpose
     417        if sys.getfilesystemencoding() == 'UTF-8':
     418            encoding = 'latin1'
     419        else:
     420            encoding = 'UTF-8'
     421
     422        for file in files:
     423            path = os.path.join(basedir, file).encode(encoding)
     424            open(path, "w").close()
     425
     426        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, basedir)
    391427
    392428class Help(unittest.TestCase):
    393429
     
    582618            self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:"))
    583619        d.addCallback(_check_not_corrupted)
    584620
     621        d.addCallback(lambda res: self.do_cli("create-alias", "études"))
     622        def _check_create_unicode((rc,stdout,stderr)):
     623            self.failUnlessEqual(rc, 0)
     624            self.failIf(stderr)
     625
     626            # If stdout only supports ascii, accentuated characters are
     627            # being replaced by '?'
     628            if sys.stdout.encoding == "ANSI_X3.4-1968":
     629                self.failUnless("Alias '?tudes' created" in stdout)
     630            else:
     631                self.failUnless("Alias 'études' created" in stdout)
     632
     633            aliases = get_aliases(self.get_clientdir())
     634            self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
     635        d.addCallback(_check_create_unicode)
     636
     637        d.addCallback(lambda res: self.do_cli("ls", "études:"))
     638        def _check_ls1((rc, stdout, stderr)):
     639            self.failUnlessEqual(rc, 0)
     640            self.failIf(stderr)
     641
     642            self.failUnlessEqual(stdout, "")
     643        d.addCallback(_check_ls1)
     644
     645        d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
     646          stdin="Blah blah blah"))
     647
     648        d.addCallback(lambda res: self.do_cli("ls", "études:"))
     649        def _check_ls2((rc, stdout, stderr)):
     650            self.failUnlessEqual(rc, 0)
     651            self.failIf(stderr)
     652
     653            self.failUnlessEqual(stdout, "uploaded.txt\n")
     654        d.addCallback(_check_ls2)
     655
     656        d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt"))
     657        def _check_get((rc, stdout, stderr)):
     658            self.failUnlessEqual(rc, 0)
     659            self.failIf(stderr)
     660            self.failUnlessEqual(stdout, "Blah blah blah")
     661        d.addCallback(_check_get)
     662
    585663        return d
    586664
    587665
     
    855933        return d
    856934
    857935
     936    def test_immutable_from_file_unicode(self):
     937        # tahoe put file.txt "à trier.txt"
     938        self.basedir = os.path.dirname(self.mktemp())
     939        self.set_up_grid()
     940
     941        rel_fn = os.path.join(self.basedir, "DATAFILE")
     942        abs_fn = os.path.abspath(rel_fn)
     943        # we make the file small enough to fit in a LIT file, for speed
     944        DATA = "short file"
     945        f = open(rel_fn, "w")
     946        f.write(DATA)
     947        f.close()
     948
     949        d = self.do_cli("create-alias", "tahoe")
     950
     951        d.addCallback(lambda res:
     952                      self.do_cli("put", rel_fn, "à trier.txt"))
     953        def _uploaded((rc,stdout,stderr)):
     954            readcap = stdout.strip()
     955            self.failUnless(readcap.startswith("URI:LIT:"))
     956            self.failUnless("201 Created" in stderr, stderr)
     957            self.readcap = readcap
     958        d.addCallback(_uploaded)
     959
     960        d.addCallback(lambda res:
     961                      self.do_cli("get", "tahoe:à trier.txt"))
     962        d.addCallback(lambda (rc,stdout,stderr):
     963                      self.failUnlessEqual(stdout, DATA))
     964
     965        return d
     966
    858967class List(GridTestMixin, CLITestMixin, unittest.TestCase):
    859968    def test_list(self):
    860969        self.basedir = "cli/List/list"
     
    11381247    def test_unicode_filename(self):
    11391248        self.basedir = "cli/Cp/unicode_filename"
    11401249        self.set_up_grid()
     1250        d = self.do_cli("create-alias", "tahoe")
     1251
     1252        # Use unicode strings when calling os functions
     1253        if sys.getfilesystemencoding() == "ANSI_X3.4-1968":
     1254            fn1 = os.path.join(self.basedir, u"Artonwall")
     1255        else:
     1256            fn1 = os.path.join(self.basedir, u"Ärtonwall")
    11411257
    1142         fn1 = os.path.join(self.basedir, "Ärtonwall")
    11431258        DATA1 = "unicode file content"
    11441259        fileutil.write(fn1, DATA1)
     1260        d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:Ärtonwall"))
     1261
     1262        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
     1263        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
    11451264
    1146         fn2 = os.path.join(self.basedir, "Metallica")
     1265
     1266        fn2 = os.path.join(self.basedir, u"Metallica")
    11471267        DATA2 = "non-unicode file content"
    11481268        fileutil.write(fn2, DATA2)
    11491269
    11501270        # Bug #534
    11511271        # Assure that uploading a file whose name contains unicode character doesn't
    11521272        # prevent further uploads in the same directory
    1153         d = self.do_cli("create-alias", "tahoe")
    1154         d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
    1155         d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
    1156 
    1157         d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
    1158         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
     1273        d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
    11591274
    11601275        d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
    11611276        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
    11621277
     1278        d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
     1279
    11631280        return d
    1164     test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms.  See issue ticket #534."
    11651281
    11661282    def test_dangling_symlink_vs_recursion(self):
    11671283        if not hasattr(os, 'symlink'):
     
    12681384        return d
    12691385
    12701386
     1387class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
     1388    def test_unicode_mkdir(self):
     1389        self.basedir = os.path.dirname(self.mktemp())
     1390        self.set_up_grid()
     1391
     1392        d = self.do_cli("create-alias", "tahoe")
     1393        d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))
     1394
     1395        return d
     1396 
     1397
    12711398class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
    12721399
    12731400    def writeto(self, path, data):