source: trunk/src/allmydata/util/fileutil.py

Last change on this file was 1cfe843d, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-02-22T23:40:25Z

more python2 removal

  • Property mode set to 100644
File size: 24.0 KB
Line 
1"""
2Ported to Python3.
3
4Futz with files like a pro.
5"""
6
7import sys, os, stat, tempfile, time, binascii
8import six
9from collections import namedtuple
10from errno import ENOENT
11
12if sys.platform == "win32":
13    from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_ulonglong, \
14        create_unicode_buffer, get_last_error
15    from ctypes.wintypes import BOOL, DWORD, LPCWSTR, LPWSTR, LPVOID
16
17from twisted.python import log
18
19from allmydata.crypto import aes
20from allmydata.util.assertutil import _assert
21
22
def rename(src, dst, tries=4, basedelay=0.1):
    """
    Rename src to dst, retrying with exponential back-off on failure.

    This is a kludge for Windows, where another process (an anti-virus
    scanner, a local search engine, ...) may briefly hold the file open so
    that it cannot be moved.  We simply retry a few times, sleeping in
    between, and then give up by letting the final attempt's exception
    propagate.

    With the default arguments the sleeps add up to 0.1 + 0.2 + 0.4 = 0.7
    seconds, so the call blocks for less than a second.

    @param tries: total number of attempts; every wait after the first is
    twice as long as the one before it
    @param basedelay: number of seconds to wait before the second attempt
    """
    pause = basedelay
    for _attempt in range(tries - 1):
        try:
            return os.rename(src, dst)
        except EnvironmentError as err:
            # XXX Tighten this so the kludge only kicks in for a "permission
            # denied" error (possibly caused by another Windows process
            # having the file open).
            message = "XXX KLUDGE Attempting to move file %s => %s; got %s; sleeping %s seconds" % (src, dst, err, pause,)
            log.msg(message)
            time.sleep(pause)
            pause *= 2
    # Final attempt: any error raised here reaches the caller.
    return os.rename(src, dst)
46
def remove(f, tries=4, basedelay=0.1):
    """
    Delete the file f, retrying with exponential back-off on failure.

    This is a kludge for Windows, where another process (an anti-virus
    scanner, a local search engine, ...) may briefly hold the file open so
    that it cannot be deleted.  We retry a few times, sleeping in between,
    and then give up by letting the final attempt's exception propagate.

    Before deleting, the file is made readable, writable and executable for
    its owner on a best-effort basis.  If an attempt fails but the file no
    longer exists afterwards, the call returns quietly.

    With the default arguments the sleeps add up to 0.1 + 0.2 + 0.4 = 0.7
    seconds, so the call blocks for less than a second.

    @param tries: total number of attempts; every wait after the first is
    twice as long as the one before it
    @param basedelay: number of seconds to wait before the second attempt
    """
    try:
        os.chmod(f, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
    except Exception:
        # Best effort only: a failure here must not prevent the removal
        # attempts below.  (Not a bare "except:", so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.)
        pass
    for i in range(tries-1):
        try:
            return os.remove(f)
        except EnvironmentError as le:
            # XXX Tighten this so the kludge only kicks in for a "permission
            # denied" error (possibly caused by another Windows process
            # having the file open).
            if not os.path.exists(f):
                # Nothing left to delete; treat that as success.
                return
            log.msg("XXX KLUDGE Attempting to remove file %s; got %s; sleeping %s seconds" % (f, le, basedelay,))
            time.sleep(basedelay)
            basedelay *= 2
    return os.remove(f) # The last try.
76
class ReopenableNamedTemporaryFile(object):
    """
    This uses tempfile.mkstemp() to generate a secure temp file.  It then closes
    the file, leaving a zero-length file as a placeholder.  You can get the
    filename with ReopenableNamedTemporaryFile.name.  When the
    ReopenableNamedTemporaryFile instance is garbage collected or its shutdown()
    method is called, it deletes the file.
    """
    def __init__(self, *args, **kwargs):
        # All arguments are passed straight through to tempfile.mkstemp().
        fd, self.name = tempfile.mkstemp(*args, **kwargs)
        os.close(fd)

    def __repr__(self):
        return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name)

    def __str__(self):
        return self.__repr__()

    def __del__(self):
        # If mkstemp() raised inside __init__, self.name was never bound and
        # there is no file to clean up; skip shutdown() instead of raising
        # AttributeError from the finalizer.
        if hasattr(self, "name"):
            self.shutdown()

    def shutdown(self):
        """Delete the placeholder file."""
        remove(self.name)
100
class EncryptedTemporaryFile(object):
    """
    A partial file-like object backed by tempfile.TemporaryFile().

    Every write() passes the data through _crypt() before handing it to the
    underlying file, and every read() passes what it read back through the
    same _crypt(), using a random 16-byte key generated per instance.
    """
    # not implemented: next, readline, readlines, xreadlines, writelines

    def __init__(self):
        self.file = tempfile.TemporaryFile()
        self.key = os.urandom(16)  # AES-128

    def _crypt(self, offset, data):
        """
        Transform `data` as it appears at byte position `offset` of the file.

        The IV is the index of the 16-byte block containing `offset`, and the
        cipher is then fed `offset % 16` zero bytes so that it is positioned
        at the right place inside that block before `data` is processed.
        """
        block_index, within_block = divmod(offset, 16)
        iv = binascii.unhexlify("%032x" % block_index)
        cipher = aes.create_encryptor(self.key, iv)
        # Output discarded on purpose: this only advances the counter.
        aes.encrypt_data(cipher, b"\x00" * within_block)
        return aes.encrypt_data(cipher, data)

    def close(self):
        self.file.close()

    def flush(self):
        self.file.flush()

    def seek(self, offset, whence=0):  # 0 = SEEK_SET
        self.file.seek(offset, whence)

    def tell(self):
        return self.file.tell()

    def read(self, size=-1):
        """A read must not follow a write, or vice-versa, without an intervening seek."""
        start = self.file.tell()
        return self._crypt(start, self.file.read(size))

    def write(self, plaintext):
        """A read must not follow a write, or vice-versa, without an intervening seek.
        If seeking and then writing causes a 'hole' in the file, the contents of the
        hole are unspecified."""
        start = self.file.tell()
        self.file.write(self._crypt(start, plaintext))

    def truncate(self, newsize):
        """Truncate or extend the file to 'newsize'. If it is extended, the contents after the
        old end-of-file are unspecified. The file position after this operation is unspecified."""
        self.file.truncate(newsize)
149
def make_dirs_with_absolute_mode(parent, dirname, mode):
    """
    Create directory `dirname` and then chmod it to `mode`.

    Every directory between `dirname` and `parent` is chmod'ed as well;
    `parent` itself is left alone.  Both paths must satisfy
    precondition_abspath() and `dirname` must be `parent` or one of its
    descendants, otherwise AssertionError is raised.
    """
    precondition_abspath(parent)
    precondition_abspath(dirname)
    if not is_ancestor_path(parent, dirname):
        raise AssertionError("dirname must be a descendant of parent")

    make_dirs(dirname)
    current = dirname
    while current != parent:
        os.chmod(current, mode)
        # FIXME: doesn't seem to work on Windows for long paths
        shorter = os.path.dirname(current)
        # Each step up must strictly shorten the path, or we would loop forever.
        _assert(len(shorter) < len(current), dirname=shorter, old_dirname=current)
        current = shorter
167
def is_ancestor_path(parent, dirname):
    """
    Return True if `parent` equals `dirname` or is reached by repeatedly
    applying os.path.dirname() to `dirname`; return False once dirname() stops
    making the path shorter without having reached `parent`.
    """
    candidate = dirname
    while True:
        if candidate == parent:
            return True
        # FIXME: doesn't seem to work on Windows for long paths
        one_up = os.path.dirname(candidate)
        if len(one_up) >= len(candidate):
            # We hit the top of the path without meeting `parent`.
            return False
        candidate = one_up
175
def make_dirs(dirname, mode=0o777):
    """
    An idempotent version of os.makedirs().

    Returns normally whenever `dirname` is a directory afterwards, whether it
    already existed or was created by this call.  If it is not a directory
    afterwards, the OSError raised by os.makedirs() is re-raised; if
    os.makedirs() reported no error (e.g. the directory was deleted again
    right after being created), an IOError is raised instead.
    """
    makedirs_error = None
    try:
        os.makedirs(dirname, mode)
    except OSError as err:
        # Remember it; it only matters if the directory is missing afterwards.
        makedirs_error = err

    if os.path.isdir(dirname):
        return
    if makedirs_error:
        raise makedirs_error
    # careful not to construct an IOError with a 2-tuple, as that has a special meaning...
    raise IOError("unknown error prevented creation of directory, or deleted the directory immediately after creation: %s" % dirname)
194
def rm_dir(dirname):
    """
    An idempotent version of shutil.rmtree().  If the dir is already gone, do
    nothing and return without raising an exception.  If this call removes
    the dir, return without raising an exception.  If there is an error that
    prevents deletion or if the directory gets created again after rm_dir()
    deletes it and before rm_dir() checks that it is gone, raise an
    exception.

    Symbolic links found inside the tree are unlinked rather than followed,
    so the contents of a linked-to directory are left alone.
    """
    failure = None
    try:
        os.chmod(dirname, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
        for f in os.listdir(dirname):
            fullname = os.path.join(dirname, f)
            # os.path.isdir() follows symlinks; recursing into a link would
            # empty the link's target, so only recurse into real directories.
            if os.path.isdir(fullname) and not os.path.islink(fullname):
                rm_dir(fullname)
            else:
                remove(fullname)
        os.rmdir(dirname)
    except Exception as le:
        # Ignore "No such file or directory".  Compare the errno attribute
        # rather than args[0], which does not exist for an OSError that was
        # constructed without arguments.
        if not (isinstance(le, OSError) and le.errno == ENOENT):
            failure = le

    # Okay, now we've recursively removed everything, ignoring any "No
    # such file or directory" error and remembering any other error.

    if os.path.exists(dirname):
        if failure is not None:
            raise failure
        raise OSError("Failed to remove dir for unknown reason.")
228
229
def remove_if_possible(f):
    """
    Try to delete the file f with remove(); any failure is deliberately
    ignored, so the file may still exist when this returns.
    """
    try:
        remove(f)
    except Exception:
        # Best effort by design.  Not a bare "except:", so KeyboardInterrupt
        # and SystemExit still propagate.
        pass
235
def du(basedir):
    """
    Return the sum of os.path.getsize() over every file that os.walk() finds
    under `basedir` (0 if there are none).
    """
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _dirs, names in os.walk(basedir)
        for name in names
    )
245
def move_into_place(source, dest):
    """Atomically replace a file, or as near to it as the platform allows.
    The dest file may or may not exist."""
    on_windows = "win32" in sys.platform.lower()
    # Only clear dest out of the way when source really exists: we don't want
    # to nuke dest unless the rename below has a chance of succeeding.
    if on_windows and os.path.exists(source):
        remove_if_possible(dest)
    os.rename(source, dest)
254
def write_atomically(target, contents, mode="b"):
    """
    Write `contents` to `target + ".tmp"` and then move that file over
    `target` with move_into_place().

    `mode` is appended to "w" to form the open() mode: it must contain "b"
    for bytes contents or "t" for str contents (an empty mode is also let
    through by the check).
    """
    assert (
        (isinstance(contents, bytes) and "b" in mode)
        or (isinstance(contents, str) and "t" in mode)
        or mode == ""
    ), (type(contents), mode)
    scratch = target + ".tmp"
    with open(scratch, "w" + mode) as f:
        f.write(contents)
    move_into_place(scratch, target)
262
def write(path, data, mode="wb"):
    """
    Write `data` to the file at `path`, opened with `mode`.

    A str given together with a binary mode is encoded as UTF-8 first.
    """
    needs_encoding = "b" in mode and isinstance(data, str)
    payload = data.encode("utf-8") if needs_encoding else data
    with open(path, mode) as f:
        f.write(payload)
268
def read(path, mode="rb"):
    """Return the entire contents of the file at `path`, opened with `mode`."""
    with open(path, mode) as rf:
        contents = rf.read()
    return contents
272
def put_file(path, inf):
    """
    Copy everything readable from the file-like object `inf` into the file at
    `path`, 32768 bytes at a time.  `path` must satisfy precondition_abspath().
    """
    precondition_abspath(path)
    chunk_size = 32768

    # TODO: create temporary file and move into place?
    with open(path, "wb") as outf:
        data = inf.read(chunk_size)
        while data:
            outf.write(data)
            data = inf.read(chunk_size)
283
def precondition_abspath(path):
    """
    Raise AssertionError unless `path` is a str in normalized absolute form.

    On Windows that means it must start with two backslashes; elsewhere it
    must satisfy os.path.isabs().
    """
    if not isinstance(path, str):
        raise AssertionError("an abspath must be a Unicode string")

    if sys.platform == "win32":
        # This intentionally doesn't view absolute paths starting with a drive specification, or
        # paths relative to the current drive, as acceptable.
        acceptable = path.startswith("\\\\")
    else:
        # This intentionally doesn't view the path '~' or paths starting with '~/' as acceptable.
        acceptable = os.path.isabs(path)

    if not acceptable:
        raise AssertionError("an abspath should be normalized using abspath_expanduser_unicode")
297
298# Work around <http://bugs.python.org/issue3426>. This code is adapted from
299# <http://svn.python.org/view/python/trunk/Lib/ntpath.py?revision=78247&view=markup>
300# with some simplifications.
301
try:
    from nt import _getfullpathname  # type: ignore
except ImportError:
    # The 'nt' module could not be imported; callers test this name for
    # truthiness before using it.
    _getfullpathname = None
307
def abspath_expanduser_unicode(path, base=None, long_path=True):
    """
    Return the absolute version of a path. If 'base' is given and 'path' is relative,
    the path will be expanded relative to 'base'.
    'path' must be a Unicode string. 'base', if given, must be a Unicode string
    corresponding to an absolute path as returned by a previous call to
    abspath_expanduser_unicode.
    On Windows, the result will be a long path unless long_path is given as False.
    """
    if not isinstance(path, str):
        raise AssertionError("paths must be Unicode strings")
    have_base = base is not None
    if have_base and long_path:
        precondition_abspath(base)

    path = expanduser(path)

    if _getfullpathname:
        # On Windows, os.path.isabs will incorrectly return True
        # for paths without a drive letter (that are not UNC paths),
        # e.g. "\\". See <http://bugs.python.org/issue1669539>.
        try:
            if have_base:
                path = _getfullpathname(os.path.join(base, path))
            else:
                path = _getfullpathname(path or u".")
        except OSError:
            # Keep the unresolved path; the generic handling below still applies.
            pass

    if not os.path.isabs(path):
        anchor = base if have_base else os.getcwd()
        path = os.path.join(anchor, path)

    # We won't hit <http://bugs.python.org/issue5827> because
    # there is always at least one Unicode path component.
    path = os.path.normpath(path)

    if sys.platform == "win32" and long_path:
        path = to_windows_long_path(path)

    return path
351
def to_windows_long_path(path):
    """
    Return `path` in Windows "\\\\?\\" long-path form.

    Paths already starting with "\\\\?\\" or "\\\\.\\" are returned as they
    are (after separator replacement), UNC paths get the "\\\\?\\UNC\\"
    prefix, and anything else gets a plain "\\\\?\\" prefix.
    """
    # '/' is normally a perfectly valid path component separator in Windows.
    # However, when using the "\\?\" syntax it is not recognized, so we
    # replace it with '\' here.
    path = path.replace(u"/", u"\\")

    # Note that other normalizations such as removing '.' and '..' should
    # be done outside this function.

    if path.startswith((u"\\\\?\\", u"\\\\.\\")):
        return path
    if path.startswith(u"\\\\"):
        return u"\\\\?\\UNC\\" + path[2:]
    return u"\\\\?\\" + path
367
368
# Stays False unless the GetDiskFreeSpaceExW binding below is set up successfully.
have_GetDiskFreeSpaceExW = False
if sys.platform == "win32":
    # <http://msdn.microsoft.com/en-us/library/windows/desktop/ms683188%28v=vs.85%29.aspx>
    GetEnvironmentVariableW = WINFUNCTYPE(
        DWORD,
        LPCWSTR, LPWSTR, DWORD,
        use_last_error=True,
    )(("GetEnvironmentVariableW", windll.kernel32))

    try:
        # <http://msdn.microsoft.com/en-us/library/aa383742%28v=VS.85%29.aspx>
        PULARGE_INTEGER = POINTER(c_ulonglong)

        # <http://msdn.microsoft.com/en-us/library/aa364937%28VS.85%29.aspx>
        GetDiskFreeSpaceExW = WINFUNCTYPE(
            BOOL,
            LPCWSTR, PULARGE_INTEGER, PULARGE_INTEGER, PULARGE_INTEGER,
            use_last_error=True,
        )(("GetDiskFreeSpaceExW", windll.kernel32))
    except Exception:
        # Report the problem but keep importing; the flag simply stays False.
        import traceback
        traceback.print_exc()
    else:
        have_GetDiskFreeSpaceExW = True
391
def expanduser(path):
    """
    Expand a leading '~' in `path`: via windows_expanduser() on Windows and
    os.path.expanduser() everywhere else.
    """
    # os.path.expanduser is hopelessly broken for Unicode paths on Windows (ticket #1674).
    if sys.platform != "win32":
        return os.path.expanduser(path)
    return windows_expanduser(path)
398
def windows_expanduser(path):
    """
    Expand '~', '~/...' and '~\\...' using the Windows environment.

    The home directory is %USERPROFILE%, or %HOMEDRIVE% joined with
    %HOMEPATH% when that is unset; OSError is raised if neither is available.
    Paths that do not start with '~', and '~name'-style paths, are returned
    unchanged.
    """
    if not path.startswith('~'):
        return path

    home_dir = windows_getenv(u'USERPROFILE')
    if home_dir is None:
        home_drive = windows_getenv(u'HOMEDRIVE')
        home_path = windows_getenv(u'HOMEPATH')
        if home_drive is None or home_path is None:
            raise OSError("Could not find home directory: neither %USERPROFILE% nor (%HOMEDRIVE% and %HOMEPATH%) are set.")
        home_dir = os.path.join(home_drive, home_path)

    if path == '~':
        return home_dir
    # Here path is '~' plus at least one more character.
    if path[1:2] in ('/', '\\'):
        return os.path.join(home_dir, path[2:])
    return path
417
# <https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382%28v=vs.85%29.aspx>
ERROR_ENVVAR_NOT_FOUND = 203

def windows_getenv(name):
    """
    Return the value of the environment variable `name` as read through
    GetEnvironmentVariableW, or None if there is no variable of that name.

    Raises AssertionError if `name` is not a str, and OSError if the API call
    fails for any other reason or returns an inconsistent length.
    """
    # Based on <http://stackoverflow.com/questions/2608200/problems-with-umlauts-in-python-appdata-environvent-variable/2608368#2608368>,
    # with improved error handling.
    if not isinstance(name, str):
        raise AssertionError("name must be Unicode")

    # First call, without a buffer, to ask how large the buffer has to be.
    needed = GetEnvironmentVariableW(name, None, 0)
    # GetEnvironmentVariableW returns DWORD, so the result cannot be negative.
    if needed == 0:
        err = get_last_error()
        if err == ERROR_ENVVAR_NOT_FOUND:
            return None
        raise OSError("WinError: %s\n attempting to read size of environment variable %r"
                      % (WinError(err), name))
    if needed == 1:
        # Avoid an ambiguity between a zero-length string and an error in the return value of the
        # call to GetEnvironmentVariableW below.
        return u""

    buf = create_unicode_buffer(u'\0'*needed)
    copied = GetEnvironmentVariableW(name, buf, needed)
    if copied == 0:
        err = get_last_error()
        if err == ERROR_ENVVAR_NOT_FOUND:
            return None
        raise OSError("WinError: %s\n attempting to read environment variable %r"
                      % (WinError(err), name))
    if copied >= needed:
        raise OSError("Unexpected result %d (expected less than %d) from GetEnvironmentVariableW attempting to read environment variable %r"
                      % (copied, needed, name))

    return buf.value
453
def get_disk_stats(whichdir, reserved_space=0):
    """Return disk statistics for the storage disk, in the form of a dict
    with the following fields.
      total:            total bytes on disk
      free_for_root:    bytes actually free on disk
      free_for_nonroot: bytes free for "a non-privileged user" [Unix] or
                          the current user [Windows]; might take into
                          account quotas depending on platform
      used:             bytes used on disk
      avail:            bytes available excluding reserved space
    An AttributeError can occur if the OS has no API to get disk information.
    An EnvironmentError can occur if the OS call fails.

    whichdir is a directory on the filesystem in question -- the
    answer is about the filesystem, not about the directory, so the
    directory is used only to specify which filesystem.

    reserved_space is how many bytes to subtract from the answer, so
    you can pass how many bytes you would like to leave unused on this
    filesystem as reserved_space.
    """

    if have_GetDiskFreeSpaceExW:
        # If this is a Windows system and GetDiskFreeSpaceExW is available, use it.
        # (This might put up an error dialog unless
        # SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) has been called,
        # which we do in allmydata.windows.fixups.initialize().)

        n_free_for_nonroot = c_ulonglong(0)
        n_total = c_ulonglong(0)
        n_free_for_root = c_ulonglong(0)
        succeeded = GetDiskFreeSpaceExW(
            whichdir,
            byref(n_free_for_nonroot),
            byref(n_total),
            byref(n_free_for_root),
        )
        if succeeded == 0:
            raise OSError("WinError: %s\n attempting to get disk statistics for %r"
                          % (WinError(get_last_error()), whichdir))
        free_for_nonroot = n_free_for_nonroot.value
        total = n_total.value
        free_for_root = n_free_for_root.value
    else:
        # For Unix-like systems.
        # <http://docs.python.org/library/os.html#os.statvfs>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/fstatvfs.html>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/sysstatvfs.h.html>
        s = os.statvfs(whichdir)

        # Observed on a mac laptop:
        #  statvfs(2) is a wrapper around statfs(2).
        #    statvfs.f_frsize = statfs.f_bsize :
        #     "minimum unit of allocation" (statvfs)
        #     "fundamental file system block size" (statfs)
        #    statvfs.f_bsize = statfs.f_iosize = stat.st_blocks : preferred IO size
        # on an encrypted home directory ("FileVault"), it gets f_blocks
        # wrong, and s.f_blocks*s.f_frsize is twice the size of the disk,
        # but s.f_bavail*s.f_frsize is correct

        unit = s.f_frsize
        total = unit * s.f_blocks
        free_for_root = unit * s.f_bfree
        free_for_nonroot = unit * s.f_bavail

    # valid for all platforms:
    return {
        'total': total,
        'free_for_root': free_for_root,
        'free_for_nonroot': free_for_nonroot,
        'used': total - free_for_root,
        # Never report a negative amount of available space.
        'avail': max(free_for_nonroot - reserved_space, 0),
    }
525
def get_available_space(whichdir, reserved_space):
    """Returns available space for share storage in bytes, or None if no
    API to get this information is available.  If the OS call itself fails,
    the failure is logged and 0 is returned.

    whichdir is a directory on the filesystem in question -- the
    answer is about the filesystem, not about the directory, so the
    directory is used only to specify which filesystem.

    reserved_space is how many bytes to subtract from the answer, so
    you can pass how many bytes you would like to leave unused on this
    filesystem as reserved_space.
    """
    try:
        stats = get_disk_stats(whichdir, reserved_space)
    except AttributeError:
        # No API for disk statistics on this platform.
        return None
    except EnvironmentError:
        log.msg("OS call to get disk statistics failed")
        return 0
    return stats['avail']
545
546
class ConflictError(Exception):
    """Raised by replace_file() when the replacement cannot be put in place."""
549
550
class UnableToUnlinkReplacementError(Exception):
    """Raised by rename_no_overwrite() when the source cannot be unlinked after it has been linked to the destination."""
553
554
def reraise(wrapper):
    """
    Re-raise the exception currently being handled as an instance of
    `wrapper`, keeping the original traceback.

    Must be called from inside an `except` block.  The new exception's
    message is "<original class name>: <original exception>".

    @param wrapper: the exception class to raise
    """
    cls, exc, tb = sys.exc_info()
    wrapper_exc = wrapper("%s: %s" % (cls.__name__, exc))
    # Native Python 3 spelling of six.reraise(wrapper, wrapper_exc, tb).
    raise wrapper_exc.with_traceback(tb)
559
560
if sys.platform == "win32":
    # <https://msdn.microsoft.com/en-us/library/windows/desktop/aa365512%28v=vs.85%29.aspx>
    ReplaceFileW = WINFUNCTYPE(
        BOOL,  LPCWSTR, LPCWSTR, LPCWSTR, DWORD, LPVOID, LPVOID,
        use_last_error=True
    )(("ReplaceFileW", windll.kernel32))

    REPLACEFILE_IGNORE_MERGE_ERRORS = 0x00000002

    # <https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382%28v=vs.85%29.aspx>
    ERROR_FILE_NOT_FOUND = 2

    def rename_no_overwrite(source_path, dest_path):
        """
        Move source_path to dest_path with a plain os.rename().
        """
        os.rename(source_path, dest_path)

    def replace_file(replaced_path, replacement_path):
        """
        Replace the file at replaced_path with the one at replacement_path.

        Both paths must satisfy precondition_abspath().  ReplaceFileW is
        tried first; if it fails with ERROR_FILE_NOT_FOUND the replacement is
        moved into place with move_into_place() instead.  Every failure is
        reported as ConflictError.
        """
        precondition_abspath(replaced_path)
        precondition_abspath(replacement_path)

        # no "backup" path (the first None) because we don't want to
        # create a backup file
        r = ReplaceFileW(replaced_path, replacement_path, None,
                         REPLACEFILE_IGNORE_MERGE_ERRORS, None, None)
        if r == 0:
            # The UnableToUnlinkReplacementError case does not happen on Windows;
            # all errors should be treated as signalling a conflict.
            err = get_last_error()
            if err != ERROR_FILE_NOT_FOUND:
                raise ConflictError("WinError: %s" % (WinError(err),))

            try:
                move_into_place(replacement_path, replaced_path)
            except EnvironmentError:
                reraise(ConflictError)
else:
    def rename_no_overwrite(source_path, dest_path):
        """
        Move source_path to dest_path without overwriting an existing
        dest_path, by hard-linking and then unlinking the source.

        Raises UnableToUnlinkReplacementError if the link was created but the
        source could not be unlinked afterwards.
        """
        # link will fail with EEXIST if there is already something at dest_path.
        os.link(source_path, dest_path)
        try:
            os.unlink(source_path)
        except EnvironmentError:
            reraise(UnableToUnlinkReplacementError)

    def replace_file(replaced_path, replacement_path):
        """
        Replace the file at replaced_path with the one at replacement_path.

        Both paths must satisfy precondition_abspath().  Raises ConflictError
        if replacement_path does not exist.  An ENOENT error from the move
        itself is ignored; any other OSError propagates unchanged.
        """
        precondition_abspath(replaced_path)
        precondition_abspath(replacement_path)

        if not os.path.exists(replacement_path):
            raise ConflictError("Replacement file not found: %r" % (replacement_path,))

        # EnvironmentError is an alias of OSError on Python 3, so a second
        # "except EnvironmentError" clause after this one could never run and
        # has been dropped.
        try:
            move_into_place(replacement_path, replaced_path)
        except OSError as e:
            if e.errno != ENOENT:
                raise
618
619
# Result record of get_pathinfo().
PathInfo = namedtuple(
    'PathInfo',
    ['isdir', 'isfile', 'islink', 'exists', 'size', 'mtime_ns', 'ctime_ns'],
)
621
def seconds_to_ns(t):
    """Convert `t` seconds to an integer number of nanoseconds, truncating toward zero."""
    ns_per_second = 1000000000
    return int(t * ns_per_second)
624
def get_pathinfo(path_u, now_ns=None):
    """
    Return a PathInfo describing `path_u`, based on os.lstat() (so a symlink
    is reported as a link rather than followed).

    If the path does not exist, the result has exists=False, size=None and
    both timestamps set to `now_ns`, which defaults to the current time in
    nanoseconds.  Any OSError other than ENOENT is re-raised.
    """
    try:
        statinfo = os.lstat(path_u)
    except OSError as e:
        if e.errno != ENOENT:
            raise
        if now_ns is None:
            now_ns = seconds_to_ns(time.time())
        return PathInfo(
            isdir=False,
            isfile=False,
            islink=False,
            exists=False,
            size=None,
            mtime_ns=now_ns,
            ctime_ns=now_ns,
        )

    mode = statinfo.st_mode
    return PathInfo(
        isdir=stat.S_ISDIR(mode),
        isfile=stat.S_ISREG(mode),
        islink=stat.S_ISLNK(mode),
        exists=True,
        size=statinfo.st_size,
        mtime_ns=seconds_to_ns(statinfo.st_mtime),
        ctime_ns=seconds_to_ns(statinfo.st_ctime),
    )
Note: See TracBrowser for help on using the repository browser.