source file: /home/buildslave/tahoe/edgy/build/src/allmydata/scripts/backupdb.py
file stats: 122 lines, 121 executed: 99.2% covered
coverage versus previous test: 0 lines added, 0 lines removed
    1. 
    2. # the backupdb is only available if sqlite3 is available. Python-2.5.x and
    3. # beyond include sqlite3 in the standard library. For python-2.4, the
    4. # "pysqlite2" "package" (or "module") (which, despite the confusing name, uses
    5. # sqlite3, and which, confusingly, comes in the "pysqlite" "distribution" (or
    6. # "package")) must be installed. On debian, install python-pysqlite2
    7. 
    8. import os.path, sys, time, random, stat
    9. 
   10. DAY = 24*60*60
   11. MONTH = 30*DAY
   12. 
# SQL script that lays out a brand-new version-1 backupdb. get_backupdb()
# runs it with executescript() when the database file does not exist yet,
# and then inserts the single row into 'version'.
#
#  version:     one row holding the schema version number
#  local_files: one row per local file, keyed by absolute path, recording
#               the stat() values seen at upload time and the fileid of the
#               cap the file was uploaded to
#  caps:        maps each fileid to its (unique) filecap string
#  last_upload: per-fileid timestamps of the last upload and the last check
SCHEMA_v1 = """
CREATE TABLE version
(
 version INTEGER  -- contains one row, set to 1
);

CREATE TABLE local_files
(
 path  VARCHAR(1024) PRIMARY KEY, -- index, this is os.path.abspath(fn)
 size  INTEGER,       -- os.stat(fn)[stat.ST_SIZE]
 mtime NUMBER,        -- os.stat(fn)[stat.ST_MTIME]
 ctime NUMBER,        -- os.stat(fn)[stat.ST_CTIME]
 fileid INTEGER
);

CREATE TABLE caps
(
 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
 filecap VARCHAR(256) UNIQUE       -- URI:CHK:...
);

CREATE TABLE last_upload
(
 fileid INTEGER PRIMARY KEY,
 last_uploaded TIMESTAMP,
 last_checked TIMESTAMP
);

"""
   42. 
   43. def get_backupdb(dbfile, stderr=sys.stderr):
   44.     # open or create the given backupdb file. The parent directory must
   45.     # exist.
   46.     try:
   47.         import sqlite3
   48.         sqlite = sqlite3 # pyflakes whines about 'import sqlite3 as sqlite' ..
   49.     except ImportError:
   50.         from pysqlite2 import dbapi2
   51.         sqlite = dbapi2 # .. when this clause does it too
   52.         # This import should never fail, because setuptools requires that the
   53.         # "pysqlite" distribution is present at start time (if on Python < 2.5).
   54. 
   55.     must_create = not os.path.exists(dbfile)
   56.     try:
   57.         db = sqlite.connect(dbfile)
   58.     except (EnvironmentError, sqlite.OperationalError), e:
   59.         print >>stderr, "Unable to create/open backupdb file %s: %s" % (dbfile, e)
   60.         return None
   61. 
   62.     c = db.cursor()
   63.     if must_create:
   64.         c.executescript(SCHEMA_v1)
   65.         c.execute("INSERT INTO version (version) VALUES (1)")
   66.         db.commit()
   67. 
   68.     try:
   69.         c.execute("SELECT version FROM version")
   70.         version = c.fetchone()[0]
   71.     except sqlite.DatabaseError, e:
   72.         # this indicates that the file is not a compatible database format.
   73.         # Perhaps it was created with an old version, or it might be junk.
   74.         print >>stderr, "backupdb file is unusable: %s" % e
   75.         return None
   76. 
   77.     if version == 1:
   78.         return BackupDB_v1(sqlite, db)
   79.     print >>stderr, "Unable to handle backupdb version %s" % version
   80.     return None
   81. 
   82. MUST_UPLOAD, ALREADY_UPLOADED = range(2)
   83. class Result:
   84.     def __init__(self, bdb, filecap, should_check,
   85.                  path, mtime, ctime, size):
   86.         self.bdb = bdb
   87.         self.filecap = filecap
   88.         self.should_check_p = should_check
   89. 
   90.         self.path = path
   91.         self.mtime = mtime
   92.         self.ctime = ctime
   93.         self.size = size
   94. 
   95.     def was_uploaded(self):
   96.         if self.filecap:
   97.             return self.filecap
   98.         return False
   99. 
  100.     def did_upload(self, filecap):
  101.         self.bdb.did_upload(filecap,
  102.                             self.path,
  103.                             self.mtime, self.ctime, self.size)
  104. 
  105.     def should_check(self):
  106.         return self.should_check_p
  107. 
  108.     def did_check_healthy(self, results):
  109.         self.bdb.did_check_healthy(self.filecap, results)
  110. 
  111. class BackupDB_v1:
  112.     VERSION = 1
  113.     NO_CHECK_BEFORE = 1*MONTH
  114.     ALWAYS_CHECK_AFTER = 2*MONTH
  115. 
  116.     def __init__(self, sqlite_module, connection):
  117.         self.sqlite_module = sqlite_module
  118.         self.connection = connection
  119.         self.cursor = connection.cursor()
  120. 
  121.     def check_file(self, path, use_timestamps=True):
  122.         """I will tell you if a given local file needs to be uploaded or not,
  123.         by looking in a database and seeing if I have a record of this file
  124.         having been uploaded earlier.
  125. 
  126.         I return a Results object, synchronously. If r.was_uploaded() returns
  127.         False, you should upload the file. When you are finished uploading
  128.         it, call r.did_upload(filecap), so I can update my database.
  129. 
  130.         If was_uploaded() returns a filecap, you might be able to avoid an
  131.         upload. Call r.should_check(), and if it says False, you can skip the
  132.         upload and use the filecap returned by was_uploaded().
  133. 
  134.         If should_check() returns True, you should perform a filecheck on the
  135.         filecap returned by was_uploaded(). If the check indicates the file
  136.         is healthy, please call r.did_check_healthy(checker_results) so I can
  137.         update the database, using the de-JSONized response from the webapi
  138.         t=check call for 'checker_results'. If the check indicates the file
  139.         is not healthy, please upload the file and call r.did_upload(filecap)
  140.         when you're done.
  141. 
  142.         I use_timestamps=True (the default), I will compare ctime and mtime
  143.         of the local file against an entry in my database, and consider the
  144.         file to be unchanged if ctime, mtime, and filesize are all the same
  145.         as the earlier version. If use_timestamps=False, I will not trust the
  146.         timestamps, so more files (perhaps all) will be marked as needing
  147.         upload. A future version of this database may hash the file to make
  148.         equality decisions, in which case use_timestamps=False will not
  149.         always imply r.must_upload()==True.
  150. 
  151.         'path' points to a local file on disk, possibly relative to the
  152.         current working directory. The database stores absolute pathnames.
  153.         """
  154. 
  155.         path = os.path.abspath(path)
  156.         s = os.stat(path)
  157.         size = s[stat.ST_SIZE]
  158.         ctime = s[stat.ST_CTIME]
  159.         mtime = s[stat.ST_MTIME]
  160. 
  161.         now = time.time()
  162.         c = self.cursor
  163. 
  164.         c.execute("SELECT size,mtime,ctime,fileid"
  165.                   " FROM local_files"
  166.                   " WHERE path=?",
  167.                   (path,))
  168.         row = self.cursor.fetchone()
  169.         if not row:
  170.             return Result(self, None, False, path, mtime, ctime, size)
  171.         (last_size,last_mtime,last_ctime,last_fileid) = row
  172. 
  173.         c.execute("SELECT caps.filecap, last_upload.last_checked"
  174.                   " FROM caps,last_upload"
  175.                   " WHERE caps.fileid=? AND last_upload.fileid=?",
  176.                   (last_fileid, last_fileid))
  177.         row2 = c.fetchone()
  178. 
  179.         if ((last_size != size
  180.              or not use_timestamps
  181.              or last_mtime != mtime
  182.              or last_ctime != ctime) # the file has been changed
  183.             or (not row2) # we somehow forgot where we put the file last time
  184.             ):
  185.             c.execute("DELETE FROM local_files WHERE path=?", (path,))
  186.             self.connection.commit()
  187.             return Result(self, None, False, path, mtime, ctime, size)
  188. 
  189.         # at this point, we're allowed to assume the file hasn't been changed
  190.         (filecap, last_checked) = row2
  191.         age = now - last_checked
  192. 
  193.         probability = ((age - self.NO_CHECK_BEFORE) /
  194.                        (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
  195.         probability = min(max(probability, 0.0), 1.0)
  196.         should_check = bool(random.random() < probability)
  197. 
  198.         return Result(self, filecap, should_check, path, mtime, ctime, size)
  199. 
  200.     def get_or_allocate_fileid_for_cap(self, filecap):
  201.         # find an existing fileid for this filecap, or insert a new one. The
  202.         # caller is required to commit() afterwards.
  203. 
  204.         # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite
  205.         # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql
  206.         # So we use INSERT, ignore any error, then a SELECT
  207.         c = self.cursor
  208.         try:
  209.             c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
  210.         except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
  211.             # sqlite3 on sid gives IntegrityError
  212.             # pysqlite2 on dapper gives OperationalError
  213.             pass
  214.         c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
  215.         foundrow = c.fetchone()
  216.         assert foundrow
  217.         fileid = foundrow[0]
  218.         return fileid
  219. 
  220.     def did_upload(self, filecap, path, mtime, ctime, size):
  221.         now = time.time()
  222.         fileid = self.get_or_allocate_fileid_for_cap(filecap)
  223.         try:
  224.             self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
  225.                                 (fileid, now, now))
  226.         except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
  227.             self.cursor.execute("UPDATE last_upload"
  228.                                 " SET last_uploaded=?, last_checked=?"
  229.                                 " WHERE fileid=?",
  230.                                 (now, now, fileid))
  231.         try:
  232.             self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
  233.                                 (path, size, mtime, ctime, fileid))
  234.         except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
  235.             self.cursor.execute("UPDATE local_files"
  236.                                 " SET size=?, mtime=?, ctime=?, fileid=?"
  237.                                 " WHERE path=?",
  238.                                 (size, mtime, ctime, fileid, path))
  239.         self.connection.commit()
  240. 
  241.     def did_check_healthy(self, filecap, results):
  242.         now = time.time()
  243.         fileid = self.get_or_allocate_fileid_for_cap(filecap)
  244.         self.cursor.execute("UPDATE last_upload"
  245.                             " SET last_checked=?"
  246.                             " WHERE fileid=?",
  247.                             (now, fileid))
  248.         self.connection.commit()