Ticket #628: smaller-timefix-patch-allow-user-controlled-mtimectime.txt

File smaller-timefix-patch-allow-user-controlled-mtimectime.txt, 17.5 KB (added by zooko, at 2009-04-08T15:19:14Z)
Line 
1Reading pristine 46/427
2diff -rN -u old-smaller-timefix/docs/frontends/webapi.txt new-smaller-timefix/docs/frontends/webapi.txt
3--- old-smaller-timefix/docs/frontends/webapi.txt       2009-04-08 09:20:49.000000000 -0600
4+++ new-smaller-timefix/docs/frontends/webapi.txt       2009-04-08 09:20:49.000000000 -0600
5@@ -381,28 +381,44 @@
6 GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json
7 
8   This returns a machine-parseable JSON-encoded description of the given
9-  object. The JSON always contains a list, and the first element of the list
10-  is always a flag that indicates whether the referenced object is a file or a
11-  directory. If it is a file, then the information includes file size and URI,
12-  like this:
13+  object. The JSON always contains a list, and the first element of the list is
14+  always a flag that indicates whether the referenced object is a file or a
15+  directory. If it is a capability to a file, then the information includes
16+  file size and URI, like this:
17 
18    GET /uri/$FILECAP?t=json :
19+
20+    [ "filenode", { "ro_uri": file_uri,
21+                    "verify_uri": verify_uri,
22+                    "size": bytes,
23+                    "mutable": false,
24+                    } ]
25+
26+  If it is a capability to a directory followed by a path from that directory
27+  to a file, then the information also includes metadata from the link to the
28+  file in the parent directory, like this:
29+
30    GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json :
31 
32     [ "filenode", { "ro_uri": file_uri,
33                     "verify_uri": verify_uri,
34                     "size": bytes,
35                     "mutable": false,
36-                    "metadata": {"ctime": 1202777696.7564139,
37-                                 "mtime": 1202777696.7564139
38+                    "metadata": {
39+                                 "ctime": 1202777696.7564139,
40+                                 "mtime": 1202777696.7564139,
41+                                 "__sys": {
42+                                         "linkcrtime": 1202777696.7564139,
43+                                         "linkmotime": 1202777696.7564139,
44                                  }
45+                               }
46                     } ]
47 
48   If it is a directory, then it includes information about the children of
49   this directory, as a mapping from child name to a set of data about the
50   child (the same data that would appear in a corresponding GET?t=json of the
51   child itself). The child entries also include metadata about each child,
52-  including creation- and modification- timestamps. The output looks like
53+  including link-creation- and link-change- timestamps. The output looks like
54   this:
55 
56    GET /uri/$DIRCAP?t=json :
57@@ -418,13 +434,21 @@
58                                                 "metadata": {
59                                                   "ctime": 1202777696.7564139,
60                                                   "mtime": 1202777696.7564139
61+                                                  "__sys": {
62+                                                    "linkcrtime": 1202777696.7564139,
63+                                                    "linkmotime": 1202777696.7564139,
64                                                  }
65+                                                }
66                                                } ],
67                      "subdir":  [ "dirnode", { "rw_uri": rwuri,
68                                                "ro_uri": rouri,
69                                                 "metadata": {
70                                                   "ctime": 1202778102.7589991,
71                                                   "mtime": 1202778111.2160511,
72+                                                  "__sys": {
73+                                                    "linkcrtime": 1202777696.7564139,
74+                                                    "linkmotime": 1202777696.7564139,
75+                                                   }
76                                                  }
77                                               } ]
78                     } } ]
79diff -rN -u old-smaller-timefix/docs/specifications/dirnodes.txt new-smaller-timefix/docs/specifications/dirnodes.txt
80--- old-smaller-timefix/docs/specifications/dirnodes.txt        2009-04-08 09:20:49.000000000 -0600
81+++ new-smaller-timefix/docs/specifications/dirnodes.txt        2009-04-08 09:20:49.000000000 -0600
82@@ -176,30 +176,29 @@
83  netstring(cap) = 4+len(cap)
84  encrypted(cap) = 16+cap+32
85  JSON({}) = 2
86- JSON({ctime=float,mtime=float}): 57
87- netstring(metadata) = 4+57 = 61
88+ JSON({ctime=float,mtime=float,'__sys':{linkcrtime=float,linkmotime=float}}): 137
89+ netstring(metadata) = 4+137 = 141
90 
91 so a CHK entry is:
92- 5+ 4+len(name) + 4+97 + 5+16+97+32 + 4+57
93-And a 15-byte filename gives a 336-byte entry. When the entry points at a
94+ 5+ 4+len(name) + 4+97 + 5+16+97+32 + 4+137
95+And a 15-byte filename gives a 416-byte entry. When the entry points at a
96 subdirectory instead of a file, the entry is a little bit smaller. So an
97-empty directory uses 0 bytes, a directory with one child uses about 336
98-bytes, a directory with two children uses about 672, etc.
99+empty directory uses 0 bytes, a directory with one child uses about 416
100+bytes, a directory with two children uses about 832, etc.
101 
102 When the dirnode data is encoding using our default 3-of-10, that means we
103-get 112ish bytes of data in each share per child.
104+get 139ish bytes of data in each share per child.
105 
106 The pubkey, signature, and hashes form the first 935ish bytes of the
107 container, then comes our data, then about 1216 bytes of encprivkey. So if we
108 read the first:
109 
110  1kB: we get 65bytes of dirnode data : only empty directories
111- 1kiB: 89bytes of dirnode data : maybe one short-named subdir
112- 2kB: 1065bytes: about 9 entries
113- 3kB: 2065bytes: about 18 entries, or 7.5 entries plus the encprivkey
114- 4kB: 3065bytes: about 27 entries, or about 16.5 plus the encprivkey
115+ 2kB: 1065bytes: about 8 entries
116+ 3kB: 2065bytes: about 15 entries, or 6 entries plus the encprivkey
117+ 4kB: 3065bytes: about 22 entries, or about 13 plus the encprivkey
118 
119-So we've written the code to do an initial read of 2kB from each share when
120+So we've written the code to do an initial read of 4kB from each share when
121 we read the mutable file, which should give good performance (one RTT) for
122 small directories.
123 
124diff -rN -u old-smaller-timefix/src/allmydata/dirnode.py new-smaller-timefix/src/allmydata/dirnode.py
125--- old-smaller-timefix/src/allmydata/dirnode.py        2009-04-08 09:20:49.000000000 -0600
126+++ new-smaller-timefix/src/allmydata/dirnode.py        2009-04-08 09:20:49.000000000 -0600
127@@ -83,15 +83,41 @@
128                 metadata = children[name][1].copy()
129             else:
130                 metadata = {"ctime": now,
131-                            "mtime": now}
132-            if new_metadata is None:
133-                # update timestamps
134+                            "mtime": now,
135+                            "__sys": {
136+                                "linkcrtime": now,
137+                                "linkmotime": now,
138+                                }
139+                            }
140+
141+            if new_metadata is not None:
142+                # Overwrite all metadata.
143+                newmd = new_metadata.copy()
144+
145+                # Except '__sys'.
146+                if newmd.has_key('__sys'):
147+                    del newmd['__sys']
148+                if metadata.has_key('__sys'):
149+                    newmd['__sys'] = metadata['__sys']
150+
151+                metadata = newmd
152+            else:
153+                # For backwards compatibility with Tahoe < 1.4.0:
154                 if "ctime" not in metadata:
155                     metadata["ctime"] = now
156                 metadata["mtime"] = now
157-            else:
158-                # just replace it
159-                metadata = new_metadata.copy()
160+
161+            # update timestamps
162+            sysmd = metadata.get('__sys', {})
163+            if not 'linkcrtime' in sysmd:
164+                if "ctime" in metadata:
165+                    # In Tahoe < 1.4.0 we used the word "ctime" to mean what Tahoe >= 1.4.0
166+                    # calls "linkcrtime".
167+                    sysmd["linkcrtime"] = metadata["ctime"]
168+                else:
169+                    sysmd["linkcrtime"] = now
170+            sysmd["linkmotime"] = now
171+
172             children[name] = (child, metadata)
173         new_contents = self.node._pack_contents(children)
174         return new_contents
175diff -rN -u old-smaller-timefix/src/allmydata/mutable/servermap.py new-smaller-timefix/src/allmydata/mutable/servermap.py
176--- old-smaller-timefix/src/allmydata/mutable/servermap.py      2009-04-08 09:20:49.000000000 -0600
177+++ new-smaller-timefix/src/allmydata/mutable/servermap.py      2009-04-08 09:20:49.000000000 -0600
178@@ -374,7 +374,7 @@
179         # fixed-size slots so we can retrieve less data. For now, we'll just
180         # read 2000 bytes, which also happens to read enough actual data to
181         # pre-fetch a 9-entry dirnode.
182-        self._read_size = 2000
183+        self._read_size = 4000
184         if mode == MODE_CHECK:
185             # we use unpack_prefix_and_signature, so we need 1k
186             self._read_size = 1000
187diff -rN -u old-smaller-timefix/src/allmydata/scripts/tahoe_ls.py new-smaller-timefix/src/allmydata/scripts/tahoe_ls.py
188--- old-smaller-timefix/src/allmydata/scripts/tahoe_ls.py       2009-04-08 09:20:49.000000000 -0600
189+++ new-smaller-timefix/src/allmydata/scripts/tahoe_ls.py       2009-04-08 09:20:49.000000000 -0600
190@@ -65,8 +65,20 @@
191         name = unicode(name)
192         child = children[name]
193         childtype = child[0]
194-        ctime = child[1]["metadata"].get("ctime")
195-        mtime = child[1]["metadata"].get("mtime")
196+
197+        # linkcrtime is not really what unix filesystems mean by "ctime", but
198+        # it *is* apparently what many or even most unix programmers and users
199+        # think that a unix filesystem means by "ctime"...
200+        ctime = child[1].get("metadata", {}).get('__sys', {}).get("linkcrtime")
201+        if not ctime:
202+            ctime = child[1]["metadata"].get("ctime")
203+
204+        # linkmotime is not really what unix filesystems mean by "mtime",
205+        # because linkmotime is a property of the link and mtime is a property
206+        # of the file contents...
207+        mtime = child[1].get("metadata", {}).get('__sys', {}).get("linkmotime")
208+        if not mtime:
209+            mtime = child[1]["metadata"].get("mtime")
210         rw_uri = child[1].get("rw_uri")
211         ro_uri = child[1].get("ro_uri")
212         if ctime:
213diff -rN -u old-smaller-timefix/src/allmydata/test/test_dirnode.py new-smaller-timefix/src/allmydata/test/test_dirnode.py
214--- old-smaller-timefix/src/allmydata/test/test_dirnode.py      2009-04-08 09:20:49.000000000 -0600
215+++ new-smaller-timefix/src/allmydata/test/test_dirnode.py      2009-04-08 09:20:50.000000000 -0600
216@@ -416,7 +416,7 @@
217             d.addCallback(lambda res: n.get_metadata_for(u"child"))
218             d.addCallback(lambda metadata:
219                           self.failUnlessEqual(sorted(metadata.keys()),
220-                                               ["ctime", "mtime"]))
221+                                               ["__sys", "ctime", "mtime"]))
222 
223             d.addCallback(lambda res:
224                           self.shouldFail(NoSuchChildError, "gcamap-no",
225@@ -439,7 +439,7 @@
226                 self.failUnlessEqual(child.get_uri(),
227                                      fake_file_uri.to_string())
228                 self.failUnlessEqual(sorted(metadata.keys()),
229-                                     ["ctime", "mtime"])
230+                                     ["__sys", "ctime", "mtime"])
231             d.addCallback(_check_child_and_metadata2)
232 
233             d.addCallback(lambda res:
234@@ -448,36 +448,43 @@
235                 child, metadata = res
236                 self.failUnless(isinstance(child, FakeDirectoryNode))
237                 self.failUnlessEqual(sorted(metadata.keys()),
238-                                     ["ctime", "mtime"])
239+                                     ["__sys", "ctime", "mtime"])
240             d.addCallback(_check_child_and_metadata3)
241 
242             # set_uri + metadata
243-            # it should be possible to add a child without any metadata
244+            # it should not be possible to add a child without any metadata
245             d.addCallback(lambda res: n.set_uri(u"c2", fake_file_uri.to_string(), {}))
246             d.addCallback(lambda res: n.get_metadata_for(u"c2"))
247-            d.addCallback(lambda metadata: self.failUnlessEqual(metadata, {}))
248+            def _has_ltimes(metadata):
249+                self.failUnless(metadata.has_key('ctime'))
250+                self.failUnless(metadata.has_key('mtime'))
251+                self.failUnless(metadata.has_key('__sys'))
252+                self.failUnless(metadata['__sys'].has_key('linkcrtime'))
253+                self.failUnless(metadata['__sys'].has_key('linkmotime'))
254+            d.addCallback(_has_ltimes)
255+
256+            # nor to override the link timestamps with the "metadata" argument
257+            d.addCallback(lambda res: n.set_uri(u"c2", fake_file_uri.to_string(), { '__sys': {'linkcrtime': "bogus"}}))
258+            d.addCallback(lambda res: n.get_metadata_for(u"c2"))
259+            def _has_good_linkcrtime(metadata):
260+                self.failUnless(metadata.has_key('__sys'))
261+                self.failUnless(metadata['__sys'].has_key('linkcrtime'))
262+                self.failIfEqual(metadata['__sys']['linkcrtime'], 'bogus')
263+            d.addCallback(_has_good_linkcrtime)
264 
265             # if we don't set any defaults, the child should get timestamps
266             d.addCallback(lambda res: n.set_uri(u"c3", fake_file_uri.to_string()))
267             d.addCallback(lambda res: n.get_metadata_for(u"c3"))
268             d.addCallback(lambda metadata:
269                           self.failUnlessEqual(sorted(metadata.keys()),
270-                                               ["ctime", "mtime"]))
271-
272-            # or we can add specific metadata at set_uri() time, which
273-            # overrides the timestamps
274-            d.addCallback(lambda res: n.set_uri(u"c4", fake_file_uri.to_string(),
275-                                                {"key": "value"}))
276-            d.addCallback(lambda res: n.get_metadata_for(u"c4"))
277-            d.addCallback(lambda metadata:
278-                          self.failUnlessEqual(metadata, {"key": "value"}))
279+                                               ["__sys", "ctime", "mtime"]))
280 
281             d.addCallback(lambda res: n.delete(u"c2"))
282             d.addCallback(lambda res: n.delete(u"c3"))
283             d.addCallback(lambda res: n.delete(u"c4"))
284 
285             # set_node + metadata
286-            # it should be possible to add a child without any metadata
287+            # it should be impossible to add a child without any metadata
288             d.addCallback(lambda res: n.set_node(u"d2", n, {}))
289             d.addCallback(lambda res: self.client.create_empty_dirnode())
290             d.addCallback(lambda n2:
291diff -rN -u old-smaller-timefix/src/allmydata/util/time_format.py new-smaller-timefix/src/allmydata/util/time_format.py
292--- old-smaller-timefix/src/allmydata/util/time_format.py       2009-04-08 09:20:49.000000000 -0600
293+++ new-smaller-timefix/src/allmydata/util/time_format.py       2009-04-08 09:20:50.000000000 -0600
294@@ -19,6 +19,11 @@
295         now = t()
296     return datetime.datetime.utcfromtimestamp(now).isoformat(sep)
297 
298+def iso_local(now=None, sep='_', t=time.time):
299+    if now is None:
300+        now = t()
301+    return datetime.datetime.fromtimestamp(now).isoformat(sep)
302+
303 def iso_utc_time_to_seconds(isotime, _conversion_re=re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})[T_ ](?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<subsecond>\.\d+)?")):
304     """
305     The inverse of iso_utc().
306diff -rN -u old-smaller-timefix/src/allmydata/web/directory.py new-smaller-timefix/src/allmydata/web/directory.py
307--- old-smaller-timefix/src/allmydata/web/directory.py  2009-04-08 09:20:49.000000000 -0600
308+++ new-smaller-timefix/src/allmydata/web/directory.py  2009-04-08 09:20:50.000000000 -0600
309@@ -13,7 +13,7 @@
310 
311 from foolscap.eventual import fireEventually
312 
313-from allmydata.util import base32
314+from allmydata.util import base32, time_format
315 from allmydata.uri import from_string_dirnode
316 from allmydata.interfaces import IDirectoryNode, IFileNode, IMutableFileNode, \
317      ExistingChildError, NoSuchChildError
318@@ -592,16 +592,25 @@
319         ctx.fillSlots("rename", rename)
320 
321         times = []
322-        TIME_FORMAT = "%H:%M:%S %d-%b-%Y"
323-        if "ctime" in metadata:
324-            ctime = time.strftime(TIME_FORMAT,
325-                                  time.localtime(metadata["ctime"]))
326-            times.append("c: " + ctime)
327-        if "mtime" in metadata:
328-            mtime = time.strftime(TIME_FORMAT,
329-                                  time.localtime(metadata["mtime"]))
330+        linkcrtime = metadata.get('__sys', {}).get("linkcrtime")
331+        if linkcrtime is not None:
332+            times.append("lcr: " + time_format.iso_local(linkcrtime))
333+        else:
334+            # For backwards-compatibility with links last modified by Tahoe < 1.4.0:
335+            if "ctime" in metadata:
336+                ctime = time_format.iso_local(metadata["ctime"])
337+                times.append("c: " + ctime)
338+        linkmotime = metadata.get('__sys', {}).get("linkmotime")
339+        if linkmotime is not None:
340             if times:
341                 times.append(T.br())
342+            times.append("lmo: " + time_format.iso_local(linkmotime))
343+        else:
344+            # For backwards-compatibility with links last modified by Tahoe < 1.4.0:
345+            if "mtime" in metadata:
346+                mtime = time_format.iso_local(metadata["mtime"])
347+                if times:
348+                    times.append(T.br())
349                 times.append("m: " + mtime)
350         ctx.fillSlots("times", times)
351 
352