Ticket #628: smaller-timefix-patch.txt

File smaller-timefix-patch.txt, 17.6 KB (added by zooko, at 2009-04-08T15:14:08Z)
Line 
1diff -rN -u old-smaller-timefix/docs/frontends/webapi.txt new-smaller-timefix/docs/frontends/webapi.txt
2--- old-smaller-timefix/docs/frontends/webapi.txt       2009-04-08 09:15:52.000000000 -0600
3+++ new-smaller-timefix/docs/frontends/webapi.txt       2009-04-08 09:15:53.000000000 -0600
4@@ -381,28 +381,44 @@
5 GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json
6 
7   This returns a machine-parseable JSON-encoded description of the given
8-  object. The JSON always contains a list, and the first element of the list
9-  is always a flag that indicates whether the referenced object is a file or a
10-  directory. If it is a file, then the information includes file size and URI,
11-  like this:
12+  object. The JSON always contains a list, and the first element of the list is
13+  always a flag that indicates whether the referenced object is a file or a
14+  directory. If it is a capability to a file, then the information includes
15+  file size and URI, like this:
16 
17    GET /uri/$FILECAP?t=json :
18+
19+    [ "filenode", { "ro_uri": file_uri,
20+                    "verify_uri": verify_uri,
21+                    "size": bytes,
22+                    "mutable": false,
23+                    } ]
24+
25+  If it is a capability to a directory followed by a path from that directory
26+  to a file, then the information also includes metadata from the link to the
27+  file in the parent directory, like this:
28+
29    GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json :
30 
31     [ "filenode", { "ro_uri": file_uri,
32                     "verify_uri": verify_uri,
33                     "size": bytes,
34                     "mutable": false,
35-                    "metadata": {"ctime": 1202777696.7564139,
36-                                 "mtime": 1202777696.7564139
37+                    "metadata": {
38+                                 "ctime": 1202777696.7564139,
39+                                 "mtime": 1202777696.7564139,
40+                                 "__sys": {
41+                                         "linkcrtime": 1202777696.7564139,
42+                                         "linkmotime": 1202777696.7564139,
43                                  }
44+                               }
45                     } ]
46 
47   If it is a directory, then it includes information about the children of
48   this directory, as a mapping from child name to a set of data about the
49   child (the same data that would appear in a corresponding GET?t=json of the
50   child itself). The child entries also include metadata about each child,
51-  including creation- and modification- timestamps. The output looks like
52+  including link-creation- and link-modification- timestamps. The output looks like
53   this:
54 
55    GET /uri/$DIRCAP?t=json :
56@@ -418,13 +434,21 @@
57                                                 "metadata": {
58                                                   "ctime": 1202777696.7564139,
59                                                   "mtime": 1202777696.7564139
60+                                                  "__sys": {
61+                                                    "linkcrtime": 1202777696.7564139,
62+                                                    "linkmotime": 1202777696.7564139,
63                                                  }
64+                                                }
65                                                } ],
66                      "subdir":  [ "dirnode", { "rw_uri": rwuri,
67                                                "ro_uri": rouri,
68                                                 "metadata": {
69                                                   "ctime": 1202778102.7589991,
70                                                   "mtime": 1202778111.2160511,
71+                                                  "__sys": {
72+                                                    "linkcrtime": 1202777696.7564139,
73+                                                    "linkmotime": 1202777696.7564139,
74+                                                   }
75                                                  }
76                                               } ]
77                     } } ]
78diff -rN -u old-smaller-timefix/docs/specifications/dirnodes.txt new-smaller-timefix/docs/specifications/dirnodes.txt
79--- old-smaller-timefix/docs/specifications/dirnodes.txt        2009-04-08 09:15:52.000000000 -0600
80+++ new-smaller-timefix/docs/specifications/dirnodes.txt        2009-04-08 09:15:53.000000000 -0600
81@@ -176,30 +176,29 @@
82  netstring(cap) = 4+len(cap)
83  encrypted(cap) = 16+cap+32
84  JSON({}) = 2
85- JSON({ctime=float,mtime=float}): 57
86- netstring(metadata) = 4+57 = 61
87+ JSON({ctime=float,mtime=float,'__sys':{linkcrtime=float,linkmotime=float}}): 137
88+ netstring(metadata) = 4+137 = 141
89 
90 so a CHK entry is:
91- 5+ 4+len(name) + 4+97 + 5+16+97+32 + 4+57
92-And a 15-byte filename gives a 336-byte entry. When the entry points at a
93+ 5+ 4+len(name) + 4+97 + 5+16+97+32 + 4+137
94+And a 15-byte filename gives a 416-byte entry. When the entry points at a
95 subdirectory instead of a file, the entry is a little bit smaller. So an
96-empty directory uses 0 bytes, a directory with one child uses about 336
97-bytes, a directory with two children uses about 672, etc.
98+empty directory uses 0 bytes, a directory with one child uses about 416
99+bytes, a directory with two children uses about 832, etc.
100 
101 When the dirnode data is encoding using our default 3-of-10, that means we
102-get 112ish bytes of data in each share per child.
103+get 139ish bytes of data in each share per child.
104 
105 The pubkey, signature, and hashes form the first 935ish bytes of the
106 container, then comes our data, then about 1216 bytes of encprivkey. So if we
107 read the first:
108 
109  1kB: we get 65bytes of dirnode data : only empty directories
110- 1kiB: 89bytes of dirnode data : maybe one short-named subdir
111- 2kB: 1065bytes: about 9 entries
112- 3kB: 2065bytes: about 18 entries, or 7.5 entries plus the encprivkey
113- 4kB: 3065bytes: about 27 entries, or about 16.5 plus the encprivkey
114+ 2kB: 1065bytes: about 8 entries
115+ 3kB: 2065bytes: about 15 entries, or 6 entries plus the encprivkey
116+ 4kB: 3065bytes: about 22 entries, or about 13 plus the encprivkey
117 
118-So we've written the code to do an initial read of 2kB from each share when
119+So we've written the code to do an initial read of 4kB from each share when
120 we read the mutable file, which should give good performance (one RTT) for
121 small directories.
122 
123diff -rN -u old-smaller-timefix/src/allmydata/dirnode.py new-smaller-timefix/src/allmydata/dirnode.py
124--- old-smaller-timefix/src/allmydata/dirnode.py        2009-04-08 09:15:52.000000000 -0600
125+++ new-smaller-timefix/src/allmydata/dirnode.py        2009-04-08 09:15:53.000000000 -0600
126@@ -83,15 +83,41 @@
127                 metadata = children[name][1].copy()
128             else:
129                 metadata = {"ctime": now,
130-                            "mtime": now}
131-            if new_metadata is None:
132-                # update timestamps
133-                if "ctime" not in metadata:
134-                    metadata["ctime"] = now
135-                metadata["mtime"] = now
136-            else:
137-                # just replace it
138-                metadata = new_metadata.copy()
139+                            "mtime": now,
140+                            "__sys": {
141+                                "linkcrtime": now,
142+                                "linkmotime": now,
143+                                }
144+                            }
145+
146+            if new_metadata is not None:
147+                # Overwrite all metadata.
148+                newmd = new_metadata.copy()
149+
150+                # Except '__sys'.
151+                if newmd.has_key('__sys'):
152+                    del newmd['__sys']
153+                if metadata.has_key('__sys'):
154+                    newmd['__sys'] = metadata['__sys']
155+
156+                metadata = newmd
157+
158+            # update timestamps
159+            sysmd = metadata.get('__sys', {})
160+            if not 'linkcrtime' in sysmd:
161+                if "ctime" in metadata:
162+                    # In Tahoe < 1.4.0 we used the word "ctime" to mean what Tahoe >= 1.4.0
163+                    # calls "linkcrtime".
164+                    sysmd["linkcrtime"] = metadata["ctime"]
165+                else:
166+                    sysmd["linkcrtime"] = now
167+            sysmd["linkmotime"] = now
168+
169+            # For backwards compatibility with Tahoe < 1.4.0:
170+            if "ctime" not in metadata:
171+                metadata["ctime"] = now
172+            metadata["mtime"] = now
173+
174             children[name] = (child, metadata)
175         new_contents = self.node._pack_contents(children)
176         return new_contents
177diff -rN -u old-smaller-timefix/src/allmydata/mutable/servermap.py new-smaller-timefix/src/allmydata/mutable/servermap.py
178--- old-smaller-timefix/src/allmydata/mutable/servermap.py      2009-04-08 09:15:52.000000000 -0600
179+++ new-smaller-timefix/src/allmydata/mutable/servermap.py      2009-04-08 09:15:53.000000000 -0600
180@@ -374,7 +374,7 @@
181         # fixed-size slots so we can retrieve less data. For now, we'll just
182         # read 2000 bytes, which also happens to read enough actual data to
183         # pre-fetch a 9-entry dirnode.
184-        self._read_size = 2000
185+        self._read_size = 4000
186         if mode == MODE_CHECK:
187             # we use unpack_prefix_and_signature, so we need 1k
188             self._read_size = 1000
189diff -rN -u old-smaller-timefix/src/allmydata/scripts/tahoe_ls.py new-smaller-timefix/src/allmydata/scripts/tahoe_ls.py
190--- old-smaller-timefix/src/allmydata/scripts/tahoe_ls.py       2009-04-08 09:15:52.000000000 -0600
191+++ new-smaller-timefix/src/allmydata/scripts/tahoe_ls.py       2009-04-08 09:15:53.000000000 -0600
192@@ -65,8 +65,20 @@
193         name = unicode(name)
194         child = children[name]
195         childtype = child[0]
196-        ctime = child[1]["metadata"].get("ctime")
197-        mtime = child[1]["metadata"].get("mtime")
198+
199+        # linkcrtime is not really what unix filesystems mean by "ctime", but
200+        # it *is* apparently what many or even most unix programmers and users
201+        # think that a unix filesystem means by "ctime"...
202+        ctime = child[1].get("metadata", {}).get('__sys', {}).get("linkcrtime")
203+        if not ctime:
204+            ctime = child[1]["metadata"].get("ctime")
205+
206+        # linkmotime is not really what unix filesystems mean by "mtime",
207+        # because linkmotime is a property of the link and mtime is a property
208+        # of the file contents...
209+        mtime = child[1].get("metadata", {}).get('__sys', {}).get("linkmotime")
210+        if not mtime:
211+            mtime = child[1]["metadata"].get("mtime")
212         rw_uri = child[1].get("rw_uri")
213         ro_uri = child[1].get("ro_uri")
214         if ctime:
215diff -rN -u old-smaller-timefix/src/allmydata/test/test_dirnode.py new-smaller-timefix/src/allmydata/test/test_dirnode.py
216--- old-smaller-timefix/src/allmydata/test/test_dirnode.py      2009-04-08 09:15:52.000000000 -0600
217+++ new-smaller-timefix/src/allmydata/test/test_dirnode.py      2009-04-08 09:15:53.000000000 -0600
218@@ -416,7 +416,7 @@
219             d.addCallback(lambda res: n.get_metadata_for(u"child"))
220             d.addCallback(lambda metadata:
221                           self.failUnlessEqual(sorted(metadata.keys()),
222-                                               ["ctime", "mtime"]))
223+                                               ["__sys", "ctime", "mtime"]))
224 
225             d.addCallback(lambda res:
226                           self.shouldFail(NoSuchChildError, "gcamap-no",
227@@ -439,7 +439,7 @@
228                 self.failUnlessEqual(child.get_uri(),
229                                      fake_file_uri.to_string())
230                 self.failUnlessEqual(sorted(metadata.keys()),
231-                                     ["ctime", "mtime"])
232+                                     ["__sys", "ctime", "mtime"])
233             d.addCallback(_check_child_and_metadata2)
234 
235             d.addCallback(lambda res:
236@@ -448,36 +448,43 @@
237                 child, metadata = res
238                 self.failUnless(isinstance(child, FakeDirectoryNode))
239                 self.failUnlessEqual(sorted(metadata.keys()),
240-                                     ["ctime", "mtime"])
241+                                     ["__sys", "ctime", "mtime"])
242             d.addCallback(_check_child_and_metadata3)
243 
244             # set_uri + metadata
245-            # it should be possible to add a child without any metadata
246+            # it should not be possible to add a child without any metadata
247             d.addCallback(lambda res: n.set_uri(u"c2", fake_file_uri.to_string(), {}))
248             d.addCallback(lambda res: n.get_metadata_for(u"c2"))
249-            d.addCallback(lambda metadata: self.failUnlessEqual(metadata, {}))
250+            def _has_ltimes(metadata):
251+                self.failUnless(metadata.has_key('ctime'))
252+                self.failUnless(metadata.has_key('mtime'))
253+                self.failUnless(metadata.has_key('__sys'))
254+                self.failUnless(metadata['__sys'].has_key('linkcrtime'))
255+                self.failUnless(metadata['__sys'].has_key('linkmotime'))
256+            d.addCallback(_has_ltimes)
257+
258+            # nor to override the link timestamps with the "metadata" argument
259+            d.addCallback(lambda res: n.set_uri(u"c2", fake_file_uri.to_string(), { '__sys': {'linkcrtime': "bogus"}}))
260+            d.addCallback(lambda res: n.get_metadata_for(u"c2"))
261+            def _has_good_linkcrtime(metadata):
262+                self.failUnless(metadata.has_key('__sys'))
263+                self.failUnless(metadata['__sys'].has_key('linkcrtime'))
264+                self.failIfEqual(metadata['__sys']['linkcrtime'], 'bogus')
265+            d.addCallback(_has_good_linkcrtime)
266 
267             # if we don't set any defaults, the child should get timestamps
268             d.addCallback(lambda res: n.set_uri(u"c3", fake_file_uri.to_string()))
269             d.addCallback(lambda res: n.get_metadata_for(u"c3"))
270             d.addCallback(lambda metadata:
271                           self.failUnlessEqual(sorted(metadata.keys()),
272-                                               ["ctime", "mtime"]))
273-
274-            # or we can add specific metadata at set_uri() time, which
275-            # overrides the timestamps
276-            d.addCallback(lambda res: n.set_uri(u"c4", fake_file_uri.to_string(),
277-                                                {"key": "value"}))
278-            d.addCallback(lambda res: n.get_metadata_for(u"c4"))
279-            d.addCallback(lambda metadata:
280-                          self.failUnlessEqual(metadata, {"key": "value"}))
281+                                               ["__sys", "ctime", "mtime"]))
282 
283             d.addCallback(lambda res: n.delete(u"c2"))
284             d.addCallback(lambda res: n.delete(u"c3"))
285             d.addCallback(lambda res: n.delete(u"c4"))
286 
287             # set_node + metadata
288-            # it should be possible to add a child without any metadata
289+            # it should be impossible to add a child without any metadata
290             d.addCallback(lambda res: n.set_node(u"d2", n, {}))
291             d.addCallback(lambda res: self.client.create_empty_dirnode())
292             d.addCallback(lambda n2:
293diff -rN -u old-smaller-timefix/src/allmydata/util/time_format.py new-smaller-timefix/src/allmydata/util/time_format.py
294--- old-smaller-timefix/src/allmydata/util/time_format.py       2009-04-08 09:15:52.000000000 -0600
295+++ new-smaller-timefix/src/allmydata/util/time_format.py       2009-04-08 09:15:53.000000000 -0600
296@@ -19,6 +19,11 @@
297         now = t()
298     return datetime.datetime.utcfromtimestamp(now).isoformat(sep)
299 
300+def iso_local(now=None, sep='_', t=time.time):
301+    if now is None:
302+        now = t()
303+    return datetime.datetime.fromtimestamp(now).isoformat(sep)
304+
305 def iso_utc_time_to_seconds(isotime, _conversion_re=re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})[T_ ](?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<subsecond>\.\d+)?")):
306     """
307     The inverse of iso_utc().
308diff -rN -u old-smaller-timefix/src/allmydata/web/directory.py new-smaller-timefix/src/allmydata/web/directory.py
309--- old-smaller-timefix/src/allmydata/web/directory.py  2009-04-08 09:15:52.000000000 -0600
310+++ new-smaller-timefix/src/allmydata/web/directory.py  2009-04-08 09:15:53.000000000 -0600
311@@ -13,7 +13,7 @@
312 
313 from foolscap.eventual import fireEventually
314 
315-from allmydata.util import base32
316+from allmydata.util import base32, time_format
317 from allmydata.uri import from_string_dirnode
318 from allmydata.interfaces import IDirectoryNode, IFileNode, IMutableFileNode, \
319      ExistingChildError, NoSuchChildError
320@@ -592,16 +592,25 @@
321         ctx.fillSlots("rename", rename)
322 
323         times = []
324-        TIME_FORMAT = "%H:%M:%S %d-%b-%Y"
325-        if "ctime" in metadata:
326-            ctime = time.strftime(TIME_FORMAT,
327-                                  time.localtime(metadata["ctime"]))
328-            times.append("c: " + ctime)
329-        if "mtime" in metadata:
330-            mtime = time.strftime(TIME_FORMAT,
331-                                  time.localtime(metadata["mtime"]))
332+        linkcrtime = metadata.get('__sys', {}).get("linkcrtime")
333+        if linkcrtime is not None:
334+            times.append("lcr: " + time_format.iso_local(linkcrtime))
335+        else:
336+            # For backwards-compatibility with links last modified by Tahoe < 1.4.0:
337+            if "ctime" in metadata:
338+                ctime = time_format.iso_local(metadata["ctime"])
339+                times.append("c: " + ctime)
340+        linkmotime = metadata.get('__sys', {}).get("linkmotime")
341+        if linkmotime is not None:
342             if times:
343                 times.append(T.br())
344+            times.append("lmo: " + time_format.iso_local(linkmotime))
345+        else:
346+            # For backwards-compatibility with links last modified by Tahoe < 1.4.0:
347+            if "mtime" in metadata:
348+                mtime = time_format.iso_local(metadata["mtime"])
349+                if times:
350+                    times.append(T.br())
351                 times.append("m: " + mtime)
352         ctx.fillSlots("times", times)
353 
354