Ticket #628: smaller-timefix-allow-user-controlled-metadata-and-dont-break-tests.patch.txt

File smaller-timefix-allow-user-controlled-metadata-and-dont-break-tests.patch.txt, 13.7 KB (added by zooko, at 2009-04-09T03:23:41Z)
Line 
1diff -rN -u old-timestamps/docs/frontends/webapi.txt new-timestamps/docs/frontends/webapi.txt
2--- old-timestamps/docs/frontends/webapi.txt    2009-04-08 21:17:06.000000000 -0600
3+++ new-timestamps/docs/frontends/webapi.txt    2009-04-08 21:17:08.000000000 -0600
4@@ -381,28 +381,44 @@
5 GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json
6 
7   This returns a machine-parseable JSON-encoded description of the given
8-  object. The JSON always contains a list, and the first element of the list
9-  is always a flag that indicates whether the referenced object is a file or a
10-  directory. If it is a file, then the information includes file size and URI,
11-  like this:
12+  object. The JSON always contains a list, and the first element of the list is
13+  always a flag that indicates whether the referenced object is a file or a
14+  directory. If it is a capability to a file, then the information includes
15+  file size and URI, like this:
16 
17    GET /uri/$FILECAP?t=json :
18+
19+    [ "filenode", { "ro_uri": file_uri,
20+                    "verify_uri": verify_uri,
21+                    "size": bytes,
22+                    "mutable": false,
23+                    } ]
24+
25+  If it is a capability to a directory followed by a path from that directory
26+  to a file, then the information also includes metadata from the link to the
27+  file in the parent directory, like this:
28+
29    GET /uri/$DIRCAP/[SUBDIRS../]FILENAME?t=json :
30 
31     [ "filenode", { "ro_uri": file_uri,
32                     "verify_uri": verify_uri,
33                     "size": bytes,
34                     "mutable": false,
35-                    "metadata": {"ctime": 1202777696.7564139,
36-                                 "mtime": 1202777696.7564139
37+                    "metadata": {
38+                                 "ctime": 1202777696.7564139,
39+                                 "mtime": 1202777696.7564139,
40+                                 "__sys": {
41+                                         "linkcrtime": 1202777696.7564139,
42+                                         "linkmotime": 1202777696.7564139,
43                                  }
44+                               }
45                     } ]
46 
47   If it is a directory, then it includes information about the children of
48   this directory, as a mapping from child name to a set of data about the
49   child (the same data that would appear in a corresponding GET?t=json of the
50   child itself). The child entries also include metadata about each child,
51-  including creation- and modification- timestamps. The output looks like
52+  including link-creation- and link-modification- timestamps. The output looks like
53   this:
54 
55    GET /uri/$DIRCAP?t=json :
56@@ -418,13 +434,21 @@
57                                                 "metadata": {
58                                                   "ctime": 1202777696.7564139,
59                                                   "mtime": 1202777696.7564139
60+                                                  "__sys": {
61+                                                    "linkcrtime": 1202777696.7564139,
62+                                                    "linkmotime": 1202777696.7564139,
63                                                  }
64+                                                }
65                                                } ],
66                      "subdir":  [ "dirnode", { "rw_uri": rwuri,
67                                                "ro_uri": rouri,
68                                                 "metadata": {
69                                                   "ctime": 1202778102.7589991,
70                                                   "mtime": 1202778111.2160511,
71+                                                  "__sys": {
72+                                                    "linkcrtime": 1202777696.7564139,
73+                                                    "linkmotime": 1202777696.7564139,
74+                                                   }
75                                                  }
76                                               } ]
77                     } } ]
78diff -rN -u old-timestamps/docs/specifications/dirnodes.txt new-timestamps/docs/specifications/dirnodes.txt
79--- old-timestamps/docs/specifications/dirnodes.txt     2009-04-08 21:17:06.000000000 -0600
80+++ new-timestamps/docs/specifications/dirnodes.txt     2009-04-08 21:17:08.000000000 -0600
81@@ -176,30 +176,29 @@
82  netstring(cap) = 4+len(cap)
83  encrypted(cap) = 16+cap+32
84  JSON({}) = 2
85- JSON({ctime=float,mtime=float}): 57
86- netstring(metadata) = 4+57 = 61
87+ JSON({ctime=float,mtime=float,'__sys':{linkcrtime=float,linkmotime=float}}): 137
88+ netstring(metadata) = 4+137 = 141
89 
90 so a CHK entry is:
91- 5+ 4+len(name) + 4+97 + 5+16+97+32 + 4+57
92-And a 15-byte filename gives a 336-byte entry. When the entry points at a
93+ 5+ 4+len(name) + 4+97 + 5+16+97+32 + 4+137
94+And a 15-byte filename gives a 416-byte entry. When the entry points at a
95 subdirectory instead of a file, the entry is a little bit smaller. So an
96-empty directory uses 0 bytes, a directory with one child uses about 336
97-bytes, a directory with two children uses about 672, etc.
98+empty directory uses 0 bytes, a directory with one child uses about 416
99+bytes, a directory with two children uses about 832, etc.
100 
101 When the dirnode data is encoding using our default 3-of-10, that means we
102-get 112ish bytes of data in each share per child.
103+get 139ish bytes of data in each share per child.
104 
105 The pubkey, signature, and hashes form the first 935ish bytes of the
106 container, then comes our data, then about 1216 bytes of encprivkey. So if we
107 read the first:
108 
109  1kB: we get 65bytes of dirnode data : only empty directories
110- 1kiB: 89bytes of dirnode data : maybe one short-named subdir
111- 2kB: 1065bytes: about 9 entries
112- 3kB: 2065bytes: about 18 entries, or 7.5 entries plus the encprivkey
113- 4kB: 3065bytes: about 27 entries, or about 16.5 plus the encprivkey
114+ 2kB: 1065bytes: about 8 entries
115+ 3kB: 2065bytes: about 15 entries, or 6 entries plus the encprivkey
116+ 4kB: 3065bytes: about 22 entries, or about 13 plus the encprivkey
117 
118-So we've written the code to do an initial read of 2kB from each share when
119+So we've written the code to do an initial read of 4kB from each share when
120 we read the mutable file, which should give good performance (one RTT) for
121 small directories.
122 
123diff -rN -u old-timestamps/src/allmydata/dirnode.py new-timestamps/src/allmydata/dirnode.py
124--- old-timestamps/src/allmydata/dirnode.py     2009-04-08 21:17:06.000000000 -0600
125+++ new-timestamps/src/allmydata/dirnode.py     2009-04-08 21:17:08.000000000 -0600
126@@ -83,15 +83,41 @@
127                 metadata = children[name][1].copy()
128             else:
129                 metadata = {"ctime": now,
130-                            "mtime": now}
131-            if new_metadata is None:
132-                # update timestamps
133+                            "mtime": now,
134+                            "__sys": {
135+                                "linkcrtime": now,
136+                                "linkmotime": now,
137+                                }
138+                            }
139+
140+            if new_metadata is not None:
141+                # Overwrite all metadata.
142+                newmd = new_metadata.copy()
143+
144+                # Except '__sys'.
145+                if newmd.has_key('__sys'):
146+                    del newmd['__sys']
147+                if metadata.has_key('__sys'):
148+                    newmd['__sys'] = metadata['__sys']
149+
150+                metadata = newmd
151+            else:
152+                # For backwards compatibility with Tahoe < 1.4.0:
153                 if "ctime" not in metadata:
154                     metadata["ctime"] = now
155                 metadata["mtime"] = now
156-            else:
157-                # just replace it
158-                metadata = new_metadata.copy()
159+
160+            # update timestamps
161+            sysmd = metadata.get('__sys', {})
162+            if not 'linkcrtime' in sysmd:
163+                if "ctime" in metadata:
164+                    # In Tahoe < 1.4.0 we used the word "ctime" to mean what Tahoe >= 1.4.0
165+                    # calls "linkcrtime".
166+                    sysmd["linkcrtime"] = metadata["ctime"]
167+                else:
168+                    sysmd["linkcrtime"] = now
169+            sysmd["linkmotime"] = now
170+
171             children[name] = (child, metadata)
172         new_contents = self.node._pack_contents(children)
173         return new_contents
174diff -rN -u old-timestamps/src/allmydata/mutable/servermap.py new-timestamps/src/allmydata/mutable/servermap.py
175--- old-timestamps/src/allmydata/mutable/servermap.py   2009-04-08 21:17:07.000000000 -0600
176+++ new-timestamps/src/allmydata/mutable/servermap.py   2009-04-08 21:17:08.000000000 -0600
177@@ -374,7 +374,7 @@
178         # fixed-size slots so we can retrieve less data. For now, we'll just
179         # read 2000 bytes, which also happens to read enough actual data to
180         # pre-fetch a 9-entry dirnode.
181-        self._read_size = 2000
182+        self._read_size = 4000
183         if mode == MODE_CHECK:
184             # we use unpack_prefix_and_signature, so we need 1k
185             self._read_size = 1000
186diff -rN -u old-timestamps/src/allmydata/scripts/tahoe_ls.py new-timestamps/src/allmydata/scripts/tahoe_ls.py
187--- old-timestamps/src/allmydata/scripts/tahoe_ls.py    2009-04-08 21:17:07.000000000 -0600
188+++ new-timestamps/src/allmydata/scripts/tahoe_ls.py    2009-04-08 21:17:08.000000000 -0600
189@@ -65,8 +65,20 @@
190         name = unicode(name)
191         child = children[name]
192         childtype = child[0]
193-        ctime = child[1]["metadata"].get("ctime")
194-        mtime = child[1]["metadata"].get("mtime")
195+
196+        # linkcrtime is not really what unix filesystems mean by "ctime", but
197+        # it *is* apparently what many or even most unix programmers and users
198+        # think that a unix filesystem means by "ctime"...
199+        ctime = child[1].get("metadata", {}).get('__sys', {}).get("linkcrtime")
200+        if not ctime:
201+            ctime = child[1]["metadata"].get("ctime")
202+
203+        # linkmotime is not really what unix filesystems mean by "mtime",
204+        # because linkmotime is a property of the link and mtime is a property
205+        # of the file contents...
206+        mtime = child[1].get("metadata", {}).get('__sys', {}).get("linkmotime")
207+        if not mtime:
208+            mtime = child[1]["metadata"].get("mtime")
209         rw_uri = child[1].get("rw_uri")
210         ro_uri = child[1].get("ro_uri")
211         if ctime:
212diff -rN -u old-timestamps/src/allmydata/test/test_dirnode.py new-timestamps/src/allmydata/test/test_dirnode.py
213--- old-timestamps/src/allmydata/test/test_dirnode.py   2009-04-08 21:17:07.000000000 -0600
214+++ new-timestamps/src/allmydata/test/test_dirnode.py   2009-04-08 21:17:08.000000000 -0600
215@@ -416,7 +416,7 @@
216             d.addCallback(lambda res: n.get_metadata_for(u"child"))
217             d.addCallback(lambda metadata:
218                           self.failUnlessEqual(sorted(metadata.keys()),
219-                                               ["ctime", "mtime"]))
220+                                               ["__sys", "ctime", "mtime"]))
221 
222             d.addCallback(lambda res:
223                           self.shouldFail(NoSuchChildError, "gcamap-no",
224diff -rN -u old-timestamps/src/allmydata/util/time_format.py new-timestamps/src/allmydata/util/time_format.py
225--- old-timestamps/src/allmydata/util/time_format.py    2009-04-08 21:17:07.000000000 -0600
226+++ new-timestamps/src/allmydata/util/time_format.py    2009-04-08 21:17:08.000000000 -0600
227@@ -19,6 +19,11 @@
228         now = t()
229     return datetime.datetime.utcfromtimestamp(now).isoformat(sep)
230 
231+def iso_local(now=None, sep='_', t=time.time):
232+    if now is None:
233+        now = t()
234+    return datetime.datetime.fromtimestamp(now).isoformat(sep)
235+
236 def iso_utc_time_to_seconds(isotime, _conversion_re=re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})[T_ ](?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<subsecond>\.\d+)?")):
237     """
238     The inverse of iso_utc().
239diff -rN -u old-timestamps/src/allmydata/web/directory.py new-timestamps/src/allmydata/web/directory.py
240--- old-timestamps/src/allmydata/web/directory.py       2009-04-08 21:17:07.000000000 -0600
241+++ new-timestamps/src/allmydata/web/directory.py       2009-04-08 21:17:08.000000000 -0600
242@@ -13,7 +13,7 @@
243 
244 from foolscap.eventual import fireEventually
245 
246-from allmydata.util import base32
247+from allmydata.util import base32, time_format
248 from allmydata.uri import from_string_dirnode
249 from allmydata.interfaces import IDirectoryNode, IFileNode, IMutableFileNode, \
250      ExistingChildError, NoSuchChildError
251@@ -592,16 +592,25 @@
252         ctx.fillSlots("rename", rename)
253 
254         times = []
255-        TIME_FORMAT = "%H:%M:%S %d-%b-%Y"
256-        if "ctime" in metadata:
257-            ctime = time.strftime(TIME_FORMAT,
258-                                  time.localtime(metadata["ctime"]))
259-            times.append("c: " + ctime)
260-        if "mtime" in metadata:
261-            mtime = time.strftime(TIME_FORMAT,
262-                                  time.localtime(metadata["mtime"]))
263+        linkcrtime = metadata.get('__sys', {}).get("linkcrtime")
264+        if linkcrtime is not None:
265+            times.append("lcr: " + time_format.iso_local(linkcrtime))
266+        else:
267+            # For backwards-compatibility with links last modified by Tahoe < 1.4.0:
268+            if "ctime" in metadata:
269+                ctime = time_format.iso_local(metadata["ctime"])
270+                times.append("c: " + ctime)
271+        linkmotime = metadata.get('__sys', {}).get("linkmotime")
272+        if linkmotime is not None:
273             if times:
274                 times.append(T.br())
275+            times.append("lmo: " + time_format.iso_local(linkmotime))
276+        else:
277+            # For backwards-compatibility with links last modified by Tahoe < 1.4.0:
278+            if "mtime" in metadata:
279+                mtime = time_format.iso_local(metadata["mtime"])
280+                if times:
281+                    times.append(T.br())
282                 times.append("m: " + mtime)
283         ctx.fillSlots("times", times)
284 
285