Ticket #989: improve-http_1_1-byterange-handling.dpatch.txt

File improve-http_1_1-byterange-handling.dpatch.txt, 10.1 KB (added by davidsarah, at 2010-03-10T03:39:55Z)

Improve HTTP/1.1 byterange handling

Line 
1
2New patches:
3
4[Improve HTTP/1.1 byterange handling
5Jeremy Fitzhardinge <jeremy@goop.org>**20100310025913
6 Ignore-this: 6d69e694973d618f0dc65983735cd9be
7 
8 Fix parsing of a Range: header to support:
9  - multiple ranges (parsed, but not returned)
10  - suffix byte ranges ("-2139")
11  - correct handling of incorrectly formatted range headers
12    (correct behaviour is to ignore the header and return the full
13     file)
14  - return appropriate error for ranges outside the file
15 
16 Multiple ranges are parsed, but only the first range is returned.
17 Returning multiple ranges requires using the multipart/byterange
18 content type.
19 
20] {
21hunk ./src/allmydata/test/test_web.py 582
22         d.addCallback(_got)
23         return d
24 
25+    def test_GET_FILEURL_partial_end_range(self):
26+        headers = {"range": "bytes=-5"}
27+        length  = len(self.BAR_CONTENTS)
28+        d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
29+                     return_response=True)
30+        def _got((res, status, headers)):
31+            self.failUnlessEqual(int(status), 206)
32+            self.failUnless(headers.has_key("content-range"))
33+            self.failUnlessEqual(headers["content-range"][0],
34+                                 "bytes %d-%d/%d" % (length-5, length-1, length))
35+            self.failUnlessEqual(res, self.BAR_CONTENTS[-5:])
36+        d.addCallback(_got)
37+        return d
38+
39+    def test_GET_FILEURL_partial_range_overrun(self):
40+        headers = {"range": "bytes=100-200"}
41+        length  = len(self.BAR_CONTENTS)
42+        d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_overrun",
43+                             "416 Requested Range not satisfiable",
44+                             "First beyond end of file",
45+                             self.GET, self.public_url + "/foo/bar.txt",
46+                             headers=headers)
47+        return d
48+
49     def test_HEAD_FILEURL_range(self):
50         headers = {"range": "bytes=1-10"}
51         d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
52hunk ./src/allmydata/test/test_web.py 632
53         d.addCallback(_got)
54         return d
55 
56+    def test_HEAD_FILEURL_partial_end_range(self):
57+        headers = {"range": "bytes=-5"}
58+        length  = len(self.BAR_CONTENTS)
59+        d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
60+                     return_response=True)
61+        def _got((res, status, headers)):
62+            self.failUnlessEqual(int(status), 206)
63+            self.failUnless(headers.has_key("content-range"))
64+            self.failUnlessEqual(headers["content-range"][0],
65+                                 "bytes %d-%d/%d" % (length-5, length-1, length))
66+        d.addCallback(_got)
67+        return d
68+
69+    def test_HEAD_FILEURL_partial_range_overrun(self):
70+        headers = {"range": "bytes=100-200"}
71+        length  = len(self.BAR_CONTENTS)
72+        d = self.shouldFail2(error.Error, "test_HEAD_FILEURL_range_overrun",
73+                             "416 Requested Range not satisfiable",
74+                             "",
75+                             self.HEAD, self.public_url + "/foo/bar.txt",
76+                             headers=headers)
77+        return d
78+
79     def test_GET_FILEURL_range_bad(self):
80         headers = {"range": "BOGUS=fizbop-quarnak"}
81hunk ./src/allmydata/test/test_web.py 657
82-        d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_bad",
83-                             "400 Bad Request",
84-                             "Syntactically invalid http range header",
85-                             self.GET, self.public_url + "/foo/bar.txt",
86-                             headers=headers)
87+        d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
88+                     return_response=True)
89+        def _got((res, status, headers)):
90+            self.failUnlessEqual(int(status), 200)
91+            self.failUnless(not headers.has_key("content-range"))
92+            self.failUnlessEqual(res, self.BAR_CONTENTS)
93+        d.addCallback(_got)
94         return d
95 
96     def test_HEAD_FILEURL(self):
97hunk ./src/allmydata/web/filenode.py 338
98         self.filenode = filenode
99         self.filename = filename
100 
101+    def parse_range_header(self, range):
102+        # Parse byte ranges according to RFC 2616 "14.35.1 Byte
103+        # Ranges".  Returns None if the range doesn't make sense so it
104+        # can be ignored (per the spec).  When successful, returns a
105+        # list of (first,last) inclusive range tuples.
106+
107+        filesize = self.filenode.get_size()
108+        assert isinstance(filesize, (int,long)), filesize
109+
110+        try:
111+            # byte-ranges-specifier
112+            units, rangeset = range.split('=', 1)
113+            if units != 'bytes':
114+                return None     # nothing else supported
115+
116+            def parse_range(r):
117+                first, last = r.split('-', 1)
118+
119+                if first is '':
120+                    # suffix-byte-range-spec
121+                    first = filesize - long(last)
122+                    last = filesize - 1
123+                else:
124+                    # byte-range-spec
125+
126+                    # first-byte-pos
127+                    first = long(first)
128+
129+                    # last-byte-pos
130+                    if last is '':
131+                        last = filesize - 1
132+                    else:
133+                        last = long(last)
134+
135+                if last < first:
136+                    raise ValueError
137+
138+                return (first, last)
139+
140+            # byte-range-set
141+            #
142+            # Note: the spec uses "1#" for the list of ranges, which
143+            # implicitly allows whitespace around the ',' separators,
144+            # so strip it.
145+            return [ parse_range(r.strip()) for r in rangeset.split(',') ]
146+        except ValueError:
147+            return None
148+
149     def renderHTTP(self, ctx):
150         req = IRequest(ctx)
151         gte = static.getTypeAndEncoding
152hunk ./src/allmydata/web/filenode.py 407
153 
154         filesize = self.filenode.get_size()
155         assert isinstance(filesize, (int,long)), filesize
156-        offset, size = 0, None
157+        first, size = 0, None
158         contentsize = filesize
159         req.setHeader("accept-ranges", "bytes")
160         if not self.filenode.is_mutable():
161hunk ./src/allmydata/web/filenode.py 421
162         # or maybe just use the URI for CHK and LIT.
163         rangeheader = req.getHeader('range')
164         if rangeheader:
165-            # adapted from nevow.static.File
166-            bytesrange = rangeheader.split('=')
167-            if bytesrange[0] != 'bytes':
168-                raise WebError("Syntactically invalid http range header!")
169-            start, end = bytesrange[1].split('-')
170-            if start:
171-                offset = int(start)
172-                if not end:
173-                    # RFC 2616 says:
174-                    #
175-                    # "If the last-byte-pos value is absent, or if the value is
176-                    # greater than or equal to the current length of the
177-                    # entity-body, last-byte-pos is taken to be equal to one less
178-                    # than the current length of the entity- body in bytes."
179-                    end = filesize - 1
180-                size = int(end) - offset + 1
181-            req.setResponseCode(http.PARTIAL_CONTENT)
182-            req.setHeader('content-range',"bytes %s-%s/%s" %
183-                          (str(offset), str(offset+size-1), str(filesize)))
184-            contentsize = size
185+            ranges = self.parse_range_header(rangeheader)
186+
187+            # ranges = None means the header didn't parse, so ignore
188+            # the header as if it didn't exist.  If there is more than
189+            # one range, then just return the first for now, until we can
190+            # generate multipart/byteranges.
191+            if ranges is not None:
192+                first, last = ranges[0]
193+
194+                if first >= filesize:
195+                    raise WebError('First beyond end of file',
196+                                   http.REQUESTED_RANGE_NOT_SATISFIABLE)
197+                else:
198+                    first = max(0, first)
199+                    last = min(filesize-1, last)
200+
201+                    req.setResponseCode(http.PARTIAL_CONTENT)
202+                    req.setHeader('content-range',"bytes %s-%s/%s" %
203+                                  (str(first), str(last),
204+                                   str(filesize)))
205+                    contentsize = last - first + 1
206+                    size = contentsize
207+
208         req.setHeader("content-length", str(contentsize))
209         if req.method == "HEAD":
210             return ""
211hunk ./src/allmydata/web/filenode.py 447
212-        d = self.filenode.read(req, offset, size)
213+        d = self.filenode.read(req, first, size)
214         def _error(f):
215             if req.startedWriting:
216                 # The content-type is already set, and the response code has
217}
218
219Context:
220
221[setup: add licensing declaration for setuptools (noticed by the FSF compliance folks)
222zooko@zooko.com**20100309184415
223 Ignore-this: 2dfa7d812d65fec7c72ddbf0de609ccb
224]
225[setup: fix error in licensing declaration from Shawn Willden, as noted by the FSF compliance division
226zooko@zooko.com**20100309163736
227 Ignore-this: c0623d27e469799d86cabf67921a13f8
228]
229[CREDITS to Jacob Appelbaum
230zooko@zooko.com**20100304015616
231 Ignore-this: 70db493abbc23968fcc8db93f386ea54
232]
233[desert-island-build-with-proper-versions
234jacob@appelbaum.net**20100304013858]
235[docs: a few small edits to try to guide newcomers through the docs
236zooko@zooko.com**20100303231902
237 Ignore-this: a6aab44f5bf5ad97ea73e6976bc4042d
238 These edits were suggested by my watching over Jake Appelbaum's shoulder as he completely ignored/skipped/missed install.html and also as he decided that debian.txt wouldn't help him with basic installation. Then I threw in a few docs edits that have been sitting around in my sandbox asking to be committed for months.
239]
240[TAG allmydata-tahoe-1.6.1
241david-sarah@jacaranda.org**20100228062314
242 Ignore-this: eb5f03ada8ea953ee7780e7fe068539
243]
244Patch bundle hash:
24571df1d7c5d480486075c91c692f9569f9789c29a