New patches: [Improve HTTP/1.1 byterange handling Jeremy Fitzhardinge **20100310025913 Ignore-this: 6d69e694973d618f0dc65983735cd9be Fix parsing of a Range: header to support: - multiple ranges (parsed, but not returned) - suffix byte ranges ("-2139") - correct handling of incorrectly formatted range headers (correct behaviour is to ignore the header and return the full file) - return appropriate error for ranges outside the file Multiple ranges are parsed, but only the first range is returned. Returning multiple ranges requires using the multipart/byterange content type. ] { hunk ./src/allmydata/test/test_web.py 582 d.addCallback(_got) return d + def test_GET_FILEURL_partial_end_range(self): + headers = {"range": "bytes=-5"} + length = len(self.BAR_CONTENTS) + d = self.GET(self.public_url + "/foo/bar.txt", headers=headers, + return_response=True) + def _got((res, status, headers)): + self.failUnlessEqual(int(status), 206) + self.failUnless(headers.has_key("content-range")) + self.failUnlessEqual(headers["content-range"][0], + "bytes %d-%d/%d" % (length-5, length-1, length)) + self.failUnlessEqual(res, self.BAR_CONTENTS[-5:]) + d.addCallback(_got) + return d + + def test_GET_FILEURL_partial_range_overrun(self): + headers = {"range": "bytes=100-200"} + length = len(self.BAR_CONTENTS) + d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_overrun", + "416 Requested Range not satisfiable", + "First beyond end of file", + self.GET, self.public_url + "/foo/bar.txt", + headers=headers) + return d + def test_HEAD_FILEURL_range(self): headers = {"range": "bytes=1-10"} d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers, hunk ./src/allmydata/test/test_web.py 632 d.addCallback(_got) return d + def test_HEAD_FILEURL_partial_end_range(self): + headers = {"range": "bytes=-5"} + length = len(self.BAR_CONTENTS) + d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers, + return_response=True) + def _got((res, status, headers)): + 
self.failUnlessEqual(int(status), 206) + self.failUnless(headers.has_key("content-range")) + self.failUnlessEqual(headers["content-range"][0], + "bytes %d-%d/%d" % (length-5, length-1, length)) + d.addCallback(_got) + return d + + def test_HEAD_FILEURL_partial_range_overrun(self): + headers = {"range": "bytes=100-200"} + length = len(self.BAR_CONTENTS) + d = self.shouldFail2(error.Error, "test_HEAD_FILEURL_range_overrun", + "416 Requested Range not satisfiable", + "", + self.HEAD, self.public_url + "/foo/bar.txt", + headers=headers) + return d + def test_GET_FILEURL_range_bad(self): headers = {"range": "BOGUS=fizbop-quarnak"} hunk ./src/allmydata/test/test_web.py 657 - d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_bad", - "400 Bad Request", - "Syntactically invalid http range header", - self.GET, self.public_url + "/foo/bar.txt", - headers=headers) + d = self.GET(self.public_url + "/foo/bar.txt", headers=headers, + return_response=True) + def _got((res, status, headers)): + self.failUnlessEqual(int(status), 200) + self.failUnless(not headers.has_key("content-range")) + self.failUnlessEqual(res, self.BAR_CONTENTS) + d.addCallback(_got) return d def test_HEAD_FILEURL(self): hunk ./src/allmydata/web/filenode.py 338 self.filenode = filenode self.filename = filename + def parse_range_header(self, range): + # Parse a Range: header value according to RFC 2616 "14.35.1 Byte + # Ranges". Returns None if the range doesn't make sense so it + # can be ignored (per the spec). When successful, returns a + # list of (first,last) inclusive range tuples. 
+ + filesize = self.filenode.get_size() + assert isinstance(filesize, (int,long)), filesize + + try: + # byte-ranges-specifier + units, rangeset = range.split('=', 1) + if units != 'bytes': + return None # nothing else supported + + def parse_range(r): + first, last = r.split('-', 1) + + if first == '': + # suffix-byte-range-spec: "-N" selects the last N bytes + first = filesize - long(last) + last = filesize - 1 + else: + # byte-range-spec + + # first-byte-pos + first = long(first) + + # last-byte-pos + if last == '': + last = filesize - 1 + else: + last = long(last) + + if last < first: + raise ValueError + + return (first, last) + + # byte-range-set + # + # Note: the spec uses "1#" for the list of ranges, which + # implicitly allows whitespace around the ',' separators, + # so strip it. + return [ parse_range(r.strip()) for r in rangeset.split(',') ] + except ValueError: + return None + def renderHTTP(self, ctx): req = IRequest(ctx) gte = static.getTypeAndEncoding hunk ./src/allmydata/web/filenode.py 407 filesize = self.filenode.get_size() assert isinstance(filesize, (int,long)), filesize - offset, size = 0, None + first, size = 0, None contentsize = filesize req.setHeader("accept-ranges", "bytes") if not self.filenode.is_mutable(): hunk ./src/allmydata/web/filenode.py 421 # or maybe just use the URI for CHK and LIT. rangeheader = req.getHeader('range') if rangeheader: - # adapted from nevow.static.File - bytesrange = rangeheader.split('=') - if bytesrange[0] != 'bytes': - raise WebError("Syntactically invalid http range header!") - start, end = bytesrange[1].split('-') - if start: - offset = int(start) - if not end: - # RFC 2616 says: - # - # "If the last-byte-pos value is absent, or if the value is - # greater than or equal to the current length of the - # entity-body, last-byte-pos is taken to be equal to one less - # than the current length of the entity- body in bytes." 
- end = filesize - 1 - size = int(end) - offset + 1 - req.setResponseCode(http.PARTIAL_CONTENT) - req.setHeader('content-range',"bytes %s-%s/%s" % - (str(offset), str(offset+size-1), str(filesize))) - contentsize = size + ranges = self.parse_range_header(rangeheader) + + # ranges = None means the header didn't parse, so ignore + # the header as if it didn't exist. If there is more than one + # range, then just return the first for now, until we can + # generate multipart/byteranges. + if ranges is not None: + first, last = ranges[0] + + if first >= filesize: + raise WebError('First beyond end of file', + http.REQUESTED_RANGE_NOT_SATISFIABLE) + else: + first = max(0, first) + last = min(filesize-1, last) + + req.setResponseCode(http.PARTIAL_CONTENT) + req.setHeader('content-range',"bytes %s-%s/%s" % + (str(first), str(last), + str(filesize))) + contentsize = last - first + 1 + size = contentsize + req.setHeader("content-length", str(contentsize)) if req.method == "HEAD": return "" hunk ./src/allmydata/web/filenode.py 447 - d = self.filenode.read(req, offset, size) + d = self.filenode.read(req, first, size) def _error(f): if req.startedWriting: # The content-type is already set, and the response code has } Context: [setup: add licensing declaration for setuptools (noticed by the FSF compliance folks) zooko@zooko.com**20100309184415 Ignore-this: 2dfa7d812d65fec7c72ddbf0de609ccb ] [setup: fix error in licensing declaration from Shawn Willden, as noted by the FSF compliance division zooko@zooko.com**20100309163736 Ignore-this: c0623d27e469799d86cabf67921a13f8 ] [CREDITS to Jacob Appelbaum zooko@zooko.com**20100304015616 Ignore-this: 70db493abbc23968fcc8db93f386ea54 ] [desert-island-build-with-proper-versions jacob@appelbaum.net**20100304013858] [docs: a few small edits to try to guide newcomers through the docs zooko@zooko.com**20100303231902 Ignore-this: a6aab44f5bf5ad97ea73e6976bc4042d These edits were suggested by my watching over Jake Appelbaum's shoulder as he completely 
ignored/skipped/missed install.html and also as he decided that debian.txt wouldn't help him with basic installation. Then I threw in a few docs edits that have been sitting around in my sandbox asking to be committed for months. ] [TAG allmydata-tahoe-1.6.1 david-sarah@jacaranda.org**20100228062314 Ignore-this: eb5f03ada8ea953ee7780e7fe068539 ] Patch bundle hash: 71df1d7c5d480486075c91c692f9569f9789c29a