Changeset efafcfb in trunk


Ignore:
Timestamp:
2009-07-05T02:51:09Z (16 years ago)
Author:
Zooko O'Whielacronx <zooko@…>
Branches:
master
Children:
c678e8c
Parents:
4206a2c
Message:

directories: keep track of your position as you decode netstring after netstring from an input buffer instead of copying the trailing part
This makes decoding linear in the number of netstrings instead of O(N2).

Location:
src/allmydata
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • TabularUnified src/allmydata/dirnode.py

    r4206a2c refafcfb  
    205205        # cleartext. The 'name' is UTF-8 encoded. The rwcap is formatted as:
    206206        # pack("16ss32s", iv, AES(H(writekey+iv), plaintextrwcap), mac)
    207         assert isinstance(data, str)
     207        assert isinstance(data, str), (repr(data), type(data))
    208208        # an empty directory is serialized as an empty string
    209209        if data == "":
     
    211211        writeable = not self.is_readonly()
    212212        children = {}
    213         while len(data) > 0:
    214             entry, data = split_netstring(data, 1, True)
    215             name, rocap, rwcapdata, metadata_s = split_netstring(entry, 4)
     213        position = 0
     214        while position < len(data):
     215            entries, position = split_netstring(data, 1, position)
     216            (name, rocap, rwcapdata, metadata_s), subpos = split_netstring(entries[0], 4)
    216217            name = name.decode("utf-8")
    217218            rwcap = None
  • TabularUnified src/allmydata/test/test_netstring.py

    r4206a2c refafcfb  
    66    def test_split(self):
    77        a = netstring("hello") + netstring("world")
    8         self.failUnlessEqual(split_netstring(a, 2), ("hello", "world"))
    9         self.failUnlessEqual(split_netstring(a, 2, False), ("hello", "world"))
    10         self.failUnlessEqual(split_netstring(a, 2, True),
    11                              ("hello", "world", ""))
     8        self.failUnlessEqual(split_netstring(a, 2), (["hello", "world"], len(a)))
     9        self.failUnlessEqual(split_netstring(a, 2, required_trailer=""), (["hello", "world"], len(a)))
    1210        self.failUnlessRaises(ValueError, split_netstring, a, 3)
    13         self.failUnlessRaises(ValueError, split_netstring, a+" extra", 2)
    14         self.failUnlessRaises(ValueError, split_netstring, a+" extra", 2, False)
     11        self.failUnlessRaises(ValueError, split_netstring, a+" extra", 2, required_trailer="")
     12        self.failUnlessEqual(split_netstring(a+" extra", 2), (["hello", "world"], len(a)))
    1513        self.failUnlessEqual(split_netstring(a+"++", 2, required_trailer="++"),
    16                              ("hello", "world"))
     14                             (["hello", "world"], len(a)+2))
    1715        self.failUnlessRaises(ValueError,
    1816                              split_netstring, a+"+", 2, required_trailer="not")
     
    2018    def test_extra(self):
    2119        a = netstring("hello")
    22         self.failUnlessEqual(split_netstring(a, 1, True), ("hello", ""))
     20        self.failUnlessEqual(split_netstring(a, 1), (["hello"], len(a)))
    2321        b = netstring("hello") + "extra stuff"
    24         self.failUnlessEqual(split_netstring(b, 1, True),
    25                              ("hello", "extra stuff"))
     22        self.failUnlessEqual(split_netstring(b, 1),
     23                             (["hello"], len(a)))
    2624
    2725    def test_nested(self):
    2826        a = netstring("hello") + netstring("world") + "extra stuff"
    2927        b = netstring("a") + netstring("is") + netstring(a) + netstring(".")
    30         top = split_netstring(b, 4)
     28        (top, pos) = split_netstring(b, 4)
    3129        self.failUnlessEqual(len(top), 4)
    3230        self.failUnlessEqual(top[0], "a")
     
    3432        self.failUnlessEqual(top[2], a)
    3533        self.failUnlessEqual(top[3], ".")
    36         self.failUnlessRaises(ValueError, split_netstring, a, 2)
    37         self.failUnlessRaises(ValueError, split_netstring, a, 2, False)
    38         bottom = split_netstring(a, 2, True)
    39         self.failUnlessEqual(bottom, ("hello", "world", "extra stuff"))
    40 
    41 
     34        self.failUnlessRaises(ValueError, split_netstring, a, 2, required_trailer="")
     35        bottom = split_netstring(a, 2)
     36        self.failUnlessEqual(bottom, (["hello", "world"], len(netstring("hello")+netstring("world"))))
  • TabularUnified src/allmydata/util/netstring.py

    r4206a2c refafcfb  
    66
    77def split_netstring(data, numstrings,
    8                     allow_leftover=False,
    9                     required_trailer=""):
    10     """like string.split(), but extracts netstrings. If allow_leftover=False,
    11     I return numstrings elements, and throw ValueError if there was leftover
    12     data that does not exactly equal 'required_trailer'. If
    13     allow_leftover=True, required_trailer must be empty, and I return
    14     numstrings+1 elements, in which the last element is the leftover data
    15     (possibly an empty string)"""
     8                    position=0,
     9                    required_trailer=None):
     10    """like string.split(), but extracts netstrings. Ignore all bytes of data
     11    before the 'position' byte. Return a tuple of (list of elements (numstrings
     12    in length), new position index). The new position index points to the first
     13    byte which was not consumed (the 'required_trailer', if any, counts as
     14    consumed).  If 'required_trailer' is not None, throw ValueError if leftover
     15    data does not exactly equal 'required_trailer'."""
    1616
    17     assert not (allow_leftover and required_trailer)
    18 
     17    assert type(position) in (int, long), (repr(position), type(position))
    1918    elements = []
    2019    assert numstrings >= 0
    21     while data:
    22         colon = data.index(":")
    23         length = int(data[:colon])
     20    while position < len(data):
     21        colon = data.index(":", position)
     22        length = int(data[position:colon])
    2423        string = data[colon+1:colon+1+length]
    25         assert len(string) == length
     24        assert len(string) == length, (len(string), length)
    2625        elements.append(string)
    27         assert data[colon+1+length] == ","
    28         data = data[colon+1+length+1:]
     26        position = colon+1+length
     27        assert data[position] == ",", position
     28        position += 1
    2929        if len(elements) == numstrings:
    3030            break
    3131    if len(elements) < numstrings:
    3232        raise ValueError("ran out of netstrings")
    33     if allow_leftover:
    34         return tuple(elements + [data])
    35     if data != required_trailer:
    36         raise ValueError("leftover data in netstrings")
    37     return tuple(elements)
    38 
     33    if required_trailer is not None:
     34        if ((len(data) - position) != len(required_trailer)) or (data[position:] != required_trailer):
     35            raise ValueError("leftover data in netstrings")
     36        return (elements, position + len(required_trailer))
     37    else:
     38        return (elements, position)
Note: See TracChangeset for help on using the changeset viewer.