| 1 | #! /usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | import random, math, re |
|---|
| 4 | from twisted.python import usage |
|---|
| 5 | |
|---|
| 6 | class Args(usage.Options): |
|---|
| 7 | optParameters = [ |
|---|
| 8 | ["mode", "m", "alpha", "validation scheme"], |
|---|
| 9 | ["arity", "k", 2, "k (airty) for hash tree"], |
|---|
| 10 | ] |
|---|
| 11 | def opt_arity(self, option): |
|---|
| 12 | self['arity'] = int(option) |
|---|
| 13 | def parseArgs(self, *args): |
|---|
| 14 | if len(args) > 0: |
|---|
| 15 | self['mode'] = args[0] |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | def charttest(): |
|---|
| 19 | import gdchart |
|---|
| 20 | sizes = [random.randrange(10, 20) for i in range(10)] |
|---|
| 21 | x = gdchart.Line() |
|---|
| 22 | x.width = 250 |
|---|
| 23 | x.height = 250 |
|---|
| 24 | x.xtitle = "sample" |
|---|
| 25 | x.ytitle = "size" |
|---|
| 26 | x.title = "Example Graph" |
|---|
| 27 | #x.ext_color = [ "white", "yellow", "red", "blue", "green"] |
|---|
| 28 | x.setData(sizes) |
|---|
| 29 | #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"]) |
|---|
| 30 | x.draw("simple.png") |
|---|
| 31 | |
|---|
| 32 | KiB=1024 |
|---|
| 33 | MiB=1024*KiB |
|---|
| 34 | GiB=1024*MiB |
|---|
| 35 | TiB=1024*GiB |
|---|
| 36 | PiB=1024*TiB |
|---|
| 37 | |
|---|
| 38 | class Sizes: |
|---|
| 39 | def __init__(self, mode, file_size, arity=2): |
|---|
| 40 | MAX_SEGSIZE = 128*KiB |
|---|
| 41 | self.mode = mode |
|---|
| 42 | self.file_size = file_size |
|---|
| 43 | self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size) |
|---|
| 44 | self.num_segs = num_segs = math.ceil(file_size / seg_size) |
|---|
| 45 | self.num_blocks = num_blocks = num_segs |
|---|
| 46 | |
|---|
| 47 | self.num_shares = num_shares = 10 |
|---|
| 48 | self.shares_needed = shares_needed = 3 |
|---|
| 49 | |
|---|
| 50 | self.block_size = block_size = seg_size / shares_needed |
|---|
| 51 | self.share_size = share_size = block_size * num_blocks |
|---|
| 52 | |
|---|
| 53 | # none of this includes the share-level hash chain yet, since that is |
|---|
| 54 | # only a function of the number of shares. All overhead numbers |
|---|
| 55 | # assume that the share-level hash chain has already been sent, |
|---|
| 56 | # including the root of the block-level hash tree. |
|---|
| 57 | |
|---|
| 58 | if mode == "alpha": |
|---|
| 59 | # no hash tree at all |
|---|
| 60 | self.block_arity = 0 |
|---|
| 61 | self.block_tree_depth = 0 |
|---|
| 62 | self.block_overhead = 0 |
|---|
| 63 | self.bytes_until_some_data = 32 + share_size |
|---|
| 64 | self.share_storage_overhead = 0 |
|---|
| 65 | self.share_transmission_overhead = 0 |
|---|
| 66 | |
|---|
| 67 | elif mode == "beta": |
|---|
| 68 | # k=num_blocks, d=1 |
|---|
| 69 | # each block has a 32-byte hash |
|---|
| 70 | self.block_arity = num_blocks |
|---|
| 71 | self.block_tree_depth = 1 |
|---|
| 72 | self.block_overhead = 32 |
|---|
| 73 | # the share has a list of hashes, one for each block |
|---|
| 74 | self.share_storage_overhead = (self.block_overhead * |
|---|
| 75 | num_blocks) |
|---|
| 76 | # we can get away with not sending the hash of the share that |
|---|
| 77 | # we're sending in full, once |
|---|
| 78 | self.share_transmission_overhead = self.share_storage_overhead - 32 |
|---|
| 79 | # we must get the whole list (so it can be validated) before |
|---|
| 80 | # any data can be validated |
|---|
| 81 | self.bytes_until_some_data = (self.share_transmission_overhead + |
|---|
| 82 | block_size) |
|---|
| 83 | |
|---|
| 84 | elif mode == "gamma": |
|---|
| 85 | self.block_arity = k = arity |
|---|
| 86 | d = math.ceil(math.log(num_blocks, k)) |
|---|
| 87 | self.block_tree_depth = d |
|---|
| 88 | num_leaves = k ** d |
|---|
| 89 | # to make things easier, we make the pessimistic assumption that |
|---|
| 90 | # we have to store hashes for all the empty places in the tree |
|---|
| 91 | # (when the number of shares is not an exact exponent of k) |
|---|
| 92 | self.block_overhead = 32 |
|---|
| 93 | # the block hashes are organized into a k-ary tree, which |
|---|
| 94 | # means storing (and eventually transmitting) more hashes. This |
|---|
| 95 | # count includes all the low-level share hashes and the root. |
|---|
| 96 | hash_nodes = (num_leaves*k - 1) / (k - 1) |
|---|
| 97 | #print("hash_depth", d) |
|---|
| 98 | #print("num_leaves", num_leaves) |
|---|
| 99 | #print("hash_nodes", hash_nodes) |
|---|
| 100 | # the storage overhead is this |
|---|
| 101 | self.share_storage_overhead = 32 * (hash_nodes - 1) |
|---|
| 102 | # the transmission overhead is smaller: if we actually transmit |
|---|
| 103 | # every block, we don't have to transmit 1/k of the |
|---|
| 104 | # lowest-level block hashes, and we don't have to transmit the |
|---|
| 105 | # root because it was already sent with the share-level hash tree |
|---|
| 106 | self.share_transmission_overhead = 32 * (hash_nodes |
|---|
| 107 | - 1 # the root |
|---|
| 108 | - num_leaves / k) |
|---|
| 109 | # we must get a full sibling hash chain before we can validate |
|---|
| 110 | # any data |
|---|
| 111 | sibling_length = d * (k-1) |
|---|
| 112 | self.bytes_until_some_data = 32 * sibling_length + block_size |
|---|
| 113 | |
|---|
| 114 | else: |
|---|
| 115 | raise ValueError("unknown mode '%s" % mode) |
|---|
| 116 | |
|---|
| 117 | self.storage_overhead = self.share_storage_overhead * num_shares |
|---|
| 118 | self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size |
|---|
| 119 | |
|---|
| 120 | def dump(self): |
|---|
| 121 | for k in ("mode", "file_size", "seg_size", |
|---|
| 122 | "num_segs", "num_blocks", "num_shares", "shares_needed", |
|---|
| 123 | "block_size", "share_size", |
|---|
| 124 | "block_arity", "block_tree_depth", |
|---|
| 125 | "block_overhead", |
|---|
| 126 | "share_storage_overhead", "share_transmission_overhead", |
|---|
| 127 | "storage_overhead", "storage_overhead_percentage", |
|---|
| 128 | "bytes_until_some_data"): |
|---|
| 129 | print(k, getattr(self, k)) |
|---|
| 130 | |
|---|
| 131 | def fmt(num, trim=False): |
|---|
| 132 | if num < KiB: |
|---|
| 133 | #s = str(num) + "#" |
|---|
| 134 | s = "%.2f#" % num |
|---|
| 135 | elif num < MiB: |
|---|
| 136 | s = "%.2fk" % (num / KiB) |
|---|
| 137 | elif num < GiB: |
|---|
| 138 | s = "%.2fM" % (num / MiB) |
|---|
| 139 | elif num < TiB: |
|---|
| 140 | s = "%.2fG" % (num / GiB) |
|---|
| 141 | elif num < PiB: |
|---|
| 142 | s = "%.2fT" % (num / TiB) |
|---|
| 143 | else: |
|---|
| 144 | s = "big" |
|---|
| 145 | if trim: |
|---|
| 146 | s = re.sub(r'(\.0+)([kMGT#])', |
|---|
| 147 | lambda m: m.group(2), |
|---|
| 148 | s) |
|---|
| 149 | else: |
|---|
| 150 | s = re.sub(r'(\.0+)([kMGT#])', |
|---|
| 151 | lambda m: (" "*len(m.group(1))+m.group(2)), |
|---|
| 152 | s) |
|---|
| 153 | if s.endswith("#"): |
|---|
| 154 | s = s[:-1] + " " |
|---|
| 155 | return s |
|---|
| 156 | |
|---|
| 157 | def text(): |
|---|
| 158 | opts = Args() |
|---|
| 159 | opts.parseOptions() |
|---|
| 160 | mode = opts["mode"] |
|---|
| 161 | arity = opts["arity"] |
|---|
| 162 | # 0123456789012345678901234567890123456789012345678901234567890123456 |
|---|
| 163 | print("mode=%s" % mode, " arity=%d" % arity) |
|---|
| 164 | print(" storage storage") |
|---|
| 165 | print("Size sharesize overhead overhead k d alacrity") |
|---|
| 166 | print(" (bytes) (%)") |
|---|
| 167 | print("------- ------- -------- -------- ---- -- --------") |
|---|
| 168 | #sizes = [2 ** i for i in range(7, 41)] |
|---|
| 169 | #radix = math.sqrt(10); expstep = 2 |
|---|
| 170 | radix = 2; expstep = 2 |
|---|
| 171 | #radix = 10; expstep = 1 |
|---|
| 172 | maxexp = int(math.ceil(math.log(1e12, radix)))+2 |
|---|
| 173 | sizes = [radix ** i for i in range(2,maxexp,expstep)] |
|---|
| 174 | for file_size in sizes: |
|---|
| 175 | s = Sizes(mode, file_size, arity) |
|---|
| 176 | out = "" |
|---|
| 177 | out += "%7s " % fmt(file_size, trim=True) |
|---|
| 178 | out += "%7s " % fmt(s.share_size) |
|---|
| 179 | out += "%8s" % fmt(s.storage_overhead) |
|---|
| 180 | out += "%10.2f " % s.storage_overhead_percentage |
|---|
| 181 | out += " %4d" % int(s.block_arity) |
|---|
| 182 | out += " %2d" % int(s.block_tree_depth) |
|---|
| 183 | out += " %8s" % fmt(s.bytes_until_some_data) |
|---|
| 184 | print(out) |
|---|
| 185 | |
|---|
| 186 | |
|---|
| 187 | def graph(): |
|---|
| 188 | # doesn't work yet |
|---|
| 189 | import Gnuplot |
|---|
| 190 | opts = Args() |
|---|
| 191 | opts.parseOptions() |
|---|
| 192 | mode = opts["mode"] |
|---|
| 193 | arity = opts["arity"] |
|---|
| 194 | g = Gnuplot.Gnuplot(debug=1) |
|---|
| 195 | g.title("overhead / alacrity tradeoffs") |
|---|
| 196 | g.xlabel("file size") |
|---|
| 197 | g.ylabel("stuff") |
|---|
| 198 | sizes = [2 ** i for i in range(7, 32)] |
|---|
| 199 | series = {"overhead": {}, "alacrity": {}} |
|---|
| 200 | for file_size in sizes: |
|---|
| 201 | s = Sizes(mode, file_size, arity) |
|---|
| 202 | series["overhead"][file_size] = s.storage_overhead_percentage |
|---|
| 203 | series["alacrity"][file_size] = s.bytes_until_some_data |
|---|
| 204 | g.plot([ (fs, series["overhead"][fs]) |
|---|
| 205 | for fs in sizes ]) |
|---|
| 206 | input("press return") |
|---|
| 207 | |
|---|
| 208 | |
|---|
| 209 | if __name__ == '__main__': |
|---|
| 210 | text() |
|---|
| 211 | #graph() |
|---|