1 | """Implementation of the deep stats class.""" |
---|
2 | |
---|
3 | import math |
---|
4 | |
---|
5 | from allmydata.interfaces import IImmutableFileNode |
---|
6 | from allmydata.interfaces import IMutableFileNode |
---|
7 | from allmydata.interfaces import IDirectoryNode |
---|
8 | from allmydata.unknown import UnknownNode |
---|
9 | from allmydata.uri import LiteralFileURI |
---|
10 | from allmydata.uri import from_string |
---|
11 | from allmydata.util import mathutil |
---|
12 | |
---|
class DeepStats(object):
    """Deep stats object.

    Holds the results of the deep-stats operation: a dict of counters and
    running maxima (``self.stats``) plus file-size histograms
    (``self.histograms``).  Used for JSON generation in the API.
    """

    # JSON API version.
    # Rules:
    # - increment each time a field is removed or changes meaning.
    # - it's ok to add a new field without incrementing the version.
    API_VERSION = 1

    def __init__(self, origin):
        """Initialize the DeepStats object with every statistic set to 0.

        :param origin: the node the traversal starts from.  It is only
            stored here; ``set_monitor`` later calls its
            ``get_storage_index()`` method.
        """
        self.monitor = None
        self.origin = origin
        self.stats = {
            'api-version': self.API_VERSION,
        }
        # "size-mutable-files" and "largest-mutable-file" are deliberately
        # not listed: nothing computes mutable-file sizes yet (see the TODO
        # in add_node).
        for key in ("count-immutable-files",
                    "count-mutable-files",
                    "count-literal-files",
                    "count-files",
                    "count-directories",
                    "count-unknown",
                    "size-immutable-files",
                    "size-literal-files",
                    "size-directories",
                    "largest-directory",
                    "largest-directory-children",
                    "largest-immutable-file",
                    ):
            self.stats[key] = 0
        # Each histogram maps a (min, max) bucket to the number of samples
        # that fell into it.
        self.histograms = {"size-files-histogram": {}}
        # Known histogram buckets, in ascending order; which_bucket()
        # appends further buckets on demand.
        self.buckets = [(0, 0), (1, 3)]
        # Base handed to mathutil.next_power_of_k() when a new bucket's
        # upper bound is computed: sqrt(10) gives two buckets per decade.
        self.root = math.sqrt(10)

    def set_monitor(self, monitor):
        """Attach *monitor* and publish the current results to it.

        Stores the monitor, sets its ``origin_si`` attribute to the storage
        index of the origin node, and passes the current results to its
        ``set_status()`` method.
        """
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.get_results())

    def add_node(self, node, childpath):
        """Update the counters for one visited *node*.

        The node is classified as unknown, directory, mutable file or
        immutable file (checked in that order) and the matching counters
        are incremented.  A node that matches none of these is not counted.
        For immutable files the size is also added to the size histogram
        and to the literal or immutable size totals.

        :param node: the node being visited.
        :param childpath: not used by this method.
        """
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node):  # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            else:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)

    def enter_directory(self, parent, children):
        """Record the size and child count of the directory *parent*.

        :param parent: directory node; its ``get_size()`` result is added
            to the directory size statistics unless it is None.
        :param children: sized collection of the directory's children; only
            its length is used.
        """
        dirsize_bytes = parent.get_size()
        if dirsize_bytes is not None:
            self.add("size-directories", dirsize_bytes)
            self.max("largest-directory", dirsize_bytes)
        dirsize_children = len(children)
        self.max("largest-directory-children", dirsize_children)

    def add(self, key, value=1):
        """Increase the statistic *key* by *value* (1 by default)."""
        self.stats[key] += value

    def max(self, key, value):
        """Raise the statistic *key* to *value* if *value* is larger."""
        self.stats[key] = max(self.stats[key], value)

    def which_bucket(self, size):
        """Return the histogram bucket ``(min, max)`` that contains *size*.

        The returned bucket satisfies ``min <= size <= max``.  Buckets come
        from the set (0,0), (1,3), (4,10), (11,31), (32,100), (101,316),
        (317,1000), etc: two per decade.  Missing buckets are appended to
        ``self.buckets`` as needed.

        :param size: non-negative size.  Bucket bounds are integers, so an
            integer is expected.
        :raises ValueError: if *size* is negative.
        """
        # An explicit check rather than ``assert``: asserts are stripped
        # under ``python -O``, and a negative size matches no bucket, so the
        # extension loop below would never terminate.
        if size < 0:
            raise ValueError("size must be non-negative, got %r" % (size,))
        for bucket in self.buckets:
            if bucket[0] <= size <= bucket[1]:
                return bucket
        # Not covered yet: keep extending the list, each new bucket starting
        # right after the previous one, until one contains the size.
        while True:
            new_lower = self.buckets[-1][1] + 1
            new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
            bucket = (new_lower, new_upper)
            self.buckets.append(bucket)
            if new_lower <= size <= new_upper:
                return bucket

    def histogram(self, key, size):
        """Count one sample of *size* in the histogram named *key*."""
        bucket = self.which_bucket(size)
        counts = self.histograms[key]
        counts[bucket] = counts.get(bucket, 0) + 1

    def get_results(self):
        """Return a snapshot of the statistics as a new dict.

        The result is a copy of ``self.stats`` with one extra entry per
        histogram: a sorted list of ``(min, max, count)`` tuples.
        """
        stats = self.stats.copy()
        for key, counts in self.histograms.items():
            stats[key] = sorted(
                (bucket[0], bucket[1], count)
                for bucket, count in counts.items()
            )
        return stats

    def finish(self):
        """Return the final results; same value as ``get_results()``."""
        return self.get_results()
---|