Context Navigation

← Previous Changeset
Next Changeset →

Changeset 67ad0175 in trunk

Timestamp:

2011-05-27T12:01:35Z (14 years ago)

Author:

wilcoxjg <wilcoxjg@…>

Branches:

master

Children:

ff136b8e

Parents:

d566e46

Message:

server.py: get_latencies now reports percentiles _only_ if there are sufficient observations for the interpretation of the percentile to be unambiguous.
interfaces.py: modified the return type of RIStatsProvider.get_stats to allow for None as a return value
NEWS.rst, stats.py: documentation of change to get_latencies
stats.rst: now documents percentile modification in get_latencies
test_storage.py: test_latencies now expects None in output categories that contain too few samples for the associated percentile to be unambiguously reported.
fixes #1392

Files:

: 5 edited

NEWS.rst (modified) (1 diff)
docs/stats.rst (modified) (4 diffs)
src/allmydata/interfaces.py (modified) (1 diff)
src/allmydata/storage/server.py (modified) (3 diffs)
src/allmydata/test/test_storage.py (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

TabularUnified NEWS.rst ¶

-                      rd566e46
+                      r67ad0175
 ==================================
+==================================
 User-Visible Changes in Tahoe-LAFS
 ==================================
+Release 1.9.0 (2011-??-??)
+--------------------------
+- Nodes now emit "None" for percentiles with higher implied precision
+  than the number of observations can support. Older stats gatherers
+  will throw an exception if they gather stats from a new storage
+  server and it sends a "None" for a percentile. (`#1392`_)
 Release 1.8.2 (2011-01-30)

TabularUnified docs/stats.rst ¶

-                      rd566e46
+                      r67ad0175
 ================
+================
 Tahoe Statistics
 ================
 …
     by client-only nodes which have been configured to not run a storage server
     (with [storage]enabled=false in tahoe.cfg)
     allocate, write, close, abort
         these are for immutable file uploads. 'allocate' is incremented when a
 …
         given number, and is the same threshold used by Amazon's
         internal SLA, according to the Dynamo paper).
+        Percentiles are only reported when there are enough
+        observations for them to be interpreted unambiguously. For
+        example, at a precision of thousandths the 99.9th percentile
+        lies 9 thousandths above the 99th percentile, and the two can
+        only be told apart when the sample contains at least 1000
+        observations; thus the 99.9th percentile is only reported for
+        samples of 1000 or more observations.
 **counters.uploader.files_uploaded**
 …
     active_uploads
         how many files are currently being uploaded. 0 when idle.
     incoming_count
         how many cache files are present in the incoming/ directory,

TabularUnified src/allmydata/interfaces.py ¶

-                      rd566e46
+                      r67ad0175
         """
         returns a dictionary containing 'counters' and 'stats', each a
         dictionary with string counter/stat name keys, and numeric values.
+        dictionary with string counter/stat name keys, and numeric or None values.
         counters are monotonically increasing measures of work done, and
         stats are instantaneous measures (potentially time averaged
         internally)
         """
         return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
+        return DictOf(str, DictOf(str, ChoiceOf(float, int, long, None)))
 class RIStatsGatherer(RemoteInterface):

TabularUnified src/allmydata/storage/server.py ¶

-                      rd566e46
+                      r67ad0175
     def get_latencies(self):
         """Return a dict, indexed by category, that contains a dict of
+        latency numbers for each category. Each dict will contain the
+        latency numbers for each category. If there are sufficient samples
+        for unambiguous interpretation, each dict will contain the
         following keys: mean, 01_0_percentile, 10_0_percentile,
         50_0_percentile (median), 90_0_percentile, 95_0_percentile,
+        99_0_percentile, 99_9_percentile. If no samples have been collected
+        for the given category, then that category name will not be present
+        in the return value."""
+        99_0_percentile, 99_9_percentile.  If there are insufficient
+        samples for a given percentile to be interpreted unambiguously
+        that percentile will be reported as None. If no samples have been
+        collected for the given category, then that category name will
+        not be present in the return value. """
         # note that Amazon's Dynamo paper says they use 99.9% percentile.
         output = {}
 …
             stats = {}
             samples = self.latencies[category][:]
+            count = len(samples)
+            stats["samplesize"] = count
             samples.sort()
+            count = len(samples)
+            stats["mean"] = sum(samples) / count
+            stats["01_0_percentile"] = samples[int(0.01 * count)]
+            stats["10_0_percentile"] = samples[int(0.1 * count)]
+            stats["50_0_percentile"] = samples[int(0.5 * count)]
+            stats["90_0_percentile"] = samples[int(0.9 * count)]
+            stats["95_0_percentile"] = samples[int(0.95 * count)]
+            stats["99_0_percentile"] = samples[int(0.99 * count)]
+            stats["99_9_percentile"] = samples[int(0.999 * count)]
+            if count > 1:
+                stats["mean"] = sum(samples) / count
+            else:
+                stats["mean"] = None
+            orderstatlist = [(0.01, "01_0_percentile", 100), (0.1, "10_0_percentile", 10),\
+                             (0.50, "50_0_percentile", 10), (0.90, "90_0_percentile", 10),\
+                             (0.95, "95_0_percentile", 20), (0.99, "99_0_percentile", 100),\
+                             (0.999, "99_9_percentile", 1000)]
+            for percentile, percentilestring, minnumtoobserve in orderstatlist:
+                if count >= minnumtoobserve:
+                    stats[percentilestring] = samples[int(percentile*count)]
+                else:
+                    stats[percentilestring] = None
             output[category] = stats
         return output
 …
                 level=log.SCARY, umid="SGx2fA")
         return None

TabularUnified src/allmydata/test/test_storage.py ¶

-                      rd566e46
+                      r67ad0175
         for i in range(1000):
             ss.add_latency("renew", 1.0 * i)
+        for i in range(20):
+            ss.add_latency("write", 1.0 * i)
         for i in range(10):
             ss.add_latency("cancel", 2.0 * i)
 …
         self.failUnlessEqual(sorted(output.keys()),
                              sorted(["allocate", "renew", "cancel", "get"]))
+                             sorted(["allocate", "renew", "cancel", "write", "get"]))
         self.failUnlessEqual(len(ss.latencies["allocate"]), 1000)
         self.failUnless(abs(output["allocate"]["mean"] - 9500) < 1, output)
 …
         self.failUnless(abs(output["renew"]["99_9_percentile"] - 999) < 1, output)
+        self.failUnlessEqual(len(ss.latencies["write"]), 20)
+        self.failUnless(abs(output["write"]["mean"] - 9) < 1, output)
+        self.failUnless(output["write"]["01_0_percentile"] is None, output)
+        self.failUnless(abs(output["write"]["10_0_percentile"] -  2) < 1, output)
+        self.failUnless(abs(output["write"]["50_0_percentile"] - 10) < 1, output)
+        self.failUnless(abs(output["write"]["90_0_percentile"] - 18) < 1, output)
+        self.failUnless(abs(output["write"]["95_0_percentile"] - 19) < 1, output)
+        self.failUnless(output["write"]["99_0_percentile"] is None, output)
+        self.failUnless(output["write"]["99_9_percentile"] is None, output)
         self.failUnlessEqual(len(ss.latencies["cancel"]), 10)
         self.failUnless(abs(output["cancel"]["mean"] - 9) < 1, output)
         self.failUnless(abs(output["cancel"]["01_0_percentile"] -  0) < 1, output)
+        self.failUnless(output["cancel"]["01_0_percentile"] is None, output)
         self.failUnless(abs(output["cancel"]["10_0_percentile"] -  2) < 1, output)
         self.failUnless(abs(output["cancel"]["50_0_percentile"] - 10) < 1, output)
         self.failUnless(abs(output["cancel"]["90_0_percentile"] - 18) < 1, output)
         self.failUnless(abs(output["cancel"]["95_0_percentile"] - 18) < 1, output)
         self.failUnless(abs(output["cancel"]["99_0_percentile"] - 18) < 1, output)
         self.failUnless(abs(output["cancel"]["99_9_percentile"] - 18) < 1, output)
+        self.failUnless(output["cancel"]["95_0_percentile"] is None, output)
+        self.failUnless(output["cancel"]["99_0_percentile"] is None, output)
+        self.failUnless(output["cancel"]["99_9_percentile"] is None, output)
         self.failUnlessEqual(len(ss.latencies["get"]), 1)
         self.failUnless(abs(output["get"]["mean"] - 5) < 1, output)
         self.failUnless(abs(output["get"]["01_0_percentile"] - 5) < 1, output)
         self.failUnless(abs(output["get"]["10_0_percentile"] - 5) < 1, output)
         self.failUnless(abs(output["get"]["50_0_percentile"] - 5) < 1, output)
         self.failUnless(abs(output["get"]["90_0_percentile"] - 5) < 1, output)
         self.failUnless(abs(output["get"]["95_0_percentile"] - 5) < 1, output)
         self.failUnless(abs(output["get"]["99_0_percentile"] - 5) < 1, output)
         self.failUnless(abs(output["get"]["99_9_percentile"] - 5) < 1, output)
+        self.failUnless(output["get"]["mean"] is None, output)
+        self.failUnless(output["get"]["01_0_percentile"] is None, output)
+        self.failUnless(output["get"]["10_0_percentile"] is None, output)
+        self.failUnless(output["get"]["50_0_percentile"] is None, output)
+        self.failUnless(output["get"]["90_0_percentile"] is None, output)
+        self.failUnless(output["get"]["95_0_percentile"] is None, output)
+        self.failUnless(output["get"]["99_0_percentile"] is None, output)
+        self.failUnless(output["get"]["99_9_percentile"] is None, output)
 def remove_tags(s):

Note: See TracChangeset for help on using the changeset viewer.