| 1 | #!/usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | # retrieve a latency statistic for a given operation and percentile from a |
|---|
| 4 | # set of storage servers. |
|---|
| 5 | |
|---|
| 6 | # the OPERATION value should come from the following list: |
|---|
| 7 | # allocate: allocate_buckets, first step to upload an immutable file |
|---|
| 8 | # write: write data to an immutable share |
|---|
| 9 | # close: finish writing to an immutable share |
|---|
| 10 | # cancel: abandon a partial immutable share |
|---|
| 11 | # get: get_buckets, first step to download an immutable file |
|---|
| 12 | # read: read data from an immutable share |
|---|
| 13 | # writev: slot_testv_and_readv_and_writev, modify/create a directory |
|---|
| 14 | # readv: read a directory (or mutable file) |
|---|
| 15 | |
|---|
| 16 | # the PERCENTILE value should come from the following list: |
|---|
| 17 | # 01_0: 1% |
|---|
| 18 | # 10_0: 10% |
|---|
| 19 | # 50_0: 50% (median) |
|---|
| 20 | # 90_0: 90% |
|---|
| 21 | # 99_0: 99% |
|---|
| 22 | # 99_9: 99.9% |
|---|
| 23 | # mean: the mean latency |
|---|
| 24 | |
|---|
| 25 | # To use this, create a symlink from |
|---|
| 26 | # /etc/munin/plugins/tahoe_server_latency_OPERATION_PERCENTILE to this |
|---|
| 27 | # script. For example: |
|---|
| 28 | |
|---|
| 29 | # ln -s /usr/share/munin/plugins/tahoe_server_latency_ \ |
|---|
| 30 | # /etc/munin/plugins/tahoe_server_latency_allocate_99_9 |
|---|
| 31 | |
|---|
| 32 | # Also, you will need to put a list of node statistics URLs in the plugin's |
|---|
| 33 | # environment, by adding a stanza like the following to a file in |
|---|
| 34 | # /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_latencies: |
|---|
| 35 | # |
|---|
| 36 | # [tahoe_server_latency*] |
|---|
| 37 | # env.url_storage1 http://localhost:9011/statistics?t=json |
|---|
| 38 | # env.url_storage2 http://localhost:9012/statistics?t=json |
|---|
| 39 | # env.url_storage3 http://localhost:9013/statistics?t=json |
|---|
| 40 | # env.url_storage4 http://localhost:9014/statistics?t=json |
|---|
| 41 | |
|---|
| 42 | # of course, these URLs must match the webports you have configured into the |
|---|
| 43 | # storage nodes. |
|---|
| 44 | |
|---|
| 45 | |
|---|
| 46 | import os, sys |
|---|
| 47 | import urllib |
|---|
| 48 | import json |
|---|
| 49 | |
|---|
PREFIX = "tahoe_server_latency_"


def collect_node_urls(environ):
    """Return a sorted list of (nodename, url) pairs found in *environ*.

    Every variable whose name starts with "url_" contributes one pair; the
    node name is the part of the variable name after that prefix.
    """
    return sorted(
        (name[len("url_"):], url)
        for name, url in environ.items()
        if name.startswith("url_")
    )


def parse_plugin_name(script_name):
    """Split a plugin file name into (operation, percentile).

    *script_name* is the basename this script was invoked as, e.g.
    "tahoe_server_latency_allocate_99_9" -> ("allocate", "99_9").

    Raises ValueError when the name does not start with PREFIX or does not
    contain both an operation and a percentile.
    """
    if not script_name.startswith(PREFIX):
        raise ValueError(
            "plugin name %r must start with %r" % (script_name, PREFIX))
    # Only the first underscore separates the operation from the percentile;
    # percentiles such as "99_9" contain an underscore themselves.
    operation, _, percentile = script_name[len(PREFIX):].partition("_")
    if not operation or not percentile:
        raise ValueError(
            "plugin name %r must look like %sOPERATION_PERCENTILE"
            % (script_name, PREFIX))
    return operation, percentile


def describe_percentile(percentile):
    """Return the human-readable label used in the graph title and info."""
    if percentile == "mean":
        return "mean"
    return percentile.replace("_", ".") + "th percentile"


def stat_key(operation, percentile):
    """Return the key to look up inside the "stats" dict of a node's JSON."""
    if percentile == "mean":
        p_key = "mean"
    else:
        p_key = percentile + "_percentile"
    return "storage_server.latencies.%s.%s" % (operation, p_key)


def build_config(operation, percentile, node_urls):
    """Return the text printed in response to munin's "config" request.

    The result has no trailing newline (print() supplies it).
    """
    what = describe_percentile(percentile)
    graph_info = (
        "graph_info This graph shows how long '%s' operations took on the"
        " storage server, the %s delay between message receipt and"
        " response generation, calculated over the last thousand"
        " operations." % (operation, what)
    )
    lines = [
        "graph_title Tahoe Server '%s' Latency (%s)" % (operation, what),
        "graph_vlabel seconds",
        "graph_category tahoe",
        graph_info,
    ]
    for nodename, _url in node_urls:
        lines.append("%s.label %s" % (nodename, nodename))
        lines.append("%s.draw LINE2" % (nodename,))
    return "\n".join(lines)


def fetch_stats(url):
    """Fetch *url* and return its body decoded as JSON."""
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    response = urlopen(url)
    # try/finally rather than "with": the Python 2 response object is not a
    # context manager.
    try:
        return json.loads(response.read().decode("utf-8"))
    finally:
        response.close()


def main(argv=None, environ=None):
    """Run the plugin and return the process exit status.

    *argv* defaults to sys.argv and *environ* to os.environ. With "config"
    as the first argument the graph configuration is printed; otherwise one
    "<nodename>.value <value>" line is printed per configured node.
    """
    argv = sys.argv if argv is None else argv
    environ = os.environ if environ is None else environ

    node_urls = collect_node_urls(environ)
    try:
        operation, percentile = parse_plugin_name(os.path.basename(argv[0]))
    except ValueError as e:
        # A string argument makes sys.exit print it to stderr and exit 1.
        sys.exit(str(e))

    if len(argv) > 1 and argv[1] == "config":
        print(build_config(operation, percentile, node_urls))
        return 0

    key = stat_key(operation, percentile)
    for nodename, url in node_urls:
        value = fetch_stats(url)["stats"][key]
        print("%s.value %s" % (nodename, value))
    return 0


if __name__ == "__main__":
    sys.exit(main())
|---|
| 94 | |
|---|