source file: /home/buildslave/tahoe/edgy/build/src/allmydata/provisioning.py
file stats: 376 lines, 374 executed: 99.5% covered
coverage versus previous test: 0 lines added, 0 lines removed
    1. 
    2. from nevow import inevow, rend, tags as T
    3. import math
    4. from allmydata.util import mathutil
    5. from allmydata.web.common import getxmlfile
    6. 
    7. # factorial and binomial copied from
    8. # http://mail.python.org/pipermail/python-list/2007-April/435718.html
    9. 
   10. def factorial(n):
   11.     """factorial(n): return the factorial of the integer n.
   12.     factorial(0) = 1
   13.     factorial(n) with n<0 is -factorial(abs(n))
   14.     """
   15.     result = 1
   16.     for i in xrange(1, abs(n)+1):
   17.         result *= i
   18.     assert n >= 0
   19.     return result
   20. 
   21. def binomial(n, k):
   22.     assert 0 <= k <= n
   23.     if k == 0 or k == n:
   24.         return 1
   25.     # calculate n!/k! as one product, avoiding factors that
   26.     # just get canceled
   27.     P = k+1
   28.     for i in xrange(k+2, n+1):
   29.         P *= i
   30.     # if you are paranoid:
   31.     # C, rem = divmod(P, factorial(n-k))
   32.     # assert rem == 0
   33.     # return C
   34.     return P//factorial(n-k)
   35. 
   36. class ProvisioningTool(rend.Page):
   37.     addSlash = True
   38.     docFactory = getxmlfile("provisioning.xhtml")
   39. 
   40.     def render_forms(self, ctx, data):
   41.         req = inevow.IRequest(ctx)
   42. 
   43.         def getarg(name, astype=int):
   44.             if req.method != "POST":
   45.                 return None
   46.             if name in req.fields:
   47.                 return astype(req.fields[name].value)
   48.             return None
   49.         return self.do_forms(getarg)
   50. 
   51. 
   52.     def do_forms(self, getarg):
   53.         filled = getarg("filled", bool)
   54. 
   55.         def get_and_set(name, options, default=None, astype=int):
   56.             current_value = getarg(name, astype)
   57.             i_select = T.select(name=name)
   58.             for (count, description) in options:
   59.                 count = astype(count)
   60.                 selected = False
   61.                 if ((current_value is not None and count == current_value) or
   62.                     (current_value is None and count == default)):
   63.                     o = T.option(value=str(count), selected="true")[description]
   64.                 else:
   65.                     o = T.option(value=str(count))[description]
   66.                 i_select = i_select[o]
   67.             if current_value is None:
   68.                 current_value = default
   69.             return current_value, i_select
   70. 
   71.         sections = {}
   72.         def add_input(section, text, entry):
   73.             if section not in sections:
   74.                 sections[section] = []
   75.             sections[section].extend([T.div[text, ": ", entry], "\n"])
   76. 
   77.         def add_output(section, entry):
   78.             if section not in sections:
   79.                 sections[section] = []
   80.             sections[section].extend([entry, "\n"])
   81. 
   82.         def build_section(section):
   83.             return T.fieldset[T.legend[section], sections[section]]
   84. 
   85.         def number(value, suffix=""):
   86.             scaling = 1
   87.             if value < 1:
   88.                 fmt = "%1.2g%s"
   89.             elif value < 100:
   90.                 fmt = "%.1f%s"
   91.             elif value < 1000:
   92.                 fmt = "%d%s"
   93.             elif value < 1e6:
   94.                 fmt = "%.2fk%s"; scaling = 1e3
   95.             elif value < 1e9:
   96.                 fmt = "%.2fM%s"; scaling = 1e6
   97.             elif value < 1e12:
   98.                 fmt = "%.2fG%s"; scaling = 1e9
   99.             elif value < 1e15:
  100.                 fmt = "%.2fT%s"; scaling = 1e12
  101.             elif value < 1e18:
  102.                 fmt = "%.2fP%s"; scaling = 1e15
  103.             else:
  104.                 fmt = "huge! %g%s"
  105.             return fmt % (value / scaling, suffix)
  106. 
  107.         user_counts = [(5, "5 users"),
  108.                        (50, "50 users"),
  109.                        (200, "200 users"),
  110.                        (1000, "1k users"),
  111.                        (10000, "10k users"),
  112.                        (50000, "50k users"),
  113.                        (100000, "100k users"),
  114.                        (500000, "500k users"),
  115.                        (1000000, "1M users"),
  116.                        ]
  117.         num_users, i_num_users = get_and_set("num_users", user_counts, 50000)
  118.         add_input("Users",
  119.                   "How many users are on this network?", i_num_users)
  120. 
  121.         files_per_user_counts = [(100, "100 files"),
  122.                                  (1000, "1k files"),
  123.                                  (10000, "10k files"),
  124.                                  (100000, "100k files"),
  125.                                  (1e6, "1M files"),
  126.                                  ]
  127.         files_per_user, i_files_per_user = get_and_set("files_per_user",
  128.                                                        files_per_user_counts,
  129.                                                        1000)
  130.         add_input("Users",
  131.                   "How many files in each user's vdrive? (avg)",
  132.                   i_files_per_user)
  133. 
  134.         space_per_user_sizes = [(1e6, "1MB"),
  135.                                 (10e6, "10MB"),
  136.                                 (100e6, "100MB"),
  137.                                 (200e6, "200MB"),
  138.                                 (1e9, "1GB"),
  139.                                 (2e9, "2GB"),
  140.                                 (5e9, "5GB"),
  141.                                 (10e9, "10GB"),
  142.                                 (100e9, "100GB"),
  143.                                 (1e12, "1TB"),
  144.                                 ]
  145.         # current allmydata average utilization 127MB per user
  146.         space_per_user, i_space_per_user = get_and_set("space_per_user",
  147.                                                        space_per_user_sizes,
  148.                                                        200e6)
  149.         add_input("Users",
  150.                   "How much data is in each user's vdrive? (avg)",
  151.                   i_space_per_user)
  152. 
  153.         sharing_ratios = [(1.0, "1.0x"),
  154.                           (1.1, "1.1x"),
  155.                           (2.0, "2.0x"),
  156.                           ]
  157.         sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio",
  158.                                                      sharing_ratios, 1.0,
  159.                                                      float)
  160.         add_input("Users",
  161.                   "What is the sharing ratio? (1.0x is no-sharing and"
  162.                   " no convergence)", i_sharing_ratio)
  163. 
  164.         # Encoding parameters
  165.         encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"),
  166.                             ("3-of-10-8", "3.3x (3-of-10, repair below 8)"),
  167.                             ("5-of-10-7", "2x (5-of-10, repair below 7)"),
  168.                             ("8-of-10-9", "1.25x (8-of-10, repair below 9)"),
  169.                             ("27-of-30-28", "1.1x (27-of-30, repair below 28"),
  170.                             ("25-of-100-50", "4x (25-of-100, repair below 50)"),
  171.                             ]
  172.         encoding_parameters, i_encoding_parameters = \
  173.                              get_and_set("encoding_parameters",
  174.                                          encoding_choices, "3-of-10-5", str)
  175.         encoding_pieces = encoding_parameters.split("-")
  176.         k = int(encoding_pieces[0])
  177.         assert encoding_pieces[1] == "of"
  178.         n = int(encoding_pieces[2])
  179.         # we repair the file when the number of available shares drops below
  180.         # this value
  181.         repair_threshold = int(encoding_pieces[3])
  182. 
  183.         add_input("Servers",
  184.                   "What are the default encoding parameters?",
  185.                   i_encoding_parameters)
  186. 
  187.         # Server info
  188.         num_server_choices = [ (5, "5 servers"),
  189.                                (10, "10 servers"),
  190.                                (15, "15 servers"),
  191.                                (30, "30 servers"),
  192.                                (50, "50 servers"),
  193.                                (100, "100 servers"),
  194.                                (200, "200 servers"),
  195.                                (300, "300 servers"),
  196.                                (500, "500 servers"),
  197.                                (1000, "1k servers"),
  198.                                (2000, "2k servers"),
  199.                                (5000, "5k servers"),
  200.                                (10e3, "10k servers"),
  201.                                (100e3, "100k servers"),
  202.                                (1e6, "1M servers"),
  203.                                ]
  204.         num_servers, i_num_servers = \
  205.                      get_and_set("num_servers", num_server_choices, 30, int)
  206.         add_input("Servers",
  207.                   "How many servers are there?", i_num_servers)
  208. 
  209.         # availability is measured in dBA = -dBF, where 0dBF is 100% failure,
  210.         # 10dBF is 10% failure, 20dBF is 1% failure, etc
  211.         server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"),
  212.                                (13, "95% [13dBA] (1.2hr/day)"),
  213.                                (20, "99% [20dBA] (14min/day or 3.5days/year)"),
  214.                                (23, "99.5% [23dBA] (7min/day or 1.75days/year)"),
  215.                                (30, "99.9% [30dBA] (87sec/day or 9hours/year)"),
  216.                                (40, "99.99% [40dBA] (60sec/week or 53min/year)"),
  217.                                (50, "99.999% [50dBA] (5min per year)"),
  218.                                ]
  219.         server_dBA, i_server_availability = \
  220.                     get_and_set("server_availability",
  221.                                 server_dBA_choices,
  222.                                 20, int)
  223.         add_input("Servers",
  224.                   "What is the server availability?", i_server_availability)
  225. 
  226.         drive_MTBF_choices = [ (40, "40,000 Hours"),
  227.                                ]
  228.         drive_MTBF, i_drive_MTBF = \
  229.                     get_and_set("drive_MTBF", drive_MTBF_choices, 40, int)
  230.         add_input("Drives",
  231.                   "What is the hard drive MTBF?", i_drive_MTBF)
  232.         # http://www.tgdaily.com/content/view/30990/113/
  233.         # http://labs.google.com/papers/disk_failures.pdf
  234.         # google sees:
  235.         #  1.7% of the drives they replaced were 0-1 years old
  236.         #  8% of the drives they replaced were 1-2 years old
  237.         #  8.6% were 2-3 years old
  238.         #  6% were 3-4 years old, about 8% were 4-5 years old
  239. 
  240.         drive_size_choices = [ (100, "100 GB"),
  241.                                (250, "250 GB"),
  242.                                (500, "500 GB"),
  243.                                (750, "750 GB"),
  244.                                ]
  245.         drive_size, i_drive_size = \
  246.                     get_and_set("drive_size", drive_size_choices, 750, int)
  247.         drive_size = drive_size * 1e9
  248.         add_input("Drives",
  249.                   "What is the capacity of each hard drive?", i_drive_size)
  250.         drive_failure_model_choices = [ ("E", "Exponential"),
  251.                                         ("U", "Uniform"),
  252.                                         ]
  253.         drive_failure_model, i_drive_failure_model = \
  254.                              get_and_set("drive_failure_model",
  255.                                          drive_failure_model_choices,
  256.                                          "E", str)
  257.         add_input("Drives",
  258.                   "How should we model drive failures?", i_drive_failure_model)
  259. 
  260.         # drive_failure_rate is in failures per second
  261.         if drive_failure_model == "E":
  262.             drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600)
  263.         else:
  264.             drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600)
  265. 
  266.         # deletion/gc/ownership mode
  267.         ownership_choices = [ ("A", "no deletion, no gc, no owners"),
  268.                               ("B", "deletion, no gc, no owners"),
  269.                               ("C", "deletion, share timers, no owners"),
  270.                               ("D", "deletion, no gc, yes owners"),
  271.                               ("E", "deletion, owner timers"),
  272.                               ]
  273.         ownership_mode, i_ownership_mode = \
  274.                         get_and_set("ownership_mode", ownership_choices,
  275.                                     "A", str)
  276.         add_input("Servers",
  277.                   "What is the ownership mode?", i_ownership_mode)
  278. 
  279.         # client access behavior
  280.         access_rates = [ (1, "one file per day"),
  281.                          (10, "10 files per day"),
  282.                          (100, "100 files per day"),
  283.                          (1000, "1k files per day"),
  284.                          (10e3, "10k files per day"),
  285.                          (100e3, "100k files per day"),
  286.                          ]
  287.         download_files_per_day, i_download_rate = \
  288.                                 get_and_set("download_rate", access_rates,
  289.                                             100, int)
  290.         add_input("Users",
  291.                   "How many files are downloaded per day?", i_download_rate)
  292.         download_rate = 1.0 * download_files_per_day / (24*60*60)
  293. 
  294.         upload_files_per_day, i_upload_rate = \
  295.                               get_and_set("upload_rate", access_rates,
  296.                                           10, int)
  297.         add_input("Users",
  298.                   "How many files are uploaded per day?", i_upload_rate)
  299.         upload_rate = 1.0 * upload_files_per_day / (24*60*60)
  300. 
  301.         delete_files_per_day, i_delete_rate = \
  302.                               get_and_set("delete_rate", access_rates,
  303.                                           10, int)
  304.         add_input("Users",
  305.                   "How many files are deleted per day?", i_delete_rate)
  306.         delete_rate = 1.0 * delete_files_per_day / (24*60*60)
  307. 
  308. 
  309.         # the value is in days
  310.         lease_timers = [ (1, "one refresh per day"),
  311.                          (7, "one refresh per week"),
  312.                          ]
  313.         lease_timer, i_lease = \
  314.                      get_and_set("lease_timer", lease_timers,
  315.                                  7, int)
  316.         add_input("Users",
  317.                   "How frequently do clients refresh files or accounts? "
  318.                   "(if necessary)",
  319.                   i_lease)
  320.         seconds_per_lease = 24*60*60*lease_timer
  321. 
  322.         check_timer_choices = [ (1, "every week"),
  323.                                 (4, "every month"),
  324.                                 (8, "every two months"),
  325.                                 (16, "every four months"),
  326.                                 ]
  327.         check_timer, i_check_timer = \
  328.                      get_and_set("check_timer", check_timer_choices, 4, int)
  329.         add_input("Users",
  330.                   "How frequently should we check on each file?",
  331.                   i_check_timer)
  332.         file_check_interval = check_timer * 7 * 24 * 3600
  333. 
  334. 
  335.         if filled:
  336.             add_output("Users", T.div["Total users: %s" % number(num_users)])
  337.             add_output("Users",
  338.                        T.div["Files per user: %s" % number(files_per_user)])
  339.             file_size = 1.0 * space_per_user / files_per_user
  340.             add_output("Users",
  341.                        T.div["Average file size: ", number(file_size)])
  342.             total_files = num_users * files_per_user / sharing_ratio
  343.             user_file_check_interval = file_check_interval / files_per_user
  344. 
  345.             add_output("Grid",
  346.                        T.div["Total number of files in grid: ",
  347.                              number(total_files)])
  348.             total_space = num_users * space_per_user / sharing_ratio
  349.             add_output("Grid",
  350.                        T.div["Total volume of plaintext in grid: ",
  351.                              number(total_space, "B")])
  352. 
  353.             total_shares = n * total_files
  354.             add_output("Grid",
  355.                        T.div["Total shares in grid: ", number(total_shares)])
  356.             expansion = float(n) / float(k)
  357. 
  358.             total_usage = expansion * total_space
  359.             add_output("Grid",
  360.                        T.div["Share data in grid: ", number(total_usage, "B")])
  361. 
  362.             if n > num_servers:
  363.                 # silly configuration, causes Tahoe2 to wrap and put multiple
  364.                 # shares on some servers.
  365.                 add_output("Servers",
  366.                            T.div["non-ideal: more shares than servers"
  367.                                  " (n=%d, servers=%d)" % (n, num_servers)])
  368.                 # every file has at least one share on every server
  369.                 buckets_per_server = total_files
  370.                 shares_per_server = total_files * ((1.0 * n) / num_servers)
  371.             else:
  372.                 # if nobody is full, then no lease requests will be turned
  373.                 # down for lack of space, and no two shares for the same file
  374.                 # will share a server. Therefore the chance that any given
  375.                 # file has a share on any given server is n/num_servers.
  376.                 buckets_per_server = total_files * ((1.0 * n) / num_servers)
  377.                 # since each such represented file only puts one share on a
  378.                 # server, the total number of shares per server is the same.
  379.                 shares_per_server = buckets_per_server
  380.             add_output("Servers",
  381.                        T.div["Buckets per server: ",
  382.                              number(buckets_per_server)])
  383.             add_output("Servers",
  384.                        T.div["Shares per server: ",
  385.                              number(shares_per_server)])
  386. 
  387.             # how much space is used on the storage servers for the shares?
  388.             #  the share data itself
  389.             share_data_per_server = total_usage / num_servers
  390.             add_output("Servers",
  391.                        T.div["Share data per server: ",
  392.                              number(share_data_per_server, "B")])
  393.             # this is determined empirically. H=hashsize=32, for a one-segment
  394.             # file and 3-of-10 encoding
  395.             share_validation_per_server = 266 * shares_per_server
  396.             # this could be 423*buckets_per_server, if we moved the URI
  397.             # extension into a separate file, but that would actually consume
  398.             # *more* space (minimum filesize is 4KiB), unless we moved all
  399.             # shares for a given bucket into a single file.
  400.             share_uri_extension_per_server = 423 * shares_per_server
  401. 
  402.             # ownership mode adds per-bucket data
  403.             H = 32 # depends upon the desired security of delete/refresh caps
  404.             # bucket_lease_size is the amount of data needed to keep track of
  405.             # the delete/refresh caps for each bucket.
  406.             bucket_lease_size = 0
  407.             client_bucket_refresh_rate = 0
  408.             owner_table_size = 0
  409.             if ownership_mode in ("B", "C", "D", "E"):
  410.                 bucket_lease_size = sharing_ratio * 1.0 * H
  411.             if ownership_mode in ("B", "C"):
  412.                 # refreshes per second per client
  413.                 client_bucket_refresh_rate = (1.0 * n * files_per_user /
  414.                                               seconds_per_lease)
  415.                 add_output("Users",
  416.                            T.div["Client share refresh rate (outbound): ",
  417.                                  number(client_bucket_refresh_rate, "Hz")])
  418.                 server_bucket_refresh_rate = (client_bucket_refresh_rate *
  419.                                               num_users / num_servers)
  420.                 add_output("Servers",
  421.                            T.div["Server share refresh rate (inbound): ",
  422.                                  number(server_bucket_refresh_rate, "Hz")])
  423.             if ownership_mode in ("D", "E"):
  424.                 # each server must maintain a bidirectional mapping from
  425.                 # buckets to owners. One way to implement this would be to
  426.                 # put a list of four-byte owner numbers into each bucket, and
  427.                 # a list of four-byte share numbers into each owner (although
  428.                 # of course we'd really just throw it into a database and let
  429.                 # the experts take care of the details).
  430.                 owner_table_size = 2*(buckets_per_server * sharing_ratio * 4)
  431. 
  432.             if ownership_mode in ("E",):
  433.                 # in this mode, clients must refresh one timer per server
  434.                 client_account_refresh_rate = (1.0 * num_servers /
  435.                                                seconds_per_lease)
  436.                 add_output("Users",
  437.                            T.div["Client account refresh rate (outbound): ",
  438.                                  number(client_account_refresh_rate, "Hz")])
  439.                 server_account_refresh_rate = (client_account_refresh_rate *
  440.                                               num_users / num_servers)
  441.                 add_output("Servers",
  442.                            T.div["Server account refresh rate (inbound): ",
  443.                                  number(server_account_refresh_rate, "Hz")])
  444. 
  445.             # TODO: buckets vs shares here is a bit wonky, but in
  446.             # non-wrapping grids it shouldn't matter
  447.             share_lease_per_server = bucket_lease_size * buckets_per_server
  448.             share_ownertable_per_server = owner_table_size
  449. 
  450.             share_space_per_server = (share_data_per_server +
  451.                                       share_validation_per_server +
  452.                                       share_uri_extension_per_server +
  453.                                       share_lease_per_server +
  454.                                       share_ownertable_per_server)
  455.             add_output("Servers",
  456.                        T.div["Share space per server: ",
  457.                              number(share_space_per_server, "B"),
  458.                              " (data ",
  459.                              number(share_data_per_server, "B"),
  460.                              ", validation ",
  461.                              number(share_validation_per_server, "B"),
  462.                              ", UEB ",
  463.                              number(share_uri_extension_per_server, "B"),
  464.                              ", lease ",
  465.                              number(share_lease_per_server, "B"),
  466.                              ", ownertable ",
  467.                              number(share_ownertable_per_server, "B"),
  468.                              ")",
  469.                              ])
  470. 
  471. 
  472.             # rates
  473.             client_download_share_rate = download_rate * k
  474.             client_download_byte_rate = download_rate * file_size
  475.             add_output("Users",
  476.                        T.div["download rate: shares = ",
  477.                              number(client_download_share_rate, "Hz"),
  478.                              " , bytes = ",
  479.                              number(client_download_byte_rate, "Bps"),
  480.                              ])
  481.             total_file_check_rate = 1.0 * total_files / file_check_interval
  482.             client_check_share_rate = total_file_check_rate / num_users
  483.             add_output("Users",
  484.                        T.div["file check rate: shares = ",
  485.                              number(client_check_share_rate, "Hz"),
  486.                              " (interval = %s)" %
  487.                              number(1 / client_check_share_rate, "s"),
  488.                              ])
  489. 
  490.             client_upload_share_rate = upload_rate * n
  491.             # TODO: doesn't include overhead
  492.             client_upload_byte_rate = upload_rate * file_size * expansion
  493.             add_output("Users",
  494.                        T.div["upload rate: shares = ",
  495.                              number(client_upload_share_rate, "Hz"),
  496.                              " , bytes = ",
  497.                              number(client_upload_byte_rate, "Bps"),
  498.                              ])
  499.             client_delete_share_rate = delete_rate * n
  500. 
  501.             server_inbound_share_rate = (client_upload_share_rate *
  502.                                          num_users / num_servers)
  503.             server_inbound_byte_rate = (client_upload_byte_rate *
  504.                                         num_users / num_servers)
  505.             add_output("Servers",
  506.                        T.div["upload rate (inbound): shares = ",
  507.                              number(server_inbound_share_rate, "Hz"),
  508.                              " , bytes = ",
  509.                               number(server_inbound_byte_rate, "Bps"),
  510.                              ])
  511.             add_output("Servers",
  512.                        T.div["share check rate (inbound): ",
  513.                              number(total_file_check_rate * n / num_servers,
  514.                                     "Hz"),
  515.                              ])
  516. 
  517.             server_share_modify_rate = ((client_upload_share_rate +
  518.                                          client_delete_share_rate) *
  519.                                          num_users / num_servers)
  520.             add_output("Servers",
  521.                        T.div["share modify rate: shares = ",
  522.                              number(server_share_modify_rate, "Hz"),
  523.                              ])
  524. 
  525.             server_outbound_share_rate = (client_download_share_rate *
  526.                                           num_users / num_servers)
  527.             server_outbound_byte_rate = (client_download_byte_rate *
  528.                                          num_users / num_servers)
  529.             add_output("Servers",
  530.                        T.div["download rate (outbound): shares = ",
  531.                              number(server_outbound_share_rate, "Hz"),
  532.                              " , bytes = ",
  533.                               number(server_outbound_byte_rate, "Bps"),
  534.                              ])
  535. 
  536. 
  537.             total_share_space = num_servers * share_space_per_server
  538.             add_output("Grid",
  539.                        T.div["Share space consumed: ",
  540.                              number(total_share_space, "B")])
  541.             add_output("Grid",
  542.                        T.div[" %% validation: %.2f%%" %
  543.                              (100.0 * share_validation_per_server /
  544.                               share_space_per_server)])
  545.             add_output("Grid",
  546.                        T.div[" %% uri-extension: %.2f%%" %
  547.                              (100.0 * share_uri_extension_per_server /
  548.                               share_space_per_server)])
  549.             add_output("Grid",
  550.                        T.div[" %% lease data: %.2f%%" %
  551.                              (100.0 * share_lease_per_server /
  552.                               share_space_per_server)])
  553.             add_output("Grid",
  554.                        T.div[" %% owner data: %.2f%%" %
  555.                              (100.0 * share_ownertable_per_server /
  556.                               share_space_per_server)])
  557.             add_output("Grid",
  558.                        T.div[" %% share data: %.2f%%" %
  559.                              (100.0 * share_data_per_server /
  560.                               share_space_per_server)])
  561.             add_output("Grid",
  562.                        T.div["file check rate: ",
  563.                              number(total_file_check_rate,
  564.                                     "Hz")])
  565. 
  566.             total_drives = max(mathutil.div_ceil(int(total_share_space),
  567.                                                  int(drive_size)),
  568.                                num_servers)
  569.             add_output("Drives",
  570.                        T.div["Total drives: ", number(total_drives), " drives"])
  571.             drives_per_server = mathutil.div_ceil(total_drives, num_servers)
  572.             add_output("Servers",
  573.                        T.div["Drives per server: ", drives_per_server])
  574. 
  575.             # costs
  576.             if drive_size == 750 * 1e9:
  577.                 add_output("Servers", T.div["750GB drive: $250 each"])
  578.                 drive_cost = 250
  579.             else:
  580.                 add_output("Servers",
  581.                            T.div[T.b["unknown cost per drive, assuming $100"]])
  582.                 drive_cost = 100
  583. 
  584.             if drives_per_server <= 4:
  585.                 add_output("Servers", T.div["1U box with <= 4 drives: $1500"])
  586.                 server_cost = 1500 # typical 1U box
  587.             elif drives_per_server <= 12:
  588.                 add_output("Servers", T.div["2U box with <= 12 drives: $2500"])
  589.                 server_cost = 2500 # 2U box
  590.             else:
  591.                 add_output("Servers",
  592.                            T.div[T.b["Note: too many drives per server, "
  593.                                      "assuming $3000"]])
  594.                 server_cost = 3000
  595. 
  596.             server_capital_cost = (server_cost + drives_per_server * drive_cost)
  597.             total_server_cost = float(num_servers * server_capital_cost)
  598.             add_output("Servers", T.div["Capital cost per server: $",
  599.                                         server_capital_cost])
  600.             add_output("Grid", T.div["Capital cost for all servers: $",
  601.                                      number(total_server_cost)])
  602.             # $70/Mbps/mo
  603.             # $44/server/mo power+space
  604.             server_bandwidth = max(server_inbound_byte_rate,
  605.                                    server_outbound_byte_rate)
  606.             server_bandwidth_mbps = mathutil.div_ceil(int(server_bandwidth*8),
  607.                                                       int(1e6))
  608.             server_monthly_cost = 70*server_bandwidth_mbps + 44
  609.             add_output("Servers", T.div["Monthly cost per server: $",
  610.                                         server_monthly_cost])
  611.             add_output("Users", T.div["Capital cost per user: $",
  612.                                       number(total_server_cost / num_users)])
  613. 
  614.             # reliability
  615.             any_drive_failure_rate = total_drives * drive_failure_rate
  616.             any_drive_MTBF = 1 // any_drive_failure_rate  # in seconds
  617.             any_drive_MTBF_days = any_drive_MTBF / 86400
  618.             add_output("Drives",
  619.                        T.div["MTBF (any drive): ",
  620.                              number(any_drive_MTBF_days), " days"])
  621.             drive_replacement_monthly_cost = (float(drive_cost)
  622.                                               * any_drive_failure_rate
  623.                                               *30*86400)
  624.             add_output("Grid",
  625.                        T.div["Monthly cost of replacing drives: $",
  626.                              number(drive_replacement_monthly_cost)])
  627. 
  628.             total_server_monthly_cost = float(num_servers * server_monthly_cost
  629.                                               + drive_replacement_monthly_cost)
  630. 
  631.             add_output("Grid", T.div["Monthly cost for all servers: $",
  632.                                      number(total_server_monthly_cost)])
  633.             add_output("Users",
  634.                        T.div["Monthly cost per user: $",
  635.                              number(total_server_monthly_cost / num_users)])
  636. 
  637.             # availability
  638.             file_dBA = self.file_availability(k, n, server_dBA)
  639.             user_files_dBA = self.many_files_availability(file_dBA,
  640.                                                           files_per_user)
  641.             all_files_dBA = self.many_files_availability(file_dBA, total_files)
  642.             add_output("Users",
  643.                        T.div["availability of: ",
  644.                              "arbitrary file = %d dBA, " % file_dBA,
  645.                              "all files of user1 = %d dBA, " % user_files_dBA,
  646.                              "all files in grid = %d dBA" % all_files_dBA,
  647.                              ],
  648.                        )
  649. 
  650.             time_until_files_lost = (n-k+1) / any_drive_failure_rate
  651.             add_output("Grid",
  652.                        T.div["avg time until files are lost: ",
  653.                              number(time_until_files_lost, "s"), ", ",
  654.                              number(time_until_files_lost/86400, " days"),
  655.                              ])
  656. 
  657.             share_data_loss_rate = any_drive_failure_rate * drive_size
  658.             add_output("Grid",
  659.                        T.div["share data loss rate: ",
  660.                              number(share_data_loss_rate,"Bps")])
  661. 
  662.             # the worst-case survival numbers occur when we do a file check
  663.             # and the file is just above the threshold for repair (so we
  664.             # decide to not repair it). The question is then: what is the
  665.             # chance that the file will decay so badly before the next check
  666.             # that we can't recover it? The resulting probability is per
  667.             # check interval.
  668.             # Note that the chances of us getting into this situation are low.
  669.             P_disk_failure_during_interval = (drive_failure_rate *
  670.                                               file_check_interval)
  671.             disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval)
  672.             disk_failure_dBA = -disk_failure_dBF
  673.             file_survives_dBA = self.file_availability(k, repair_threshold,
  674.                                                        disk_failure_dBA)
  675.             user_files_survives_dBA = self.many_files_availability( \
  676.                 file_survives_dBA, files_per_user)
  677.             all_files_survives_dBA = self.many_files_availability( \
  678.                 file_survives_dBA, total_files)
  679.             add_output("Users",
  680.                        T.div["survival of: ",
  681.                              "arbitrary file = %d dBA, " % file_survives_dBA,
  682.                              "all files of user1 = %d dBA, " %
  683.                              user_files_survives_dBA,
  684.                              "all files in grid = %d dBA" %
  685.                              all_files_survives_dBA,
  686.                              " (per worst-case check interval)",
  687.                              ])
  688. 
  689. 
  690. 
  691.         all_sections = []
  692.         all_sections.append(build_section("Users"))
  693.         all_sections.append(build_section("Servers"))
  694.         all_sections.append(build_section("Drives"))
  695.         if "Grid" in sections:
  696.             all_sections.append(build_section("Grid"))
  697. 
  698.         f = T.form(action=".", method="post", enctype="multipart/form-data")
  699. 
  700.         if filled:
  701.             action = "Recompute"
  702.         else:
  703.             action = "Compute"
  704. 
  705.         f = f[T.input(type="hidden", name="filled", value="true"),
  706.               T.input(type="submit", value=action),
  707.               all_sections,
  708.               ]
  709. 
  710.         try:
  711.             from allmydata import reliability
  712.             # we import this just to test to see if the page is available
  713.             _hush_pyflakes = reliability
  714.             f = [T.div[T.a(href="../reliability")["Reliability Math"]], f]
  715.         except ImportError:
  716.             pass
  717. 
  718.         return f
  719. 
  720.     def file_availability(self, k, n, server_dBA):
  721.         """
  722.         The full formula for the availability of a specific file is::
  723. 
  724.          1 - sum([choose(N,i) * p**i * (1-p)**(N-i)] for i in range(k)])
  725. 
  726.         Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of
  727.         this summation is the probability that there are exactly 'i' servers
  728.         available, and what we're doing is adding up the cases where i is too
  729.         low.
  730. 
  731.         This is a nuisance to calculate at all accurately, especially once N
  732.         gets large, and when p is close to unity. So we make an engineering
  733.         approximation: if (1-p) is very small, then each [i] term is much
  734.         larger than the [i-1] term, and the sum is dominated by the i=k-1
  735.         term. This only works for (1-p) < 10%, and when the choose() function
  736.         doesn't rise fast enough to compensate. For high-expansion encodings
  737.         (3-of-10, 25-of-100), the choose() function is rising at the same
  738.         time as the (1-p)**(N-i) term, so that's not an issue. For
  739.         low-expansion encodings (7-of-10, 75-of-100) the two values are
  740.         moving in opposite directions, so more care must be taken.
  741. 
  742.         Note that the p**i term has only a minor effect as long as (1-p)*N is
  743.         small, and even then the effect is attenuated by the 1-p term.
  744.         """
  745. 
  746.         assert server_dBA > 9  # >=90% availability to use the approximation
  747.         factor = binomial(n, k-1)
  748.         factor_dBA = 10 * math.log10(factor)
  749.         exponent = n - k + 1
  750.         file_dBA = server_dBA * exponent - factor_dBA
  751.         return file_dBA
  752. 
  753.     def many_files_availability(self, file_dBA, num_files):
  754.         """The probability that 'num_files' independent bernoulli trials will
  755.         succeed (i.e. we can recover all files in the grid at any given
  756.         moment) is p**num_files . Since p is close to unity, we express in p
  757.         in dBA instead, so we can get useful precision on q (=1-p), and then
  758.         the formula becomes::
  759. 
  760.          P_some_files_unavailable = 1 - (1 - q)**num_files
  761. 
  762.         That (1-q)**n expands with the usual binomial sequence, 1 - nq +
  763.         Xq**2 ... + Xq**n . We use the same approximation as before, since we
  764.         know q is close to zero, and we get to ignore all the terms past -nq.
  765.         """
  766. 
  767.         many_files_dBA = file_dBA - 10 * math.log10(num_files)
  768.         return many_files_dBA