1 | Sun Apr 25 13:08:26 PDT 2010 kevan@isnotajoke.com |
---|
2 | * Add code to support 'tahoe censor' |
---|
3 | |
---|
4 | 'tahoe censor' allows users to quickly and easily remove furls, IP |
---|
5 | addresses and (eventually) storage indices from log files, so that they |
---|
6 | can be posted in bug reports, mailing list messages, and other places |
---|
7 | without much effort. |
---|
8 | |
---|
9 | New patches: |
---|
10 | |
---|
11 | [Add code to support 'tahoe censor' |
---|
12 | kevan@isnotajoke.com**20100425200826 |
---|
13 | Ignore-this: 4e8c27d53f766c82e18bf5c7f5df0c01 |
---|
14 | |
---|
15 | 'tahoe censor' allows users to quickly and easily remove furls, IP |
---|
16 | addresses and (eventually) storage indices from log files, so that they |
---|
17 | can be posted in bug reports, mailing list messages, and other places |
---|
18 | without much effort. |
---|
19 | ] { |
---|
20 | hunk ./src/allmydata/scripts/cli.py 411 |
---|
21 | (which must be a directory), like 'tahoe check' but for multiple files. |
---|
22 | Optionally repair any problems found.""" |
---|
23 | |
---|
24 | + |
---|
class CensorOptions(BaseOptions, usage.Options):
    # Command-line options for 'tahoe censor'.
    #
    # Positional arguments:
    #   SOURCE-LOG  the log file to censor (stored as self['oldfile'])
    #   DEST-LOG    optional; where to write the censored log (stored as
    #               self['newfile']). When omitted it defaults to
    #               SOURCE-LOG, i.e. the log is censored in place.
    optFlags = [
        ["verbose", "v", "Print information about what is going on"],
        ]

    def parseArgs(self, oldfile, newfile=None):
        """Record the source log and (optional) destination log paths."""
        self['oldfile'] = oldfile
        # No destination given: censor the source file in place.
        if newfile is None:
            newfile = oldfile
        self['newfile'] = newfile

    def getSynopsis(self):
        """Return the one-line usage string; DEST-LOG is optional."""
        return "%s censor SOURCE-LOG [DEST-LOG]" % (os.path.basename(sys.argv[0]),)

    longdesc = """
    Censor IP addresses, node addresses and storage indices from
    SOURCE-LOG, writing the results to DEST-LOG. If DEST-LOG is omitted,
    SOURCE-LOG is censored in place."""
---|
43 | + |
---|
44 | + |
---|
45 | subCommands = [ |
---|
46 | ["mkdir", None, MakeDirectoryOptions, "Create a new directory"], |
---|
47 | ["add-alias", None, AddAliasOptions, "Add a new alias cap"], |
---|
48 | hunk ./src/allmydata/scripts/cli.py 450 |
---|
49 | ["stats", None, StatsOptions, "Print statistics about all files/directories in a subtree"], |
---|
50 | ["check", None, CheckOptions, "Check a single file or directory"], |
---|
51 | ["deep-check", None, DeepCheckOptions, "Check all files/directories reachable from a starting point"], |
---|
52 | + ["censor", None, CensorOptions, "Purge sensitive information from a log file"], |
---|
53 | ] |
---|
54 | |
---|
55 | def mkdir(options): |
---|
56 | hunk ./src/allmydata/scripts/cli.py 547 |
---|
57 | rc = tahoe_check.deepcheck(options) |
---|
58 | return rc |
---|
59 | |
---|
def censor(options):
    """Run the 'tahoe censor' subcommand and return its exit code.

    The implementation module is imported on demand, inside the function
    body, and the call is delegated to tahoe_censor.censor().
    """
    from allmydata.scripts import tahoe_censor
    return tahoe_censor.censor(options)
---|
64 | + |
---|
65 | dispatch = { |
---|
66 | "mkdir": mkdir, |
---|
67 | "add-alias": add_alias, |
---|
68 | hunk ./src/allmydata/scripts/cli.py 570 |
---|
69 | "stats": stats, |
---|
70 | "check": check, |
---|
71 | "deep-check": deepcheck, |
---|
72 | + "censor": censor, |
---|
73 | } |
---|
74 | addfile ./src/allmydata/scripts/tahoe_censor.py |
---|
75 | hunk ./src/allmydata/scripts/tahoe_censor.py 1 |
---|
76 | +import sys, pickle, bz2, re, shutil, os |
---|
77 | + |
---|
78 | +from twisted.python import usage |
---|
79 | + |
---|
class Censorer:
    """
    I use pickle to read events in a foolscap log, remove information
    that people might not want to be public knowledge (e.g., IP
    addresses, node furls, storage indices) from the messages of those
    logs, and write out my results.

    Only the e['d']['message'] field of each event is rewritten; every
    other field is copied through unchanged.

    NOTE(security): events are read with pickle.load(), which can run
    arbitrary code while unpickling. Only run me on log files that come
    from a source you trust.
    """

    NONEXISTENT_FILE_MSG = ("Error: Either '%s' doesn't exist, or I "
                            "can't read it")
    UNWRITABLE_FILE_MSG = ("Error: I can't write to '%s'")

    IN_PLACE_LOG_MSG = ("Info: I'm censoring the file '%s' in-place")
    NEW_DEST_LOG_MSG = ("Info: I'm censoring '%s', and writing the "
                        "results to '%s'")
    STATUS_MSG = ("Info: I'm processing the message '%s'")
    INVALID_FILE_MSG = ("Error: Either the file '%s' is unreadable, "
                        "or it is in an invalid format")
    DONE_MSG = ("Info: Done. Your censored log files are now in "
                "'%s'. I processed a total of %d messages. I "
                "found and removed the following:\n    "
                "IP Addresses: %d\n    "
                "Storage Indices: %d\n    "
                "Node URLs: %d\n")

    # Patterns are compiled once, and are deliberately non-greedy /
    # bounded so that two matches in one message are censored (and
    # counted) separately instead of swallowing the text between them.
    IP_EXPRESSION = re.compile(r"\d+\.\d+\.\d+\.\d+")
    IP_REPLACEMENT = "xxx.xxx.xxx.xxx"
    SI_EXPRESSION = re.compile(r"<si>.*?</si>", re.IGNORECASE)
    SI_REPLACEMENT = "<si>xxxxxx</si>"
    # NOTE(review): this hides everything between 'pb://' and the first
    # '/', and leaves whatever follows that '/' untouched -- confirm
    # whether the trailing part of a furl should be censored as well.
    FURL_EXPRESSION = re.compile(r"pb://[^\s/]*/", re.IGNORECASE)
    FURL_REPLACEMENT = "pb://xxxx/"

    def run(self, options):
        """
        Censor options['oldfile'] and write the result to
        options['newfile'].

        options is a dict-like object with the keys 'oldfile',
        'newfile', 'verbose' and 'quiet', and with 'stdout' and 'stderr'
        file-like attributes. When both paths name the same file, the
        output goes to a temporary sibling file which replaces the
        original only if the whole source was read successfully.

        Returns 0 on success and 1 on failure.
        """
        self.options = options
        self.rc = 0
        self.censored_logs = 0
        self.censored_ips = 0
        self.censored_sis = 0
        self.censored_furls = 0
        newfile = options['newfile']
        oldfile = options['oldfile']

        # Does the source file exist? Is it readable?
        if not os.path.exists(oldfile) or not os.access(oldfile, os.R_OK):
            self._say(options.stderr, self.NONEXISTENT_FILE_MSG % oldfile)
            return 1
        # Does the destination file exist? If so, can I write to it?
        if os.path.exists(newfile) and not os.access(newfile, os.W_OK):
            self._say(options.stderr, self.UNWRITABLE_FILE_MSG % newfile)
            return 1

        # Compare resolved paths, not raw strings: opening the output
        # for writing would otherwise truncate the input when the same
        # file is named in two different ways (e.g. 'a' and './a').
        in_place = (os.path.realpath(newfile) == os.path.realpath(oldfile))

        if options['verbose']:
            if in_place:
                self._say(options.stdout, self.IN_PLACE_LOG_MSG % newfile)
            else:
                self._say(options.stdout,
                          self.NEW_DEST_LOG_MSG % (oldfile, newfile))

        if in_place:
            # Keep any '.bz2' suffix at the very end so the temporary
            # file is written with the same compression as the target.
            newfilename = newfile + ".tmp"
            if newfile.endswith(".bz2"):
                newfilename += ".bz2"
        else:
            newfilename = newfile

        newfilef = self._open_log(newfilename, "wb")
        try:
            for e in self.get_events(oldfile):
                if "d" in e and "message" in e['d']:
                    original = e['d']["message"]
                    censored, found = self._censor_message(original)
                    if options['verbose'] and found:
                        self._say(options.stdout, self.STATUS_MSG % original)
                    e['d']["message"] = censored
                pickle.dump(e, newfilef, 2)
                # get_events() relies on this counter to tell a normal
                # end-of-file from an unreadable file.
                self.censored_logs += 1
        finally:
            newfilef.close()

        if self.rc != 0:
            # The source could not be (fully) read. Never replace the
            # original log with a partial result, and don't claim success.
            if in_place:
                os.remove(newfilename)
            return self.rc

        if in_place:
            shutil.move(newfilename, newfile)
        if not options['quiet']:
            self._say(options.stdout, self.DONE_MSG % (newfile,
                                                       self.censored_logs,
                                                       self.censored_ips,
                                                       self.censored_sis,
                                                       self.censored_furls))
        return self.rc

    def get_events(self, source):
        """
        Yield each pickled event dict stored in the file named source.

        Files whose name ends in '.bz2' are read through bz2. If the
        file cannot be parsed I set self.rc to 1, report the problem on
        self.options.stderr and stop yielding. The file is closed when
        the generator finishes or is discarded.
        """
        f = self._open_log(source, "rb")
        try:
            while True:
                try:
                    e = pickle.load(f)
                except EOFError:
                    # pickle.load will raise an EOFError when it is done
                    # reading the contents of a file, but also when there
                    # aren't any more valid contents to read. The first
                    # condition is okay; it means there are no more logs
                    # to read; the latter isn't. So we keep track of how
                    # many logs we have processed, and use that to figure
                    # out which condition we're in.
                    if self.censored_logs == 0:
                        self._invalid(source)
                    return
                except Exception:
                    # Sometimes, pickle will raise other exceptions to
                    # tell us that it doesn't like the format of what it
                    # is reading.
                    self._invalid(source)
                    return
                if not isinstance(e, dict):
                    # Every event in a log is expected to be a dict.
                    self._invalid(source)
                    return
                # The yield sits outside the try/except above so that
                # errors raised by the consumer are not mistaken for a
                # corrupt log file.
                yield e
        finally:
            f.close()

    def _censor_message(self, message):
        """Return (censored message, number of replacements made) and
        add the per-category replacement counts to my totals."""
        message, ips = self.IP_EXPRESSION.subn(self.IP_REPLACEMENT, message)
        message, sis = self.SI_EXPRESSION.subn(self.SI_REPLACEMENT, message)
        message, furls = self.FURL_EXPRESSION.subn(self.FURL_REPLACEMENT,
                                                   message)
        self.censored_ips += ips
        self.censored_sis += sis
        self.censored_furls += furls
        return message, ips + sis + furls

    def _open_log(self, filename, mode):
        """Open filename with bz2 if it ends in '.bz2', else as a plain file."""
        if filename.endswith(".bz2"):
            return bz2.BZ2File(filename, mode)
        return open(filename, mode)

    def _invalid(self, source):
        """Flag the run as failed and report source as unreadable/invalid."""
        self.rc = 1
        self._say(self.options.stderr, self.INVALID_FILE_MSG % source)

    def _say(self, stream, text):
        """Write text to stream followed by a newline."""
        stream.write(text + "\n")
---|
216 | + |
---|
217 | + |
---|
def censor(options):
    """Censor the log files named in options; return the exit code.

    Builds a fresh Censorer and hands it the parsed command-line options.
    """
    return Censorer().run(options)
---|
222 | } |
---|
223 | |
---|
224 | Context: |
---|
225 | |
---|
226 | [setup: add licensing declaration for setuptools (noticed by the FSF compliance folks) |
---|
227 | zooko@zooko.com**20100309184415 |
---|
228 | Ignore-this: 2dfa7d812d65fec7c72ddbf0de609ccb |
---|
229 | ] |
---|
230 | [setup: fix error in licensing declaration from Shawn Willden, as noted by the FSF compliance division |
---|
231 | zooko@zooko.com**20100309163736 |
---|
232 | Ignore-this: c0623d27e469799d86cabf67921a13f8 |
---|
233 | ] |
---|
234 | [CREDITS to Jacob Appelbaum |
---|
235 | zooko@zooko.com**20100304015616 |
---|
236 | Ignore-this: 70db493abbc23968fcc8db93f386ea54 |
---|
237 | ] |
---|
238 | [desert-island-build-with-proper-versions |
---|
239 | jacob@appelbaum.net**20100304013858] |
---|
240 | [docs: a few small edits to try to guide newcomers through the docs |
---|
241 | zooko@zooko.com**20100303231902 |
---|
242 | Ignore-this: a6aab44f5bf5ad97ea73e6976bc4042d |
---|
243 | These edits were suggested by my watching over Jake Appelbaum's shoulder as he completely ignored/skipped/missed install.html and also as he decided that debian.txt wouldn't help him with basic installation. Then I threw in a few docs edits that have been sitting around in my sandbox asking to be committed for months. |
---|
244 | ] |
---|
245 | [TAG allmydata-tahoe-1.6.1 |
---|
246 | david-sarah@jacaranda.org**20100228062314 |
---|
247 | Ignore-this: eb5f03ada8ea953ee7780e7fe068539 |
---|
248 | ] |
---|
249 | Patch bundle hash: |
---|
250 | cc205da6be53515218cbe01c0e349b63f076a4d1 |
---|