1 | """ |
---|
2 | General web server-related utilities. |
---|
3 | """ |
---|
4 | from __future__ import annotations |
---|
5 | |
---|
6 | from six import ensure_str |
---|
7 | from typing import IO, Callable, Optional |
---|
8 | import re, time, tempfile |
---|
9 | from urllib.parse import parse_qsl, urlencode |
---|
10 | |
---|
11 | from cgi import ( |
---|
12 | FieldStorage, |
---|
13 | ) |
---|
14 | from io import ( |
---|
15 | BytesIO, |
---|
16 | ) |
---|
17 | |
---|
18 | from twisted.application import service, strports, internet |
---|
19 | from twisted.web import static |
---|
20 | from twisted.web.http import ( |
---|
21 | parse_qs, |
---|
22 | ) |
---|
23 | from twisted.web.server import ( |
---|
24 | Request, |
---|
25 | Site, |
---|
26 | ) |
---|
27 | from twisted.internet import defer |
---|
28 | from twisted.internet.address import ( |
---|
29 | IPv4Address, |
---|
30 | IPv6Address, |
---|
31 | ) |
---|
32 | from allmydata.util import log, fileutil |
---|
33 | |
---|
34 | from allmydata.web import introweb, root |
---|
35 | from allmydata.web.operations import OphandleTable |
---|
36 | |
---|
37 | from .web.storage_plugins import ( |
---|
38 | StoragePlugins, |
---|
39 | ) |
---|
40 | |
---|
41 | |
---|
class FileUploadFieldStorage(FieldStorage):
    """
    A ``FieldStorage`` whose ``file`` field is always handled as bytes.

    Python 2 always produced bytes for uploaded files.  Python 3 applies a
    heuristic instead: a field that carries a filename is assumed to be a
    file upload (bytes), while a field without one is treated as Unicode.

    Tahoe-LAFS always wants bytes, and it also lets the filename be given
    through a separate field called "name" rather than through the MIME
    filename, so the upload field may legitimately arrive without a
    filename.

    The workaround is to intercept the ``filename`` attribute with a
    property.  Mypy objects to this, hence the ``# type: ignore`` below.

    Source for idea:
    https://mail.python.org/pipermail/python-dev/2017-February/147402.html
    """
    @property  # type: ignore
    def filename(self):
        is_upload_field = self.name == "file"
        mime_filename = self._mime_filename
        if mime_filename or not is_upload_field:
            # Either a real MIME filename was supplied, or this is not the
            # upload field: report exactly what was stored.
            return mime_filename
        # The "file" field is what uploads use (see directory.py's
        # _POST_upload).  It has no MIME filename, so pretend there is one
        # to make FieldStorage hand back bytes.
        return "unknown-filename"

    @filename.setter
    def filename(self, value):
        # Keep whatever the base class assigns in a private slot so the
        # getter above can decide what to report.
        self._mime_filename = value
---|
73 | |
---|
74 | |
---|
class TahoeLAFSRequest(Request, object):
    """
    A Twisted Web ``Request`` extended with behavior that Tahoe-LAFS needs.

    :ivar NoneType|FieldStorage fields: For POST requests whose content type
        is ``multipart/form-data`` or ``application/x-www-form-urlencoded``,
        a structured representation of the contents of the request body.
        For anything else, ``None``.
    """
    fields = None

    def requestReceived(self, command, path, version):
        """
        Called by channel when all data has been received.

        This replaces the base implementation in order to apply certain
        site-wide policies and to parse form posts with ``FieldStorage``,
        which is less memory-intensive for large file uploads.

        :param command: The request method, as bytes (compared against
            ``b'POST'`` below).
        :param path: The request URI, as bytes, possibly including a query
            string.
        :param version: The client protocol version; stored unchanged as
            ``clientproto``.
        """
        self.content.seek(0)
        self.args = {}
        self.stack = []

        self.method = command
        self.uri = path
        self.clientproto = version

        # Separate the path from the (optional) query string.
        bare_path, separator, argstring = self.uri.partition(b'?')
        if separator:
            self.path = bare_path
            self.args = parse_qs(argstring, 1)
        else:
            self.path = self.uri

        form_content_types = (
            "multipart/form-data",
            "application/x-www-form-urlencoded",
        )
        raw_content_types = self.requestHeaders.getRawHeaders("content-type")
        content_type = (raw_content_types or [""])[0]
        if self.method == b'POST' and content_type.split(";")[0] in form_content_types:
            # We use FieldStorage here because it performs better than
            # cgi.parse_multipart(self.content, pdict) which is what
            # twisted.web.http.Request uses.

            # FieldStorage wants native-string header names and a single
            # value per header; the last raw value is the one used.
            headers = {}
            for name, values in self.requestHeaders.getAllRawHeaders():
                headers[ensure_str(name.lower())] = ensure_str(values[-1])

            if 'content-length' not in headers:
                # Python 3's cgi module would really, really like us to set
                # Content-Length, so measure the body by seeking to its end.
                self.content.seek(0, 2)
                body_length = self.content.tell()
                self.content.seek(0)
                headers['content-length'] = str(body_length)

            self.fields = FileUploadFieldStorage(
                self.content, headers, environ={'REQUEST_METHOD': 'POST'})

        # Leave the body positioned at the start for whoever reads it next.
        self.content.seek(0)

        self._tahoeLAFSSecurityPolicy()

        self.processing_started_timestamp = time.time()
        self.process()

    def _tahoeLAFSSecurityPolicy(self):
        """
        Set the response headers that make up the security policy imposed by
        Tahoe-LAFS.  It is invoked from ``requestReceived`` so that every
        HTTP request handled by the Tahoe-LAFS HTTP server gets the policy,
        regardless of other implementation details.
        """
        # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Frame-Options
        self.responseHeaders.setRawHeaders("X-Frame-Options", ["DENY"])
        # See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy
        self.setHeader("Referrer-Policy", "no-referrer")
---|
146 | |
---|
147 | |
---|
148 | def _get_client_ip(request): |
---|
149 | try: |
---|
150 | get = request.getClientAddress |
---|
151 | except AttributeError: |
---|
152 | return request.getClientIP() |
---|
153 | else: |
---|
154 | client_addr = get() |
---|
155 | if isinstance(client_addr, (IPv4Address, IPv6Address)): |
---|
156 | return client_addr.host |
---|
157 | return None |
---|
158 | |
---|
159 | |
---|
def _logFormatter(logDateTime, request):
    """
    Build the access-log line for ``request`` with capabilities hidden.

    :param logDateTime: Not used by this formatter.
    :param request: The request being logged; its ``uri``, ``method``,
        ``code`` and ``sentLength`` attributes are read.

    :return: The formatted log line (a ``str``).
    """
    # we build up a log string that hides most of the cap, to preserve
    # user privacy. We retain the query args so we can identify things
    # like t=json. We make no attempt to match apache formatting.
    # TODO: when we move to DSA dirnodes and shorter caps, consider
    # exposing a few characters of the cap, or maybe a few characters of
    # its hash.
    path, separator, raw_query = request.uri.partition(b"?")
    # Only emit a "?" when the URI actually had query args.
    queryargs = b"?" + censor(raw_query) if separator else b""

    # Anything below these prefixes is hidden from the log.
    for prefix in (b"/uri/", b"/file/", b"/named/"):
        if path.startswith(prefix):
            path = prefix + b"[CENSORED]"
            break

    uri = path + queryargs

    fields = {
        "clientip": _get_client_ip(request),
        "method": str(request.method, "utf-8"),
        "uri": str(uri, "utf-8"),
        "code": request.code,
        "length": request.sentLength or "-",
        # These two are not referenced by the template below.
        "facility": "tahoe.webish",
        "level": log.OPERATIONAL,
    }
    return "web: %(clientip)s %(method)s %(uri)s %(code)s %(length)s" % fields
---|
194 | |
---|
195 | |
---|
def censor(queryargs: bytes) -> bytes:
    """
    Replace potentially sensitive values in query arguments with a
    constant string.

    :param queryargs: The raw query string of a request URI (the part after
        the ``?``), as bytes.

    :return: The re-encoded, ASCII-only query string in which the value of
        every ``uri`` and ``private-key`` argument has been replaced by
        ``[CENSORED]``.  Input bytes outside of ASCII are replaced by
        U+FFFD (and then percent-encoded) instead of raising, so a
        malformed query string cannot make log formatting fail.
    """
    # * ``uri``: there is a form handler which redirects POST /uri?uri=FOO
    #   into GET /uri/FOO so folks can paste in non-HTTP-prefixed uris.
    #   Make sure we censor these.
    # * ``private-key``: likewise, sometimes a private key is supplied with
    #   mutable creation.
    sensitive_keys = ("uri", "private-key")

    # The query string comes straight from the client, so it may contain
    # arbitrary bytes; ``errors="replace"`` keeps decoding from raising.
    args = parse_qsl(
        queryargs.decode("ascii", errors="replace"),
        keep_blank_values=True,
        encoding="utf8",
    )
    result = [
        (k, "[CENSORED]" if k in sensitive_keys else v)
        for k, v in args
    ]

    # Customize safe to try to leave our markers intact.
    return urlencode(result, safe="[]").encode("ascii")
---|
218 | |
---|
219 | |
---|
def anonymous_tempfile_factory(tempdir: bytes) -> Callable[[], IO[bytes]]:
    """
    Build a zero-argument callable that opens a new temporary file inside
    the given directory each time it is called.

    :param tempdir: The directory in which temporary files will be created.

    :return: The callable.
    """
    def make_tempfile() -> IO[bytes]:
        # ``tempdir`` is captured from the enclosing call.
        return tempfile.TemporaryFile(dir=tempdir)

    return make_tempfile
---|
230 | |
---|
231 | |
---|
class TahoeLAFSSite(Site, object):
    """
    The HTTP protocol factory used by Tahoe-LAFS.

    It differs from a plain ``Site`` in these ways:

    * Large request bodies are written to files produced by a configurable
      temporary file factory instead of being kept in memory.

    * Access logs are produced by a formatter that omits capability
      strings to help keep them secret.

    * Requests are instances of ``TahoeLAFSRequest``.
    """
    requestFactory = TahoeLAFSRequest

    def __init__(self, make_tempfile: Callable[[], IO[bytes]], *args, **kwargs):
        """
        :param make_tempfile: A no-argument callable returning a new binary
            file object usable as a context manager.

        Remaining arguments are passed on to ``Site.__init__``.
        """
        Site.__init__(self, *args, logFormatter=_logFormatter, **kwargs)
        assert callable(make_tempfile)
        # Open one file and close it again right away; presumably this is
        # here so that a factory which cannot create files fails now rather
        # than on the first large request.
        with make_tempfile():
            pass
        self._make_tempfile = make_tempfile

    def getContentFile(self, length: Optional[int]) -> IO[bytes]:
        """
        Choose where the body of a request is stored.

        :param length: The announced body length, or ``None`` if unknown.

        :return: An in-memory ``BytesIO`` for bodies known to be smaller
            than 1 MiB, otherwise a file from the temporary file factory.
        """
        small_enough = length is not None and length < 1024 * 1024
        if small_enough:
            return BytesIO()
        return self._make_tempfile()
---|
257 | |
---|
class WebishServer(service.MultiService):
    """
    The service that hosts the web-API: it builds the resource tree rooted
    at ``root.Root``, serves it through a ``TahoeLAFSSite`` listening on the
    configured port, and records the resulting URL once the port is known.
    """
    # The type in Twisted for services is wrong in 22.10...
    # https://github.com/twisted/twisted/issues/10135
    name = "webish"  # type: ignore[assignment]

    def __init__(self, client, webport, make_tempfile, nodeurl_path=None, staticdir=None,
                 clock=None, now_fn=time.time):
        """
        :param client: Passed to ``root.Root`` and ``StoragePlugins``.
        :param webport: The strports description (or bare port number) to
            listen on.
        :param make_tempfile: The temporary file factory given to the site.
        :param nodeurl_path: If set, the URL of the server is written to
            this path once the server has started.
        :param staticdir: If set, a directory served under ``/static``.
        :param clock: If set, a twisted.internet.task.Clock that is provided
            by the unit tests so that they can test features that involve
            the passage of time in a deterministic manner.
        :param now_fn: Passed to ``root.Root``.
        """
        service.MultiService.__init__(self)
        # the 'data' argument to all render() methods default to the Client

        self.root = root.Root(client, clock, now_fn)
        self.buildServer(webport, make_tempfile, nodeurl_path, staticdir)

        # If set, clock is a twisted.internet.task.Clock that the tests
        # use to test ophandle expiration.
        operations = OphandleTable(clock)
        self._operations = operations
        operations.setServiceParent(self)
        self.root.putChild(b"operations", operations)

        self.root.putChild(b"storage-plugins", StoragePlugins(client))

    def buildServer(self, webport, make_tempfile, nodeurl_path, staticdir):
        """
        Create the site and its listening service for ``self.root`` and
        prepare the bookkeeping that ``startService`` fills in.
        """
        self.webport = webport
        self.site = TahoeLAFSSite(make_tempfile, self.root)
        self.staticdir = staticdir  # so tests can check
        if staticdir:
            self.root.putChild(b"static", static.File(staticdir))

        # twisted warns about bare "0" or "3456"
        if re.match(r'\d', webport):
            webport = "tcp:" + webport
        # strports must be native strings.
        listener = strports.service(ensure_str(webport), self.site)
        listener.setServiceParent(self)

        self._scheme = None
        self._portnum = None
        self._url = None
        # stash it so we can query for the portnum
        self._listener = listener

        self._started = defer.Deferred()
        if nodeurl_path:
            def _write_nodeurl_file(ign):
                # this file will be created with default permissions
                fileutil.write_atomically(
                    nodeurl_path, self.getURL() + "\n", mode="")
            self._started.addCallback(_write_nodeurl_file)

    def getURL(self):
        """
        :return: The URL of this server; only available once the listening
            port is known.
        """
        assert self._url
        return self._url

    def getPortnum(self):
        """
        :return: The port number this server listens on; only available once
            the listening port is known.
        """
        assert self._portnum
        return self._portnum

    def startService(self):
        """
        Start the child services, work out the scheme and port of the
        listener, and fire (or fail) ``self._started`` accordingly.
        """
        def _got_port(lp):
            self._portnum = lp.getHost().port
            # what is our webport?
            assert self._scheme
            self._url = "%s://127.0.0.1:%d/" % (self._scheme, self._portnum)
            self._started.callback(None)
            return lp

        def _fail(f):
            self._started.errback(f)
            return f

        service.MultiService.startService(self)
        listener = self._listener

        if hasattr(listener, 'endpoint') and hasattr(listener, '_waitingForPort'):
            # Twisted 10.2 gives us a StreamServerEndpointService. This is
            # ugly but should do for now.
            endpoint_class_name = listener.endpoint.__class__.__name__
            self._scheme = 'https' if endpoint_class_name.startswith('SSL') else 'http'
            listener._waitingForPort.addCallbacks(_got_port, _fail)
            return

        if isinstance(listener, internet.TCPServer):
            # Twisted <= 10.1
            self._scheme = 'http'
            _got_port(listener._port)
            return

        if isinstance(listener, internet.SSLServer):
            # Twisted <= 10.1
            self._scheme = 'https'
            _got_port(listener._port)
            return

        # who knows, probably some weirdo future version of Twisted
        self._started.errback(AssertionError("couldn't find out the scheme or port for the web-API server"))

    def get_operations(self):
        """
        :return: a reference to our "active operations" tracker
        """
        return self._operations
---|
357 | |
---|
358 | |
---|
class IntroducerWebishServer(WebishServer):
    """
    The web server for an introducer: the same listener machinery as
    ``WebishServer`` but rooted at ``introweb.IntroducerRoot``.

    ``WebishServer.__init__`` is deliberately not called here; only the
    ``MultiService`` initializer and ``buildServer`` run.
    """
    def __init__(self, introducer, webport, nodeurl_path=None, staticdir=None):
        """
        :param introducer: Passed to ``introweb.IntroducerRoot``.
        :param webport: The strports description (or bare port number) to
            listen on.
        :param nodeurl_path: If set, where the URL of the server is written.
        :param staticdir: If set, a directory served under ``/static``.
        """
        service.MultiService.__init__(self)
        self.root = introweb.IntroducerRoot(introducer)
        # Request bodies needing a file use the stdlib default location.
        make_tempfile = tempfile.TemporaryFile
        self.buildServer(webport, make_tempfile, nodeurl_path, staticdir)
---|