From a6b5148a8d6675e0f525ed589773abb2c80d8be7 Mon Sep 17 00:00:00 2001 From: Markus Demleitner Date: Wed, 14 Aug 2019 14:14:32 +0200 Subject: [PATCH 1/8] Adding a test for parsing chunked uploads into request.args --- src/twisted/web/test/test_http.py | 66 +++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/twisted/web/test/test_http.py b/src/twisted/web/test/test_http.py index ba3bf7361..7fa6fd84a 100644 --- a/src/twisted/web/test/test_http.py +++ b/src/twisted/web/test/test_http.py @@ -1361,6 +1361,72 @@ class ChunkingTests(unittest.TestCase, ResponseTestMixin): b"Transfer-Encoding: chunked", b"5\r\nHello\r\n6\r\nWorld!\r\n")]) + def runChunkedRequest(self, httpRequest, requestFactory=None, + chunkSize=1): + """ + Execute a web request based on plain text content, chunking + the request payload. + + This is a stripped-down, chunking version of ParsingTests.runRequest. + """ + channel = http.HTTPChannel() + + if requestFactory: + channel.requestFactory = _makeRequestProxyFactory(requestFactory) + + httpRequest = httpRequest.replace(b"\n", b"\r\n") + header, body = httpRequest.split(b"\r\n\r\n", 1) + + transport = StringTransport() + + channel.makeConnection(transport) + channel.dataReceived(header+b"\r\n\r\n") + + for pos in range(len(body)//chunkSize+1): + if channel.transport.disconnecting: + break + channel.dataReceived(b"".join( + http.toChunk(body[pos*chunkSize:(pos+1)*chunkSize]))) + + channel.dataReceived(b"".join(http.toChunk(b""))) + channel.connectionLost(IOError("all done")) + + return channel + + def test_multipartFormData(self): + """ + Test that chunked uploads are actually processed into args. + + This is essentially a copy of ParsingTests.test_multipartFormData, + just with chunking put in. + + This fails as of twisted version 18.9.0 because of bug #9678. + """ + processed = [] + class MyRequest(http.Request): + def process(self): + processed.append(self) + self.write(b"done") + self.finish() + req = b'''\ +POST / HTTP/1.0 +Content-Type: multipart/form-data; boundary=AaB03x +Transfer-Encoding: chunked + +--AaB03x +Content-Type: text/plain +Content-Disposition: form-data; name="text" +Content-Transfer-Encoding: quoted-printable + +abasdfg +--AaB03x-- +''' + channel = self.runChunkedRequest(req, MyRequest, chunkSize=5) + self.assertEqual(channel.transport.value(), + b"HTTP/1.0 200 OK\r\n\r\ndone") + self.assertEqual(len(processed), 1) + self.assertEqual(processed[0].args, {b"text": [b"abasdfg"]}) + class ParsingTests(unittest.TestCase): From a562242a488318c3f32408239a800b840369c816 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Sun, 15 Dec 2019 16:10:39 -0800 Subject: [PATCH 2/8] Use actual content length rather than the header bpo-29979's PR https://github.com/python/cpython/pull/991 changed cgi.parse_multipart() to require a 'CONTENT-LENGTH' member in its `pdict` parameter (which is documented as "dictionary containing other parameters of the content type header"). The PR seems to have some issues: * The change seems to conflate `pdict` with an environment dict, probably because the test passes a dict called `env` as `pdict`. * 'CONTENT-LENGTH' isn't a customary parameter of the Content-Type header. `pdict` is intended to pass the multipart boundary [1]. * In a CGI environment the length of the request body is in the CONTENT_LENGTH variable, anyway [2]. Typo? * FieldStorage doesn't require a Content-Length header anyway! It's potionally used to enforce cgi.maxlen, which limits the requests size (because this is the layer for that, right?). https://github.com/twisted/twisted/pull/1012 dealt with bpo-29979's backward-incompatible change by passing the HTTP `Content-Length` header as the 'CONTENT-LENGTH' member of `pdict` when the header exists. When the header isn't given, Request skips parsing the request content. The problem is that in the Twisted context, unlike the CGI context (where the web server is responsible for removing any transfer encodings), that header won't necessarily exist. For example, when a chunked transfer encoding is used by the client. This commit does the simplest possible thing: synthesize the content length based on what was actually received. [1]: https://tools.ietf.org/html/rfc7578#section-4.1 [2]: https://tools.ietf.org/html/rfc3875#section-4.1.2 --- src/twisted/web/http.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/twisted/web/http.py b/src/twisted/web/http.py index e5accc208..56520d231 100644 --- a/src/twisted/web/http.py +++ b/src/twisted/web/http.py @@ -873,6 +873,7 @@ class Request: @type version: C{bytes} @param version: The HTTP version of this request. """ + clength = self.content.tell() self.content.seek(0,0) self.args = {} @@ -889,16 +890,14 @@ class Request: # Argument processing args = self.args ctype = self.requestHeaders.getRawHeaders(b'content-type') - clength = self.requestHeaders.getRawHeaders(b'content-length') if ctype is not None: ctype = ctype[0] - if clength is not None: - clength = clength[0] - if self.method == b"POST" and ctype and clength: mfd = b'multipart/form-data' key, pdict = _parseHeader(ctype) + # This fake content-type param is required by cgi.parse_multipart() + # in Python 3.7+, see bpo-29979. pdict["CONTENT-LENGTH"] = clength if key == b'application/x-www-form-urlencoded': args.update(parse_qs(self.content.read(), 1)) From ec7df8289438c56a4037ae8a60b40e43a944d381 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Sun, 15 Dec 2019 18:01:48 -0800 Subject: [PATCH 3/8] Fix lint --- src/twisted/web/test/test_http.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/twisted/web/test/test_http.py b/src/twisted/web/test/test_http.py index 7fa6fd84a..7345b4a9c 100644 --- a/src/twisted/web/test/test_http.py +++ b/src/twisted/web/test/test_http.py @@ -1373,7 +1373,7 @@ class ChunkingTests(unittest.TestCase, ResponseTestMixin): if requestFactory: channel.requestFactory = _makeRequestProxyFactory(requestFactory) - + httpRequest = httpRequest.replace(b"\n", b"\r\n") header, body = httpRequest.split(b"\r\n\r\n", 1) @@ -1381,7 +1381,7 @@ class ChunkingTests(unittest.TestCase, ResponseTestMixin): channel.makeConnection(transport) channel.dataReceived(header+b"\r\n\r\n") - + for pos in range(len(body)//chunkSize+1): if channel.transport.disconnecting: break @@ -1403,6 +1403,7 @@ class ChunkingTests(unittest.TestCase, ResponseTestMixin): This fails as of twisted version 18.9.0 because of bug #9678. """ processed = [] + class MyRequest(http.Request): def process(self): processed.append(self) From 09e7f6c4a221aec85aeab1c4489d27f06d833256 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Sun, 15 Dec 2019 18:13:22 -0800 Subject: [PATCH 4/8] Newsfile --- src/twisted/web/newsfragments/9678.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/twisted/web/newsfragments/9678.bugfix diff --git a/src/twisted/web/newsfragments/9678.bugfix b/src/twisted/web/newsfragments/9678.bugfix new file mode 100644 index 000000000..141578b7d --- /dev/null +++ b/src/twisted/web/newsfragments/9678.bugfix @@ -0,0 +1 @@ +twisted.web.http.Request now correctly parses multipart-encoded form data submitted as a chunked request on Python 3.7+. From 546d2e565734a8e6b36ce18c51eb306e7eea42bc Mon Sep 17 00:00:00 2001 From: Tom Most Date: Sun, 15 Dec 2019 18:22:26 -0800 Subject: [PATCH 5/8] More linting --- src/twisted/web/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/twisted/web/http.py b/src/twisted/web/http.py index 8657a848d..db6ff7458 100644 --- a/src/twisted/web/http.py +++ b/src/twisted/web/http.py @@ -874,7 +874,7 @@ class Request: @param version: The HTTP version of this request. """ clength = self.content.tell() - self.content.seek(0,0) + self.content.seek(0, 0) self.args = {} self.method, self.uri = command, path From f9cbd331326f29e8d65412dc4016348cfa35a5a0 Mon Sep 17 00:00:00 2001 From: Tom Most Date: Sun, 15 Dec 2019 19:54:28 -0800 Subject: [PATCH 6/8] Improve comment Also add a reference to https://github.com/python/cpython/pull/8530 --- src/twisted/web/http.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/twisted/web/http.py b/src/twisted/web/http.py index db6ff7458..f0fb05b4d 100644 --- a/src/twisted/web/http.py +++ b/src/twisted/web/http.py @@ -896,8 +896,10 @@ class Request: if self.method == b"POST" and ctype and clength: mfd = b'multipart/form-data' key, pdict = _parseHeader(ctype) - # This fake content-type param is required by cgi.parse_multipart() - # in Python 3.7+, see bpo-29979. + # This weird CONTENT-LENGTH param is required by + # cgi.parse_multipart() in some versions of Python 3.7+, see + # bpo-29979. It looks like this will be relaxed and backported, see + # https://github.com/python/cpython/pull/8530. pdict["CONTENT-LENGTH"] = clength if key == b'application/x-www-form-urlencoded': args.update(parse_qs(self.content.read(), 1)) From 06786483cb0c3a5afd8026dd84bb674324c610e1 Mon Sep 17 00:00:00 2001 From: Glyph Date: Thu, 9 Jan 2020 01:15:49 -0800 Subject: [PATCH 7/8] Revert "tox ini" This reverts commit a810e449412f07b5a6845a37a87a24aa04604ee1. --- tox.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index 328ac0354..e5fdf779c 100644 --- a/tox.ini +++ b/tox.ini @@ -48,9 +48,9 @@ extras = deps = py27-alldeps-{posix,macos}: pysqlite - ; Coverage 5.0 does not work with codecov. - {withcov}: coverage<5.0 - {coverage-prepare,codecov-publish}: coverage<5.0 + {withcov}: coverage + + {coverage-prepare,codecov-publish}: coverage {codecov-push,codecov-publish}: codecov From eee96ae000455b43a5ac8133dd028017288d3688 Mon Sep 17 00:00:00 2001 From: Glyph Date: Thu, 9 Jan 2020 01:16:42 -0800 Subject: [PATCH 8/8] newsfragment --- src/twisted/newsfragments/9757.misc | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/twisted/newsfragments/9757.misc diff --git a/src/twisted/newsfragments/9757.misc b/src/twisted/newsfragments/9757.misc new file mode 100644 index 000000000..e69de29bb