diff --git a/.gitignore b/.gitignore index 6154e51..3523064 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,6 @@ -docs/_build +/docs/_build/ tmp.py -htmlcov/ -.coverage.* *.py[cod] -/.hypothesis/ # emacs *~ @@ -32,11 +29,23 @@ lib64 # Installer logs pip-log.txt -# Unit test / coverage reports -.coverage -.tox/ +# Testing +/.tox/ +/.hypothesis/ nosetests.xml +# Coverage +/.coverage +/.coverage.* +/htmlcov/ +/.mypy_cache/ + +# Documentation +/htmldocs/ + +# Documentation +/htmldocs/ + # Translations *.mo diff --git a/.travis.yml b/.travis.yml index 6e9403c..80b3525 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ matrix: - python: "3.8" env: TOXENV=test-py38,codecov - python: "pypy" - env: TOXENV=test-pypy,codecov + env: TOXENV=test-pypy2,codecov - python: "pypy3" env: TOXENV=test-pypy3,codecov - python: "2.7" diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bc4f61..b23dfb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ ## dev (not yet released) +* CPython 3.7 and 3.8 and PyPy3 added to test matrix +* Hyperlink now has type hints and they are now exported per + [PEP 561](https://www.python.org/dev/peps/pep-0561/). +* Several bugs related to hidden state were fixed, making it so that all data + on a `URL` object (including `rooted` and `uses_netloc`) is reflected by and + consistent with its textual representation. + This does mean that sometimes these constructor arguments are ignored, if it + would create invalid or unparseable URL text. + ## 19.0.0 *(April 7, 2019)* @@ -13,7 +22,8 @@ A query parameter-centric release, with two enhancements: [#39](https://github.com/python-hyper/hyperlink/pull/39)) * `URL.remove()` now accepts *value* and *limit* parameters, allowing for removal of specific name-value pairs, as well as limiting the - number of removals. (see [#71](https://github.com/python-hyper/hyperlink/pull/71)) + number of removals. + (See [#71](https://github.com/python-hyper/hyperlink/pull/71)) ## 18.0.0 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e7efe6a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[build-system] + +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + + +[tool.black] + +line-length = 80 +target-version = ["py27"] diff --git a/setup.py b/setup.py index fceb4fa..2f09f9c 100644 --- a/setup.py +++ b/setup.py @@ -10,52 +10,47 @@ See the docs at http://hyperlink.readthedocs.io. from setuptools import find_packages, setup -__author__ = 'Mahmoud Hashemi and Glyph Lefkowitz' -__version__ = '19.0.1dev' -__contact__ = 'mahmoud@hatnote.com' -__url__ = 'https://github.com/python-hyper/hyperlink' -__license__ = 'MIT' +__author__ = "Mahmoud Hashemi and Glyph Lefkowitz" +__version__ = "19.0.1dev" +__contact__ = "mahmoud@hatnote.com" +__url__ = "https://github.com/python-hyper/hyperlink" +__license__ = "MIT" -setup(name='hyperlink', - version=__version__, - description="A featureful, immutable, and correct URL for Python.", - long_description=__doc__, - author=__author__, - author_email=__contact__, - url=__url__, - packages=find_packages(where="src"), - package_dir={"": "src"}, - package_data=dict( - hyperlink=[ - "py.typed", - ], - ), - zip_safe=False, - license=__license__, - platforms='any', - install_requires=[ - 'idna>=2.5', - 'typing ; python_version<"3.5"', - ], - python_requires='>=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', - classifiers=[ - 'Topic :: Utilities', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Libraries', - 'Development Status :: 5 - Production/Stable', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: Implementation :: PyPy', - 'License :: OSI Approved :: MIT License', ] - ) +setup( + name="hyperlink", + version=__version__, + description="A featureful, immutable, and correct URL for Python.", + long_description=__doc__, + author=__author__, + author_email=__contact__, + url=__url__, + packages=find_packages(where="src"), + package_dir={"": "src"}, + package_data=dict(hyperlink=["py.typed",],), + zip_safe=False, + license=__license__, + platforms="any", + install_requires=["idna>=2.5", 'typing ; python_version<"3.5"',], + python_requires=">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", + classifiers=[ + "Topic :: Utilities", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries", + "Development Status :: 5 - Production/Stable", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: Implementation :: PyPy", + "License :: OSI Approved :: MIT License", + ], +) """ A brief checklist for release: diff --git a/src/hyperlink/_socket.py b/src/hyperlink/_socket.py index 769b9d5..3bcf897 100644 --- a/src/hyperlink/_socket.py +++ b/src/hyperlink/_socket.py @@ -2,6 +2,7 @@ try: from socket import inet_pton except ImportError: from typing import TYPE_CHECKING + if TYPE_CHECKING: # pragma: no cover pass else: @@ -25,7 +26,7 @@ except ImportError: def inet_pton(address_family, ip_string): # type: (int, str) -> bytes addr = SockAddr() - ip_string_bytes = ip_string.encode('ascii') + ip_string_bytes = ip_string.encode("ascii") addr.sa_family = address_family addr_size = ctypes.c_int(ctypes.sizeof(addr)) @@ -37,10 +38,16 @@ except ImportError: except KeyError: raise socket.error("unknown address family") - if WSAStringToAddressA( - ip_string_bytes, address_family, None, - ctypes.byref(addr), ctypes.byref(addr_size) - ) != 0: + if ( + WSAStringToAddressA( + ip_string_bytes, + address_family, + None, + ctypes.byref(addr), + ctypes.byref(addr_size), + ) + != 0 + ): raise socket.error(ctypes.FormatError()) return ctypes.string_at(getattr(addr, attribute), size) diff --git a/src/hyperlink/_url.py b/src/hyperlink/_url.py index 7b2b27c..7374039 100644 --- a/src/hyperlink/_url.py +++ b/src/hyperlink/_url.py @@ -20,16 +20,31 @@ import sys import string import socket from socket import AF_INET, AF_INET6 + try: from socket import AddressFamily except ImportError: AddressFamily = int # type: ignore[assignment,misc] from typing import ( - Any, Callable, Dict, Iterable, Iterator, List, Mapping, Optional, - Sequence, Text, Tuple, Type, TypeVar, Union, cast, + Any, + Callable, + Dict, + Iterable, + Iterator, + List, + Mapping, + Optional, + Sequence, + Text, + Tuple, + Type, + TypeVar, + Union, + cast, ) from unicodedata import normalize from ._socket import inet_pton + try: from collections.abc import Mapping as MappingABC except ImportError: # Python 2 @@ -38,7 +53,7 @@ except ImportError: # Python 2 from idna import encode as idna_encode, decode as idna_decode -PY2 = (sys.version_info[0] == 2) +PY2 = sys.version_info[0] == 2 try: unichr except NameError: # Py3 @@ -46,14 +61,15 @@ except NameError: # Py3 NoneType = type(None) # type: Type[None] QueryPairs = Tuple[Tuple[Text, Optional[Text]], ...] # internal representation QueryParameters = Union[ - Mapping[Text, Optional[Text]], QueryPairs, + Mapping[Text, Optional[Text]], + QueryPairs, Sequence[Tuple[Text, Optional[Text]]], ] -T = TypeVar('T') +T = TypeVar("T") # from boltons.typeutils -def make_sentinel(name='_MISSING', var_name=""): +def make_sentinel(name="_MISSING", var_name=""): # type: (str, str) -> object """Creates and returns a new **instance** of a new class, suitable for usage as a "sentinel", a kind of singleton often used to indicate @@ -83,6 +99,7 @@ def make_sentinel(name='_MISSING', var_name=""): >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST')) False """ + class Sentinel(object): def __init__(self): # type: () -> None @@ -93,7 +110,8 @@ def make_sentinel(name='_MISSING', var_name=""): # type: () -> str if self.var_name: return self.var_name - return '%s(%r)' % (self.__class__.__name__, self.name) + return "%s(%r)" % (self.__class__.__name__, self.name) + if var_name: # superclass type hints don't allow str return type, but it is # allowed in the docs, hence the ignore[override] below @@ -110,53 +128,62 @@ def make_sentinel(name='_MISSING', var_name=""): return Sentinel() -_unspecified = _UNSET = make_sentinel('_UNSET') # type: Any +_unspecified = _UNSET = make_sentinel("_UNSET") # type: Any # RFC 3986 Section 2.3, Unreserved URI Characters # https://tools.ietf.org/html/rfc3986#section-2.3 -_UNRESERVED_CHARS = frozenset('~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ' - 'abcdefghijklmnopqrstuvwxyz') +_UNRESERVED_CHARS = frozenset( + "~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" +) # URL parsing regex (based on RFC 3986 Appendix B, with modifications) -_URL_RE = re.compile(r'^((?P[^:/?#]+):)?' - r'((?P<_netloc_sep>//)' - r'(?P[^/?#]*))?' - r'(?P[^?#]*)' - r'(\?(?P[^#]*))?' - r'(#(?P.*))?$') -_SCHEME_RE = re.compile(r'^[a-zA-Z0-9+-.]*$') -_AUTHORITY_RE = re.compile(r'^(?:(?P[^@/?#]*)@)?' - r'(?P' - r'(?:\[(?P[^[\]/?#]*)\])' - r'|(?P[^:/?#[\]]*)' - r'|(?P.*?))?' - r'(?::(?P.*))?$') +_URL_RE = re.compile( + r"^((?P[^:/?#]+):)?" + r"((?P<_netloc_sep>//)" + r"(?P[^/?#]*))?" + r"(?P[^?#]*)" + r"(\?(?P[^#]*))?" + r"(#(?P.*))?$" +) +_SCHEME_RE = re.compile(r"^[a-zA-Z0-9+-.]*$") +_AUTHORITY_RE = re.compile( + r"^(?:(?P[^@/?#]*)@)?" + r"(?P" + r"(?:\[(?P[^[\]/?#]*)\])" + r"|(?P[^:/?#[\]]*)" + r"|(?P.*?))?" + r"(?::(?P.*))?$" +) -_HEX_CHAR_MAP = dict([((a + b).encode('ascii'), - unichr(int(a + b, 16)).encode('charmap')) - for a in string.hexdigits for b in string.hexdigits]) -_ASCII_RE = re.compile('([\x00-\x7f]+)') +_HEX_CHAR_MAP = dict( + [ + ((a + b).encode("ascii"), unichr(int(a + b, 16)).encode("charmap")) + for a in string.hexdigits + for b in string.hexdigits + ] +) +_ASCII_RE = re.compile("([\x00-\x7f]+)") # RFC 3986 section 2.2, Reserved Characters # https://tools.ietf.org/html/rfc3986#section-2.2 -_GEN_DELIMS = frozenset(u':/?#[]@') +_GEN_DELIMS = frozenset(u":/?#[]@") _SUB_DELIMS = frozenset(u"!$&'()*+,;=") _ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS -_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u'%') +_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u"%") _USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE -_PATH_SAFE = _USERINFO_SAFE | set(u':@') +_PATH_SAFE = _USERINFO_SAFE | set(u":@") _PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE -_SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(':') +_SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(":") _SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE -_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u'/?') +_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?") _FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE -_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u'&+') +_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&+") _QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE -_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u'=') +_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u"=") _QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE @@ -164,9 +191,9 @@ def _make_decode_map(delims, allow_percent=False): # type: (Iterable[Text], bool) -> Mapping[bytes, bytes] ret = dict(_HEX_CHAR_MAP) if not allow_percent: - delims = set(delims) | set([u'%']) + delims = set(delims) | set([u"%"]) for delim in delims: - _hexord = '{0:02X}'.format(ord(delim)).encode('ascii') + _hexord = "{0:02X}".format(ord(delim)).encode("ascii") _hexord_lower = _hexord.lower() ret.pop(_hexord) if _hexord != _hexord_lower: @@ -184,7 +211,7 @@ def _make_quote_map(safe_chars): if c in safe_chars: ret[c] = ret[v] = c else: - ret[c] = ret[v] = '%{0:02X}'.format(i) + ret[c] = ret[v] = "%{0:02X}".format(i) return ret @@ -200,11 +227,15 @@ _QUERY_VALUE_DECODE_MAP = _make_decode_map(_QUERY_VALUE_DELIMS) _FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE) _FRAGMENT_DECODE_MAP = _make_decode_map(_FRAGMENT_DELIMS) _UNRESERVED_QUOTE_MAP = _make_quote_map(_UNRESERVED_CHARS) -_UNRESERVED_DECODE_MAP = dict([(k, v) for k, v in _HEX_CHAR_MAP.items() - if v.decode('ascii', 'replace') - in _UNRESERVED_CHARS]) +_UNRESERVED_DECODE_MAP = dict( + [ + (k, v) + for k, v in _HEX_CHAR_MAP.items() + if v.decode("ascii", "replace") in _UNRESERVED_CHARS + ] +) -_ROOT_PATHS = frozenset(((), (u'',))) +_ROOT_PATHS = frozenset(((), (u"",))) def _encode_reserved(text, maximal=True): @@ -215,20 +246,25 @@ def _encode_reserved(text, maximal=True): bytes. """ if maximal: - bytestr = normalize('NFC', text).encode('utf8') - return u''.join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr]) - return u''.join([_UNRESERVED_QUOTE_MAP[t] if t in _UNRESERVED_CHARS - else t for t in text]) + bytestr = normalize("NFC", text).encode("utf8") + return u"".join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr]) + return u"".join( + [ + _UNRESERVED_QUOTE_MAP[t] if t in _UNRESERVED_CHARS else t + for t in text + ] + ) def _encode_path_part(text, maximal=True): # type: (Text, bool) -> Text "Percent-encode a single segment of a URL path." if maximal: - bytestr = normalize('NFC', text).encode('utf8') - return u''.join([_PATH_PART_QUOTE_MAP[b] for b in bytestr]) - return u''.join([_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t - for t in text]) + bytestr = normalize("NFC", text).encode("utf8") + return u"".join([_PATH_PART_QUOTE_MAP[b] for b in bytestr]) + return u"".join( + [_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t for t in text] + ) def _encode_schemeless_path_part(text, maximal=True): @@ -237,18 +273,24 @@ def _encode_schemeless_path_part(text, maximal=True): scheme specified. """ if maximal: - bytestr = normalize('NFC', text).encode('utf8') - return u''.join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr]) - return u''.join([_SCHEMELESS_PATH_PART_QUOTE_MAP[t] - if t in _SCHEMELESS_PATH_DELIMS else t for t in text]) + bytestr = normalize("NFC", text).encode("utf8") + return u"".join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr]) + return u"".join( + [ + _SCHEMELESS_PATH_PART_QUOTE_MAP[t] + if t in _SCHEMELESS_PATH_DELIMS + else t + for t in text + ] + ) def _encode_path_parts( - text_parts, # type: Sequence[Text] - rooted=False, # type: bool - has_scheme=True, # type: bool + text_parts, # type: Sequence[Text] + rooted=False, # type: bool + has_scheme=True, # type: bool has_authority=True, # type: bool - maximal=True, # type: bool + maximal=True, # type: bool ): # type: (...) -> Sequence[Text] """ @@ -274,17 +316,23 @@ def _encode_path_parts( if not text_parts: return () if rooted: - text_parts = (u'',) + tuple(text_parts) + text_parts = (u"",) + tuple(text_parts) # elif has_authority and text_parts: # raise Exception('see rfc above') # TODO: too late to fail like this? encoded_parts = [] # type: List[Text] if has_scheme: - encoded_parts = [_encode_path_part(part, maximal=maximal) - if part else part for part in text_parts] + encoded_parts = [ + _encode_path_part(part, maximal=maximal) if part else part + for part in text_parts + ] else: encoded_parts = [_encode_schemeless_path_part(text_parts[0])] - encoded_parts.extend([_encode_path_part(part, maximal=maximal) - if part else part for part in text_parts[1:]]) + encoded_parts.extend( + [ + _encode_path_part(part, maximal=maximal) if part else part + for part in text_parts[1:] + ] + ) return tuple(encoded_parts) @@ -294,10 +342,11 @@ def _encode_query_key(text, maximal=True): Percent-encode a single query string key or value. """ if maximal: - bytestr = normalize('NFC', text).encode('utf8') - return u''.join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr]) - return u''.join([_QUERY_KEY_QUOTE_MAP[t] if t in _QUERY_KEY_DELIMS else t - for t in text]) + bytestr = normalize("NFC", text).encode("utf8") + return u"".join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr]) + return u"".join( + [_QUERY_KEY_QUOTE_MAP[t] if t in _QUERY_KEY_DELIMS else t for t in text] + ) def _encode_query_value(text, maximal=True): @@ -306,10 +355,14 @@ def _encode_query_value(text, maximal=True): Percent-encode a single query string key or value. """ if maximal: - bytestr = normalize('NFC', text).encode('utf8') - return u''.join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr]) - return u''.join([_QUERY_VALUE_QUOTE_MAP[t] - if t in _QUERY_VALUE_DELIMS else t for t in text]) + bytestr = normalize("NFC", text).encode("utf8") + return u"".join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr]) + return u"".join( + [ + _QUERY_VALUE_QUOTE_MAP[t] if t in _QUERY_VALUE_DELIMS else t + for t in text + ] + ) def _encode_fragment_part(text, maximal=True): @@ -318,10 +371,11 @@ def _encode_fragment_part(text, maximal=True): subdelimiters, so the whole URL fragment can be passed. """ if maximal: - bytestr = normalize('NFC', text).encode('utf8') - return u''.join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr]) - return u''.join([_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t - for t in text]) + bytestr = normalize("NFC", text).encode("utf8") + return u"".join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr]) + return u"".join( + [_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t for t in text] + ) def _encode_userinfo_part(text, maximal=True): @@ -330,32 +384,85 @@ def _encode_userinfo_part(text, maximal=True): section of the URL. """ if maximal: - bytestr = normalize('NFC', text).encode('utf8') - return u''.join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr]) - return u''.join([_USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS - else t for t in text]) + bytestr = normalize("NFC", text).encode("utf8") + return u"".join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr]) + return u"".join( + [ + _USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS else t + for t in text + ] + ) # This port list painstakingly curated by hand searching through # https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml # and # https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml -SCHEME_PORT_MAP = {'acap': 674, 'afp': 548, 'dict': 2628, 'dns': 53, - 'file': None, 'ftp': 21, 'git': 9418, 'gopher': 70, - 'http': 80, 'https': 443, 'imap': 143, 'ipp': 631, - 'ipps': 631, 'irc': 194, 'ircs': 6697, 'ldap': 389, - 'ldaps': 636, 'mms': 1755, 'msrp': 2855, 'msrps': None, - 'mtqp': 1038, 'nfs': 111, 'nntp': 119, 'nntps': 563, - 'pop': 110, 'prospero': 1525, 'redis': 6379, 'rsync': 873, - 'rtsp': 554, 'rtsps': 322, 'rtspu': 5005, 'sftp': 22, - 'smb': 445, 'snmp': 161, 'ssh': 22, 'steam': None, - 'svn': 3690, 'telnet': 23, 'ventrilo': 3784, 'vnc': 5900, - 'wais': 210, 'ws': 80, 'wss': 443, 'xmpp': None} +SCHEME_PORT_MAP = { + "acap": 674, + "afp": 548, + "dict": 2628, + "dns": 53, + "file": None, + "ftp": 21, + "git": 9418, + "gopher": 70, + "http": 80, + "https": 443, + "imap": 143, + "ipp": 631, + "ipps": 631, + "irc": 194, + "ircs": 6697, + "ldap": 389, + "ldaps": 636, + "mms": 1755, + "msrp": 2855, + "msrps": None, + "mtqp": 1038, + "nfs": 111, + "nntp": 119, + "nntps": 563, + "pop": 110, + "prospero": 1525, + "redis": 6379, + "rsync": 873, + "rtsp": 554, + "rtsps": 322, + "rtspu": 5005, + "sftp": 22, + "smb": 445, + "snmp": 161, + "ssh": 22, + "steam": None, + "svn": 3690, + "telnet": 23, + "ventrilo": 3784, + "vnc": 5900, + "wais": 210, + "ws": 80, + "wss": 443, + "xmpp": None, +} # This list of schemes that don't use authorities is also from the link above. -NO_NETLOC_SCHEMES = set(['urn', 'about', 'bitcoin', 'blob', 'data', 'geo', - 'magnet', 'mailto', 'news', 'pkcs11', - 'sip', 'sips', 'tel']) +NO_NETLOC_SCHEMES = set( + [ + "urn", + "about", + "bitcoin", + "blob", + "data", + "geo", + "magnet", + "mailto", + "news", + "pkcs11", + "sip", + "sips", + "tel", + ] +) # As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc @@ -384,18 +491,22 @@ def register_scheme(text, uses_netloc=True, default_port=None): try: default_port = int(default_port) except (ValueError, TypeError): - raise ValueError('default_port expected integer or None, not %r' - % (default_port,)) + raise ValueError( + "default_port expected integer or None, not %r" + % (default_port,) + ) if uses_netloc is True: SCHEME_PORT_MAP[text] = default_port elif uses_netloc is False: if default_port is not None: - raise ValueError('unexpected default port while specifying' - ' non-netloc scheme: %r' % default_port) + raise ValueError( + "unexpected default port while specifying" + " non-netloc scheme: %r" % default_port + ) NO_NETLOC_SCHEMES.add(text) else: - raise ValueError('uses_netloc expected bool, not: %r' % uses_netloc) + raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc) return @@ -427,7 +538,7 @@ def scheme_uses_netloc(scheme, default=None): return True if scheme in NO_NETLOC_SCHEMES: return False - if scheme.split('+')[-1] in SCHEME_PORT_MAP: + if scheme.split("+")[-1] in SCHEME_PORT_MAP: return True return default @@ -436,6 +547,7 @@ class URLParseError(ValueError): """Exception inheriting from :exc:`ValueError`, raised when failing to parse a URL. Mostly raised on invalid ports and IPv6 addresses. """ + pass @@ -454,11 +566,12 @@ def _typecheck(name, value, *types): exception describing the problem using *name*. """ if not types: - raise ValueError('expected one or more types, maybe use _textcheck?') + raise ValueError("expected one or more types, maybe use _textcheck?") if not isinstance(value, types): - raise TypeError("expected %s for %s, got %r" - % (" or ".join([t.__name__ for t in types]), - name, value)) + raise TypeError( + "expected %s for %s, got %r" + % (" or ".join([t.__name__ for t in types]), name, value) + ) return value @@ -470,11 +583,13 @@ def _textcheck(name, value, delims=frozenset(), nullable=False): return value # type: ignore[unreachable] else: str_name = "unicode" if PY2 else "str" - exp = str_name + ' or NoneType' if nullable else str_name - raise TypeError('expected %s for %s, got %r' % (exp, name, value)) + exp = str_name + " or NoneType" if nullable else str_name + raise TypeError("expected %s for %s, got %r" % (exp, name, value)) if delims and set(value) & set(delims): # TODO: test caching into regexes - raise ValueError('one or more reserved delimiters %s present in %s: %r' - % (''.join(delims), name, value)) + raise ValueError( + "one or more reserved delimiters %s present in %s: %r" + % ("".join(delims), name, value) + ) return value # type: ignore[return-value] # T vs. Text @@ -492,27 +607,29 @@ def iter_pairs(iterable): return iter(iterable) -def _decode_unreserved( - text, normalize_case=False, encode_stray_percents=False -): +def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False): # type: (Text, bool, bool) -> Text - return _percent_decode(text, normalize_case=normalize_case, - encode_stray_percents=encode_stray_percents, - _decode_map=_UNRESERVED_DECODE_MAP) + return _percent_decode( + text, + normalize_case=normalize_case, + encode_stray_percents=encode_stray_percents, + _decode_map=_UNRESERVED_DECODE_MAP, + ) def _decode_userinfo_part( text, normalize_case=False, encode_stray_percents=False ): # type: (Text, bool, bool) -> Text - return _percent_decode(text, normalize_case=normalize_case, - encode_stray_percents=encode_stray_percents, - _decode_map=_USERINFO_DECODE_MAP) + return _percent_decode( + text, + normalize_case=normalize_case, + encode_stray_percents=encode_stray_percents, + _decode_map=_USERINFO_DECODE_MAP, + ) -def _decode_path_part( - text, normalize_case=False, encode_stray_percents=False -): +def _decode_path_part(text, normalize_case=False, encode_stray_percents=False): # type: (Text, bool, bool) -> Text """ >>> _decode_path_part(u'%61%77%2f%7a') @@ -520,45 +637,55 @@ def _decode_path_part( >>> _decode_path_part(u'%61%77%2f%7a', normalize_case=True) u'aw%2Fz' """ - return _percent_decode(text, normalize_case=normalize_case, - encode_stray_percents=encode_stray_percents, - _decode_map=_PATH_DECODE_MAP) + return _percent_decode( + text, + normalize_case=normalize_case, + encode_stray_percents=encode_stray_percents, + _decode_map=_PATH_DECODE_MAP, + ) -def _decode_query_key( - text, normalize_case=False, encode_stray_percents=False -): +def _decode_query_key(text, normalize_case=False, encode_stray_percents=False): # type: (Text, bool, bool) -> Text - return _percent_decode(text, normalize_case=normalize_case, - encode_stray_percents=encode_stray_percents, - _decode_map=_QUERY_KEY_DECODE_MAP) + return _percent_decode( + text, + normalize_case=normalize_case, + encode_stray_percents=encode_stray_percents, + _decode_map=_QUERY_KEY_DECODE_MAP, + ) def _decode_query_value( text, normalize_case=False, encode_stray_percents=False ): # type: (Text, bool, bool) -> Text - return _percent_decode(text, normalize_case=normalize_case, - encode_stray_percents=encode_stray_percents, - _decode_map=_QUERY_VALUE_DECODE_MAP) + return _percent_decode( + text, + normalize_case=normalize_case, + encode_stray_percents=encode_stray_percents, + _decode_map=_QUERY_VALUE_DECODE_MAP, + ) def _decode_fragment_part( text, normalize_case=False, encode_stray_percents=False ): # type: (Text, bool, bool) -> Text - return _percent_decode(text, normalize_case=normalize_case, - encode_stray_percents=encode_stray_percents, - _decode_map=_FRAGMENT_DECODE_MAP) + return _percent_decode( + text, + normalize_case=normalize_case, + encode_stray_percents=encode_stray_percents, + _decode_map=_FRAGMENT_DECODE_MAP, + ) def _percent_decode( - text, # type: Text - normalize_case=False, # type: bool - subencoding="utf-8", # type: Text + text, # type: Text + normalize_case=False, # type: bool + subencoding="utf-8", # type: Text raise_subencoding_exc=False, # type: bool encode_stray_percents=False, # type: bool - _decode_map=_HEX_CHAR_MAP # type: Mapping[bytes, bytes] + _decode_map=_HEX_CHAR_MAP, # type: Mapping[bytes, bytes] ): # type: (...) -> Text """Convert percent-encoded text characters to their normal, @@ -594,7 +721,7 @@ def _percent_decode( except UnicodeEncodeError: return text - bits = quoted_bytes.split(b'%') + bits = quoted_bytes.split(b"%") if len(bits) == 1: return text @@ -609,20 +736,20 @@ def _percent_decode( except KeyError: pair_is_hex = hexpair in _HEX_CHAR_MAP if pair_is_hex or not encode_stray_percents: - append(b'%') + append(b"%") else: # if it's undecodable, treat as a real percent sign, # which is reserved (because it wasn't in the # context-aware _decode_map passed in), and should # stay in an encoded state. - append(b'%25') + append(b"%25") if normalize_case and pair_is_hex: append(hexpair.upper()) append(rest) else: append(item) - unquoted_bytes = b''.join(res) + unquoted_bytes = b"".join(res) try: return unquoted_bytes.decode(subencoding) @@ -679,7 +806,7 @@ def _decode_host(host): u'm\xe9hmoud.io' """ # noqa: E501 if not host: - return u'' + return u"" try: host_bytes = host.encode("ascii") except UnicodeEncodeError: @@ -713,16 +840,16 @@ def _resolve_dot_segments(path): segs = [] # type: List[Text] for seg in path: - if seg == u'.': + if seg == u".": pass - elif seg == u'..': + elif seg == u"..": if segs: segs.pop() else: segs.append(seg) - if list(path[-1:]) in ([u'.'], [u'..']): - segs.append(u'') + if list(path[-1:]) in ([u"."], [u".."]): + segs.append(u"") return segs @@ -747,13 +874,13 @@ def parse_host(host): True """ if not host: - return None, u'' + return None, u"" - if u':' in host: + if u":" in host: try: inet_pton(AF_INET6, host) except socket.error as se: - raise URLParseError('invalid IPv6 host: %r (%r)' % (host, se)) + raise URLParseError("invalid IPv6 host: %r (%r)" % (host, se)) except UnicodeEncodeError: pass # TODO: this can't be a real host right? else: @@ -815,9 +942,18 @@ class URL(object): that starts with a slash. userinfo (Text): The username or colon-separated username:password pair. - uses_netloc (bool): Indicates whether two slashes appear - between the scheme and the host (``http://eg.com`` vs - ``mailto:e@g.com``). Set automatically based on scheme. + uses_netloc (Optional[bool]): Indicates whether ``://`` (the "netloc + separator") will appear to separate the scheme from the *path* in + cases where no host is present. Setting this to ``True`` is a + non-spec-compliant affordance for the common practice of having URIs + that are *not* URLs (cannot have a 'host' part) but nevertheless use + the common ``://`` idiom that most people associate with URLs; + e.g. ``message:`` URIs like ``message://message-id`` being + equivalent to ``message:message-id``. This may be inferred based on + the scheme depending on whether :func:`register_scheme` has been + used to register the scheme and should not be passed directly unless + you know the scheme works like this and you know it has not been + registered. All of these parts are also exposed as read-only attributes of URL instances, along with several useful methods. @@ -828,33 +964,33 @@ class URL(object): def __init__( self, - scheme=None, # type: Optional[Text] - host=None, # type: Optional[Text] - path=(), # type: Iterable[Text] - query=(), # type: QueryParameters - fragment=u"", # type: Text - port=None, # type: Optional[int] - rooted=None, # type: Optional[bool] - userinfo=u"", # type: Text + scheme=None, # type: Optional[Text] + host=None, # type: Optional[Text] + path=(), # type: Iterable[Text] + query=(), # type: QueryParameters + fragment=u"", # type: Text + port=None, # type: Optional[int] + rooted=None, # type: Optional[bool] + userinfo=u"", # type: Text uses_netloc=None, # type: Optional[bool] ): # type: (...) -> None if host is not None and scheme is None: - scheme = u'http' # TODO: why + scheme = u"http" # TODO: why if port is None and scheme is not None: port = SCHEME_PORT_MAP.get(scheme) if host and query and not path: # per RFC 3986 6.2.3, "a URI that uses the generic syntax # for authority with an empty path should be normalized to # a path of '/'." - path = (u'',) + path = (u"",) # Now that we're done detecting whether they were passed, we can set # them to their defaults: if scheme is None: - scheme = u'' + scheme = u"" if host is None: - host = u'' + host = u"" if rooted is None: rooted = bool(host) @@ -862,35 +998,54 @@ class URL(object): self._scheme = _textcheck("scheme", scheme) if self._scheme: if not _SCHEME_RE.match(self._scheme): - raise ValueError('invalid scheme: %r. Only alphanumeric, "+",' - ' "-", and "." allowed. Did you meant to call' - ' %s.from_text()?' - % (self._scheme, self.__class__.__name__)) + raise ValueError( + 'invalid scheme: %r. Only alphanumeric, "+",' + ' "-", and "." allowed. Did you meant to call' + " %s.from_text()?" % (self._scheme, self.__class__.__name__) + ) - _, self._host = parse_host(_textcheck('host', host, '/?#@')) + _, self._host = parse_host(_textcheck("host", host, "/?#@")) if isinstance(path, Text): - raise TypeError("expected iterable of text for path, not: %r" - % (path,)) - self._path = tuple((_textcheck("path segment", segment, '/?#') - for segment in path)) + raise TypeError( + "expected iterable of text for path, not: %r" % (path,) + ) + self._path = tuple( + (_textcheck("path segment", segment, "/?#") for segment in path) + ) self._query = tuple( - (_textcheck("query parameter name", k, '&=#'), - _textcheck("query parameter value", v, '&#', nullable=True)) - for k, v in iter_pairs(query)) + ( + _textcheck("query parameter name", k, "&=#"), + _textcheck("query parameter value", v, "&#", nullable=True), + ) + for k, v in iter_pairs(query) + ) self._fragment = _textcheck("fragment", fragment) self._port = _typecheck("port", port, int, NoneType) self._rooted = _typecheck("rooted", rooted, bool) - self._userinfo = _textcheck("userinfo", userinfo, '/?#@') + self._userinfo = _textcheck("userinfo", userinfo, "/?#@") - uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc) - self._uses_netloc = _typecheck("uses_netloc", - uses_netloc, bool, NoneType) - # fixup for rooted consistency - if self._host: + if uses_netloc is None: + uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc) + self._uses_netloc = _typecheck( + "uses_netloc", uses_netloc, bool, NoneType + ) + will_have_authority = self._host or ( + self._port and self._port != SCHEME_PORT_MAP.get(scheme) + ) + if will_have_authority: + # fixup for rooted consistency; if there's any 'authority' + # represented in the textual URL, then the path must be rooted, and + # we're definitely using a netloc (there must be a ://). self._rooted = True - if (not self._rooted) and self._path and self._path[0] == '': + self._uses_netloc = True + if (not self._rooted) and self.path[:1] == (u"",): self._rooted = True self._path = self._path[1:] + if not will_have_authority and self._path and not self._rooted: + # If, after fixing up the path, there *is* a path and it *isn't* + # rooted, then we are definitely not using a netloc; if we did, it + # would make the path (erroneously) look like a hostname. + self._uses_netloc = False def get_decoded_url(self, lazy=False): # type: (bool) -> DecodedURL @@ -1006,6 +1161,8 @@ class URL(object): def uses_netloc(self): # type: () -> Optional[bool] """ + Indicates whether ``://`` (the "netloc separator") will appear to + separate the scheme from the *path* in cases where no host is present. """ return self._uses_netloc @@ -1015,7 +1172,7 @@ class URL(object): """ The user portion of :attr:`~hyperlink.URL.userinfo`. """ - return self.userinfo.split(u':')[0] + return self.userinfo.split(u":")[0] def authority(self, with_password=False, **kw): # type: (bool, Any) -> Text @@ -1037,12 +1194,12 @@ class URL(object): of the URL. """ # first, a bit of twisted compat - with_password = kw.pop('includeSecrets', with_password) + with_password = kw.pop("includeSecrets", with_password) if kw: - raise TypeError('got unexpected keyword arguments: %r' % kw.keys()) + raise TypeError("got unexpected keyword arguments: %r" % kw.keys()) host = self.host - if ':' in host: - hostport = ['[' + host + ']'] + if ":" in host: + hostport = ["[" + host + "]"] else: hostport = [self.host] if self.port != SCHEME_PORT_MAP.get(self.scheme): @@ -1051,7 +1208,7 @@ class URL(object): if self.userinfo: userinfo = self.userinfo if not with_password and u":" in userinfo: - userinfo = userinfo[:userinfo.index(u":") + 1] + userinfo = userinfo[: userinfo.index(u":") + 1] authority.append(userinfo) authority.append(u":".join(hostport)) return u"@".join(authority) @@ -1060,13 +1217,20 @@ class URL(object): # type: (Any) -> bool if not isinstance(other, self.__class__): return NotImplemented - for attr in ['scheme', 'userinfo', 'host', 'query', - 'fragment', 'port', 'uses_netloc', 'rooted']: + for attr in [ + "scheme", + "userinfo", + "host", + "query", + "fragment", + "port", + "uses_netloc", + "rooted", + ]: if getattr(self, attr) != getattr(other, attr): return False - if ( - self.path == other.path or - (self.path in _ROOT_PATHS and other.path in _ROOT_PATHS) + if self.path == other.path or ( + self.path in _ROOT_PATHS and other.path in _ROOT_PATHS ): return True return False @@ -1079,9 +1243,20 @@ class URL(object): def __hash__(self): # type: () -> int - return hash((self.__class__, self.scheme, self.userinfo, self.host, - self.path, self.query, self.fragment, self.port, - self.rooted, self.uses_netloc)) + return hash( + ( + self.__class__, + self.scheme, + self.userinfo, + self.host, + self.path, + self.query, + self.fragment, + self.port, + self.rooted, + self.uses_netloc, + ) + ) @property def absolute(self): @@ -1101,15 +1276,15 @@ class URL(object): def replace( self, - scheme=_UNSET, # type: Optional[Text] - host=_UNSET, # type: Optional[Text] - path=_UNSET, # type: Iterable[Text] - query=_UNSET, # type: QueryParameters - fragment=_UNSET, # type: Text - port=_UNSET, # type: Optional[int] - rooted=_UNSET, # type: Optional[bool] - userinfo=_UNSET, # type: Text - uses_netloc=_UNSET # type: Optional[bool] + scheme=_UNSET, # type: Optional[Text] + host=_UNSET, # type: Optional[Text] + path=_UNSET, # type: Iterable[Text] + query=_UNSET, # type: QueryParameters + fragment=_UNSET, # type: Text + port=_UNSET, # type: Optional[int] + rooted=_UNSET, # type: Optional[bool] + userinfo=_UNSET, # type: Text + uses_netloc=_UNSET, # type: Optional[bool] ): # type: (...) -> URL """:class:`URL` objects are immutable, which means that attributes @@ -1134,14 +1309,28 @@ class URL(object): slash. userinfo (Text): The username or colon-separated username:password pair. - uses_netloc (bool): Indicates whether two slashes appear between - the scheme and the host - (``http://eg.com`` vs ``mailto:e@g.com``) + uses_netloc (bool): Indicates whether ``://`` (the "netloc + separator") will appear to separate the scheme from the *path* + in cases where no host is present. Setting this to ``True`` is + a non-spec-compliant affordance for the common practice of + having URIs that are *not* URLs (cannot have a 'host' part) but + nevertheless use the common ``://`` idiom that most people + associate with URLs; e.g. ``message:`` URIs like + ``message://message-id`` being equivalent to + ``message:message-id``. This may be inferred based on the + scheme depending on whether :func:`register_scheme` has been + used to register the scheme and should not be passed directly + unless you know the scheme works like this and you know it has + not been registered. Returns: URL: A copy of the current :class:`URL`, with new values for parameters passed. """ + if scheme is not _UNSET and scheme != self.scheme: + # when changing schemes, reset the explicit uses_netloc preference + # to honor the new scheme. + uses_netloc = None return self.__class__( scheme=_optional(scheme, self.scheme), host=_optional(host, self.host), @@ -1151,7 +1340,7 @@ class URL(object): port=_optional(port, self.port), rooted=_optional(rooted, self.rooted), userinfo=_optional(userinfo, self.userinfo), - uses_netloc=_optional(uses_netloc, self.uses_netloc) + uses_netloc=_optional(uses_netloc, self.uses_netloc), ) @classmethod @@ -1185,41 +1374,41 @@ class URL(object): method only raises :class:`URLParseError` on invalid port and IPv6 values in the host portion of the URL. """ - um = _URL_RE.match(_textcheck('text', text)) + um = _URL_RE.match(_textcheck("text", text)) if um is None: - raise URLParseError('could not parse url: %r' % text) + raise URLParseError("could not parse url: %r" % text) gs = um.groupdict() - au_text = gs['authority'] or u'' + au_text = gs["authority"] or u"" au_m = _AUTHORITY_RE.match(au_text) if au_m is None: raise URLParseError( - 'invalid authority %r in url: %r' % (au_text, text) + "invalid authority %r in url: %r" % (au_text, text) ) au_gs = au_m.groupdict() - if au_gs['bad_host']: + if au_gs["bad_host"]: raise URLParseError( - 'invalid host %r in url: %r' % (au_gs['bad_host'], text) + "invalid host %r in url: %r" % (au_gs["bad_host"], text) ) - userinfo = au_gs['userinfo'] or u'' + userinfo = au_gs["userinfo"] or u"" - host = au_gs['ipv6_host'] or au_gs['plain_host'] - port = au_gs['port'] + host = au_gs["ipv6_host"] or au_gs["plain_host"] + port = au_gs["port"] if port is not None: try: port = int(port) # type: ignore[assignment] # FIXME, see below except ValueError: if not port: # TODO: excessive? - raise URLParseError('port must not be empty: %r' % au_text) - raise URLParseError('expected integer for port, not %r' % port) + raise URLParseError("port must not be empty: %r" % au_text) + raise URLParseError("expected integer for port, not %r" % port) - scheme = gs['scheme'] or u'' - fragment = gs['fragment'] or u'' - uses_netloc = bool(gs['_netloc_sep']) + scheme = gs["scheme"] or u"" + fragment = gs["fragment"] or u"" + uses_netloc = bool(gs["_netloc_sep"]) - if gs['path']: - path = tuple(gs['path'].split(u"/")) + if gs["path"]: + path = tuple(gs["path"].split(u"/")) if not path[0]: path = path[1:] rooted = True @@ -1228,24 +1417,39 @@ class URL(object): else: path = () rooted = bool(au_text) - if gs['query']: + if gs["query"]: query = tuple( ( qe.split(u"=", 1) # type: ignore[misc] - if u'=' in qe else (qe, None) + if u"=" in qe + else (qe, None) ) - for qe in gs['query'].split(u"&") + for qe in gs["query"].split(u"&") ) # type: QueryPairs else: query = () return cls( - scheme, host, path, query, fragment, + scheme, + host, + path, + query, + fragment, port, # type: ignore[arg-type] # FIXME, see above - rooted, userinfo, uses_netloc, + rooted, + userinfo, + uses_netloc, ) - def normalize(self, scheme=True, host=True, path=True, query=True, - fragment=True, userinfo=True, percents=True): + def normalize( + self, + scheme=True, + host=True, + path=True, + query=True, + fragment=True, + userinfo=True, + percents=True, + ): # type: (bool, bool, bool, bool, bool, bool, bool) -> URL """Return a new URL object with several standard normalizations applied: @@ -1286,30 +1490,34 @@ class URL(object): """ # noqa: E501 kw = {} # type: Dict[str, Any] if scheme: - kw['scheme'] = self.scheme.lower() + kw["scheme"] = self.scheme.lower() if host: - kw['host'] = self.host.lower() + kw["host"] = self.host.lower() def _dec_unres(target): # type: (Text) -> Text return _decode_unreserved( target, normalize_case=True, encode_stray_percents=percents ) + if path: if self.path: - kw['path'] = [ + kw["path"] = [ _dec_unres(p) for p in _resolve_dot_segments(self.path) ] else: - kw['path'] = (u'',) + kw["path"] = (u"",) if query: - kw['query'] = [(_dec_unres(k), _dec_unres(v) if v else v) - for k, v in self.query] + kw["query"] = [ + (_dec_unres(k), _dec_unres(v) if v else v) + for k, v in self.query + ] if fragment: - kw['fragment'] = _dec_unres(self.fragment) + kw["fragment"] = _dec_unres(self.fragment) if userinfo: - kw['userinfo'] = u':'.join([_dec_unres(p) - for p in self.userinfo.split(':', 1)]) + kw["userinfo"] = u":".join( + [_dec_unres(p) for p in self.userinfo.split(":", 1)] + ) return self.replace(**kw) @@ -1338,7 +1546,7 @@ class URL(object): return self segments = [ # type: ignore[assignment] # variable is tuple - _textcheck('path segment', s) for s in segments + _textcheck("path segment", s) for s in segments ] new_path = tuple(self.path) if self.path and self.path[-1] == u"": @@ -1359,11 +1567,11 @@ class URL(object): replaced by *segment*. Special characters such as ``/?#`` will be percent encoded. """ - _textcheck('path segment', segment) + _textcheck("path segment", segment) new_path = tuple(self.path)[:-1] + (_encode_path_part(segment),) return self.replace(path=new_path) - def click(self, href=u''): + def click(self, href=u""): # type: (Union[Text, URL]) -> URL """Resolve the given URL relative to this URL. @@ -1394,7 +1602,7 @@ class URL(object): # TODO: This error message is not completely accurate, # as URL objects are now also valid, but Twisted's # test suite (wrongly) relies on this exact message. - _textcheck('relative URL', href) + _textcheck("relative URL", href) clicked = URL.from_text(href) if clicked.absolute: return clicked @@ -1406,8 +1614,9 @@ class URL(object): # Schemes with relative paths are not well-defined. RFC 3986 calls # them a "loophole in prior specifications" that should be avoided, # or supported only for backwards compatibility. - raise NotImplementedError('absolute URI with rootless path: %r' - % (href,)) + raise NotImplementedError( + "absolute URI with rootless path: %r" % (href,) + ) else: if clicked.rooted: path = clicked.path @@ -1417,12 +1626,14 @@ class URL(object): path = self.path if not query: query = self.query - return self.replace(scheme=clicked.scheme or self.scheme, - host=clicked.host or self.host, - port=clicked.port or self.port, - path=_resolve_dot_segments(path), - query=query, - fragment=clicked.fragment) + return self.replace( + scheme=clicked.scheme or self.scheme, + host=clicked.host or self.host, + port=clicked.port or self.port, + path=_resolve_dot_segments(path), + query=query, + fragment=clicked.fragment, + ) def to_uri(self): # type: () -> URL @@ -1440,10 +1651,12 @@ class URL(object): hostname encoded, so that they are all in the standard US-ASCII range. """ - new_userinfo = u':'.join([_encode_userinfo_part(p) for p in - self.userinfo.split(':', 1)]) - new_path = _encode_path_parts(self.path, has_scheme=bool(self.scheme), - rooted=False, maximal=True) + new_userinfo = u":".join( + [_encode_userinfo_part(p) for p in self.userinfo.split(":", 1)] + ) + new_path = _encode_path_parts( + self.path, has_scheme=bool(self.scheme), rooted=False, maximal=True + ) new_host = ( self.host if not self.host @@ -1453,11 +1666,18 @@ class URL(object): userinfo=new_userinfo, host=new_host, path=new_path, - query=tuple([(_encode_query_key(k, maximal=True), - _encode_query_value(v, maximal=True) - if v is not None else None) - for k, v in self.query]), - fragment=_encode_fragment_part(self.fragment, maximal=True) + query=tuple( + [ + ( + _encode_query_key(k, maximal=True), + _encode_query_value(v, maximal=True) + if v is not None + else None, + ) + for k, v in self.query + ] + ), + fragment=_encode_fragment_part(self.fragment, maximal=True), ) def to_iri(self): @@ -1484,9 +1704,9 @@ class URL(object): URL: A new instance with its path segments, query parameters, and hostname decoded for display purposes. """ # noqa: E501 - new_userinfo = u':'.join([ - _decode_userinfo_part(p) for p in self.userinfo.split(':', 1) - ]) + new_userinfo = u":".join( + [_decode_userinfo_part(p) for p in self.userinfo.split(":", 1)] + ) host_text = _decode_host(self.host) return self.replace( @@ -1495,8 +1715,8 @@ class URL(object): path=[_decode_path_part(segment) for segment in self.path], query=tuple( ( - _decode_query_key(k), _decode_query_value(v) - if v is not None else None + _decode_query_key(k), + _decode_query_value(v) if v is not None else None, ) for k, v in self.query ), @@ -1531,23 +1751,29 @@ class URL(object): """ scheme = self.scheme authority = self.authority(with_password) - path = "/".join(_encode_path_parts( - self.path, - rooted=self.rooted, - has_scheme=bool(scheme), - has_authority=bool(authority), - maximal=False - )) + path = "/".join( + _encode_path_parts( + self.path, + rooted=self.rooted, + has_scheme=bool(scheme), + has_authority=bool(authority), + maximal=False, + ) + ) query_parts = [] for k, v in self.query: if v is None: query_parts.append(_encode_query_key(k, maximal=False)) else: - query_parts.append(u'='.join(( - _encode_query_key(k, maximal=False), - _encode_query_value(v, maximal=False) - ))) - query_string = u'&'.join(query_parts) + query_parts.append( + u"=".join( + ( + _encode_query_key(k, maximal=False), + _encode_query_value(v, maximal=False), + ) + ) + ) + query_string = u"&".join(query_parts) fragment = self.fragment @@ -1555,23 +1781,23 @@ class URL(object): _add = parts.append if scheme: _add(scheme) - _add(':') + _add(":") if authority: - _add('//') + _add("//") _add(authority) - elif (scheme and path[:2] != '//' and self.uses_netloc): - _add('//') + elif scheme and path[:2] != "//" and self.uses_netloc: + _add("//") if path: - if scheme and authority and path[:1] != '/': - _add('/') # relpaths with abs authorities auto get '/' + if scheme and authority and path[:1] != "/": + _add("/") # relpaths with abs authorities auto get '/' _add(path) if query_string: - _add('?') + _add("?") _add(query_string) if fragment: - _add('#') + _add("#") _add(fragment) - return u''.join(parts) + return u"".join(parts) def __repr__(self): # type: () -> str @@ -1579,7 +1805,7 @@ class URL(object): constituent parts, as well as being a valid argument to :func:`eval`. """ - return '%s.from_text(%r)' % (self.__class__.__name__, self.to_text()) + return "%s.from_text(%r)" % (self.__class__.__name__, self.to_text()) def _to_bytes(self): # type: () -> bytes @@ -1588,7 +1814,7 @@ class URL(object): requests, which automatically stringify URL parameters. See issue #49. """ - return self.to_uri().to_text().encode('ascii') + return self.to_uri().to_text().encode("ascii") if PY2: __str__ = _to_bytes @@ -1617,7 +1843,7 @@ class URL(object): except AttributeError: # object.__dir__ == AttributeError # pdw for py2 ret = dir(self.__class__) + list(self.__dict__.keys()) - ret = sorted(set(ret) - set(['fromText', 'asURI', 'asIRI', 'asText'])) + ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"])) return ret # # End Twisted Compat Code @@ -1667,8 +1893,9 @@ class URL(object): """ # Preserve the original position of the query key in the list q = [(k, v) for (k, v) in self.query if k != name] - idx = next((i for (i, (k, v)) in enumerate(self.query) - if k == name), -1) + idx = next( + (i for (i, (k, v)) in enumerate(self.query) if k == name), -1 + ) q[idx:idx] = [(name, value)] return self.replace(query=q) @@ -1696,9 +1923,9 @@ class URL(object): def remove( self, - name, # type: Text + name, # type: Text value=_UNSET, # type: Text - limit=None, # type: Optional[int] + limit=None, # type: Optional[int] ): # type: (...) -> URL """Make a new :class:`URL` instance with occurrences of the query @@ -1722,7 +1949,8 @@ class URL(object): nq = [(k, v) for (k, v) in self.query if k != name] else: nq = [ - (k, v) for (k, v) in self.query + (k, v) + for (k, v) in self.query if not (k == name and v == value) ] else: @@ -1730,9 +1958,9 @@ class URL(object): for k, v in self.query: if ( - k == name and - (value is _UNSET or v == value) and - removed_count < limit + k == name + and (value is _UNSET or v == value) + and removed_count < limit ): removed_count += 1 # drop it else: @@ -1764,6 +1992,7 @@ class DecodedURL(object): check for validity. Defaults to False. """ + def __init__(self, url, lazy=False): # type: (URL, bool) -> None self._url = url @@ -1810,7 +2039,7 @@ class DecodedURL(object): "Passthrough to :meth:`~hyperlink.URL.to_iri()`" return self._url.to_iri() - def click(self, href=u''): + def click(self, href=u""): # type: (Union[Text, URL, DecodedURL]) -> DecodedURL """Return a new DecodedURL wrapping the result of :meth:`~hyperlink.URL.click()` @@ -1838,15 +2067,25 @@ class DecodedURL(object): new_segs = [_encode_reserved(s) for s in segments] return self.__class__(self._url.child(*new_segs)) - def normalize(self, scheme=True, host=True, path=True, query=True, - fragment=True, userinfo=True, percents=True): + def normalize( + self, + scheme=True, + host=True, + path=True, + query=True, + fragment=True, + userinfo=True, + percents=True, + ): # type: (bool, bool, bool, bool, bool, bool, bool) -> DecodedURL """Return a new `DecodedURL` wrapping the result of :meth:`~hyperlink.URL.normalize()` """ - return self.__class__(self._url.normalize( - scheme, host, path, query, fragment, userinfo, percents - )) + return self.__class__( + self._url.normalize( + scheme, host, path, query, fragment, userinfo, percents + ) + ) @property def absolute(self): @@ -1877,24 +2116,30 @@ class DecodedURL(object): def path(self): # type: () -> Sequence[Text] if not hasattr(self, "_path"): - self._path = tuple([ - _percent_decode(p, raise_subencoding_exc=True) - for p in self._url.path - ]) + self._path = tuple( + [ + _percent_decode(p, raise_subencoding_exc=True) + for p in self._url.path + ] + ) return self._path @property def query(self): # type: () -> QueryPairs if not hasattr(self, "_query"): - self._query = cast(QueryPairs, tuple( + self._query = cast( + QueryPairs, tuple( - _percent_decode(x, raise_subencoding_exc=True) - if x is not None else None - for x in (k, v) - ) - for k, v in self._url.query - )) + tuple( + _percent_decode(x, raise_subencoding_exc=True) + if x is not None + else None + for x in (k, v) + ) + for k, v in self._url.query + ), + ) return self._query @property @@ -1914,9 +2159,9 @@ class DecodedURL(object): tuple( tuple( _percent_decode(p, raise_subencoding_exc=True) - for p in self._url.userinfo.split(':', 1) + for p in self._url.userinfo.split(":", 1) ) - ) + ), ) return self._userinfo @@ -1932,15 +2177,15 @@ class DecodedURL(object): def replace( self, - scheme=_UNSET, # type: Optional[Text] - host=_UNSET, # type: Optional[Text] - path=_UNSET, # type: Iterable[Text] - query=_UNSET, # type: QueryParameters - fragment=_UNSET, # type: Text - port=_UNSET, # type: Optional[int] - rooted=_UNSET, # type: Optional[bool] - userinfo=_UNSET, # type: Union[Tuple[str], Tuple[str, str]] - uses_netloc=_UNSET # type: Optional[bool] + scheme=_UNSET, # type: Optional[Text] + host=_UNSET, # type: Optional[Text] + path=_UNSET, # type: Iterable[Text] + query=_UNSET, # type: QueryParameters + fragment=_UNSET, # type: Text + port=_UNSET, # type: Optional[int] + rooted=_UNSET, # type: Optional[bool] + userinfo=_UNSET, # type: Union[Tuple[str], Tuple[str, str]] + uses_netloc=_UNSET, # type: Optional[bool] ): # type: (...) -> DecodedURL """While the signature is the same, this `replace()` differs a little @@ -1953,30 +2198,36 @@ class DecodedURL(object): if path is not _UNSET: path = tuple(_encode_reserved(p) for p in path) if query is not _UNSET: - query = cast(QueryPairs, tuple( + query = cast( + QueryPairs, tuple( - _encode_reserved(x) - if x is not None else None - for x in (k, v) - ) - for k, v in iter_pairs(query) - )) + tuple( + _encode_reserved(x) if x is not None else None + for x in (k, v) + ) + for k, v in iter_pairs(query) + ), + ) if userinfo is not _UNSET: if len(userinfo) > 2: - raise ValueError('userinfo expected sequence of ["user"] or' - ' ["user", "password"], got %r' % (userinfo,)) - userinfo_text = u':'.join([_encode_reserved(p) for p in userinfo]) + raise ValueError( + 'userinfo expected sequence of ["user"] or' + ' ["user", "password"], got %r' % (userinfo,) + ) + userinfo_text = u":".join([_encode_reserved(p) for p in userinfo]) else: userinfo_text = _UNSET - new_url = self._url.replace(scheme=scheme, - host=host, - path=path, - query=query, - fragment=fragment, - port=port, - rooted=rooted, - userinfo=userinfo_text, - uses_netloc=uses_netloc) + new_url = self._url.replace( + scheme=scheme, + host=host, + path=path, + query=query, + fragment=fragment, + port=port, + rooted=rooted, + userinfo=userinfo_text, + uses_netloc=uses_netloc, + ) return self.__class__(url=new_url) def get(self, name): @@ -2001,9 +2252,9 @@ class DecodedURL(object): def remove( self, - name, # type: Text + name, # type: Text value=_UNSET, # type: Text - limit=None, # type: Optional[int] + limit=None, # type: Optional[int] ): # type: (...) -> DecodedURL """Return a new DecodedURL with query parameter *name* removed. @@ -2016,16 +2267,17 @@ class DecodedURL(object): nq = [(k, v) for (k, v) in self.query if k != name] else: nq = [ - (k, v) for (k, v) in self.query + (k, v) + for (k, v) in self.query if not (k == name and v == value) ] else: nq, removed_count = [], 0 for k, v in self.query: if ( - k == name and - (value is _UNSET or v == value) and - removed_count < limit + k == name + and (value is _UNSET or v == value) + and removed_count < limit ): removed_count += 1 # drop it else: @@ -2036,7 +2288,7 @@ class DecodedURL(object): def __repr__(self): # type: () -> str cn = self.__class__.__name__ - return '%s(url=%r)' % (cn, self._url) + return "%s(url=%r)" % (cn, self._url) def __str__(self): # type: () -> str @@ -2058,9 +2310,20 @@ class DecodedURL(object): def __hash__(self): # type: () -> int - return hash((self.__class__, self.scheme, self.userinfo, self.host, - self.path, self.query, self.fragment, self.port, - self.rooted, self.uses_netloc)) + return hash( + ( + self.__class__, + self.scheme, + self.userinfo, + self.host, + self.path, + self.query, + self.fragment, + self.port, + self.rooted, + self.uses_netloc, + ) + ) # # Begin Twisted Compat Code asURI = to_uri @@ -2082,7 +2345,7 @@ class DecodedURL(object): except AttributeError: # object.__dir__ == AttributeError # pdw for py2 ret = dir(self.__class__) + list(self.__dict__.keys()) - ret = sorted(set(ret) - set(['fromText', 'asURI', 'asIRI', 'asText'])) + ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"])) return ret # # End Twisted Compat Code diff --git a/src/hyperlink/test/common.py b/src/hyperlink/test/common.py index f489266..1eec0db 100644 --- a/src/hyperlink/test/common.py +++ b/src/hyperlink/test/common.py @@ -6,12 +6,13 @@ class HyperlinkTestCase(TestCase): """This type mostly exists to provide a backwards-compatible assertRaises method for Python 2.6 testing. """ + def assertRaises( # type: ignore[override] self, expected_exception, # type: Type[BaseException] - callableObj=None, # type: Optional[Callable[..., Any]] - *args, # type: Any - **kwargs # type: Any + callableObj=None, # type: Optional[Callable[..., Any]] + *args, # type: Any + **kwargs # type: Any ): # type: (...) -> Any """Fail unless an exception of class expected_exception is raised diff --git a/src/hyperlink/test/test_common.py b/src/hyperlink/test/test_common.py index af495d8..6827d0b 100644 --- a/src/hyperlink/test/test_common.py +++ b/src/hyperlink/test/test_common.py @@ -39,8 +39,9 @@ class TestHyperlink(TestCase): called_with.append((args, kwargs)) raise _ExpectedException - self.hyperlink_test.assertRaises(_ExpectedException, - raisesExpected, 1, keyword=True) + self.hyperlink_test.assertRaises( + _ExpectedException, raisesExpected, 1, keyword=True + ) self.assertEqual(called_with, [((1,), {"keyword": True})]) def test_assertRaisesWithCallableUnexpectedException(self): @@ -55,8 +56,9 @@ class TestHyperlink(TestCase): raise _UnexpectedException try: - self.hyperlink_test.assertRaises(_ExpectedException, - doesNotRaiseExpected) + self.hyperlink_test.assertRaises( + _ExpectedException, doesNotRaiseExpected + ) except _UnexpectedException: pass @@ -72,8 +74,7 @@ class TestHyperlink(TestCase): pass try: - self.hyperlink_test.assertRaises(_ExpectedException, - doesNotRaise) + self.hyperlink_test.assertRaises(_ExpectedException, doesNotRaise) except AssertionError: pass diff --git a/src/hyperlink/test/test_decoded_url.py b/src/hyperlink/test/test_decoded_url.py index 3549107..7104bea 100644 --- a/src/hyperlink/test/test_decoded_url.py +++ b/src/hyperlink/test/test_decoded_url.py @@ -7,7 +7,7 @@ from .. import DecodedURL, URL from .._url import _percent_decode from .common import HyperlinkTestCase -BASIC_URL = 'http://example.com/#' +BASIC_URL = "http://example.com/#" TOTAL_URL = ( "https://%75%73%65%72:%00%00%00%00@xn--bcher-kva.ch:8080/" "a/nice%20nice/./path/?zot=23%25&zut#frég" @@ -15,27 +15,26 @@ TOTAL_URL = ( class TestURL(HyperlinkTestCase): - def test_durl_basic(self): # type: () -> None bdurl = DecodedURL.from_text(BASIC_URL) - assert bdurl.scheme == 'http' - assert bdurl.host == 'example.com' + assert bdurl.scheme == "http" + assert bdurl.host == "example.com" assert bdurl.port == 80 - assert bdurl.path == ('',) - assert bdurl.fragment == '' + assert bdurl.path == ("",) + assert bdurl.fragment == "" durl = DecodedURL.from_text(TOTAL_URL) - assert durl.scheme == 'https' - assert durl.host == 'bücher.ch' + assert durl.scheme == "https" + assert durl.host == "bücher.ch" assert durl.port == 8080 - assert durl.path == ('a', 'nice nice', '.', 'path', '') - assert durl.fragment == 'frég' - assert durl.get('zot') == ['23%'] + assert durl.path == ("a", "nice nice", ".", "path", "") + assert durl.fragment == "frég" + assert durl.get("zot") == ["23%"] - assert durl.user == 'user' - assert durl.userinfo == ('user', '\0\0\0\0') + assert durl.user == "user" + assert durl.userinfo == ("user", "\0\0\0\0") def test_passthroughs(self): # type: () -> None @@ -44,18 +43,18 @@ class TestURL(HyperlinkTestCase): # through to the underlying URL durl = DecodedURL.from_text(TOTAL_URL) - assert durl.sibling('te%t').path[-1] == 'te%t' - assert durl.child('../test2%').path[-1] == '../test2%' + assert durl.sibling("te%t").path[-1] == "te%t" + assert durl.child("../test2%").path[-1] == "../test2%" assert durl.child() == durl assert durl.child() is durl - assert durl.click('/').path[-1] == '' - assert durl.user == 'user' + assert durl.click("/").path[-1] == "" + assert durl.user == "user" - assert '.' in durl.path - assert '.' not in durl.normalize().path + assert "." in durl.path + assert "." not in durl.normalize().path - assert durl.to_uri().fragment == 'fr%C3%A9g' - assert ' ' in durl.to_iri().path[1] + assert durl.to_uri().fragment == "fr%C3%A9g" + assert " " in durl.to_iri().path[1] assert durl.to_text(with_password=True) == TOTAL_URL @@ -68,8 +67,8 @@ class TestURL(HyperlinkTestCase): assert durl2 == durl2.encoded_url.get_decoded_url(lazy=True) assert ( - str(DecodedURL.from_text(BASIC_URL).child(' ')) == - 'http://example.com/%20' + str(DecodedURL.from_text(BASIC_URL).child(" ")) + == "http://example.com/%20" ) assert not (durl == 1) @@ -78,46 +77,42 @@ class TestURL(HyperlinkTestCase): def test_repr(self): # type: () -> None durl = DecodedURL.from_text(TOTAL_URL) - assert repr(durl) == 'DecodedURL(url=' + repr(durl._url) + ')' + assert repr(durl) == "DecodedURL(url=" + repr(durl._url) + ")" def test_query_manipulation(self): # type: () -> None durl = DecodedURL.from_text(TOTAL_URL) - assert durl.get('zot') == ['23%'] - durl = durl.add(' ', 'space') - assert durl.get(' ') == ['space'] - durl = durl.set(' ', 'spa%ed') - assert durl.get(' ') == ['spa%ed'] + assert durl.get("zot") == ["23%"] + durl = durl.add(" ", "space") + assert durl.get(" ") == ["space"] + durl = durl.set(" ", "spa%ed") + assert durl.get(" ") == ["spa%ed"] durl = DecodedURL(url=durl.to_uri()) - assert durl.get(' ') == ['spa%ed'] - durl = durl.remove(' ') - assert durl.get(' ') == [] + assert durl.get(" ") == ["spa%ed"] + durl = durl.remove(" ") + assert durl.get(" ") == [] - durl = DecodedURL.from_text('/?%61rg=b&arg=c') - assert durl.get('arg') == ['b', 'c'] + durl = DecodedURL.from_text("/?%61rg=b&arg=c") + assert durl.get("arg") == ["b", "c"] - assert durl.set('arg', 'd').get('arg') == ['d'] + assert durl.set("arg", "d").get("arg") == ["d"] durl = DecodedURL.from_text( - u"https://example.com/a/b/?fóó=1&bar=2&fóó=3" + "https://example.com/a/b/?fóó=1&bar=2&fóó=3" ) - assert ( - durl.remove("fóó") == - DecodedURL.from_text("https://example.com/a/b/?bar=2") + assert durl.remove("fóó") == DecodedURL.from_text( + "https://example.com/a/b/?bar=2" ) - assert ( - durl.remove("fóó", value="1") == - DecodedURL.from_text("https://example.com/a/b/?bar=2&fóó=3") + assert durl.remove("fóó", value="1") == DecodedURL.from_text( + "https://example.com/a/b/?bar=2&fóó=3" ) - assert ( - durl.remove("fóó", limit=1) == - DecodedURL.from_text("https://example.com/a/b/?bar=2&fóó=3") + assert durl.remove("fóó", limit=1) == DecodedURL.from_text( + "https://example.com/a/b/?bar=2&fóó=3" ) - assert ( - durl.remove("fóó", value="1", limit=0) == - DecodedURL.from_text("https://example.com/a/b/?fóó=1&bar=2&fóó=3") + assert durl.remove("fóó", value="1", limit=0) == DecodedURL.from_text( + "https://example.com/a/b/?fóó=1&bar=2&fóó=3" ) def test_equality_and_hashability(self): @@ -153,15 +148,17 @@ class TestURL(HyperlinkTestCase): # type: () -> None durl = DecodedURL.from_text(TOTAL_URL) - durl2 = durl.replace(scheme=durl.scheme, - host=durl.host, - path=durl.path, - query=durl.query, - fragment=durl.fragment, - port=durl.port, - rooted=durl.rooted, - userinfo=durl.userinfo, - uses_netloc=durl.uses_netloc) + durl2 = durl.replace( + scheme=durl.scheme, + host=durl.host, + path=durl.path, + query=durl.query, + fragment=durl.fragment, + port=durl.port, + rooted=durl.rooted, + userinfo=durl.userinfo, + uses_netloc=durl.uses_netloc, + ) assert durl == durl2 @@ -171,7 +168,9 @@ class TestURL(HyperlinkTestCase): with self.assertRaises(ValueError): durl.replace( userinfo=( # type: ignore[arg-type] - 'user', 'pw', 'thiswillcauseafailure' + "user", + "pw", + "thiswillcauseafailure", ) ) return @@ -181,8 +180,8 @@ class TestURL(HyperlinkTestCase): durl = DecodedURL.from_text(TOTAL_URL) assert durl == DecodedURL.fromText(TOTAL_URL) - assert 'to_text' in dir(durl) - assert 'asText' not in dir(durl) + assert "to_text" in dir(durl) + assert "asText" not in dir(durl) assert durl.to_text() == durl.asText() def test_percent_decode_mixed(self): @@ -190,24 +189,24 @@ class TestURL(HyperlinkTestCase): # See https://github.com/python-hyper/hyperlink/pull/59 for a # nice discussion of the possibilities - assert _percent_decode('abcdé%C3%A9éfg') == 'abcdéééfg' + assert _percent_decode("abcdé%C3%A9éfg") == "abcdéééfg" # still allow percent encoding in the case of an error - assert _percent_decode('abcdé%C3éfg') == 'abcdé%C3éfg' + assert _percent_decode("abcdé%C3éfg") == "abcdé%C3éfg" # ...unless explicitly told otherwise with self.assertRaises(UnicodeDecodeError): - _percent_decode('abcdé%C3éfg', raise_subencoding_exc=True) + _percent_decode("abcdé%C3éfg", raise_subencoding_exc=True) # when not encodable as subencoding - assert _percent_decode('é%25é', subencoding='ascii') == 'é%25é' + assert _percent_decode("é%25é", subencoding="ascii") == "é%25é" def test_click_decoded_url(self): # type: () -> None durl = DecodedURL.from_text(TOTAL_URL) - durl_dest = DecodedURL.from_text('/tëst') + durl_dest = DecodedURL.from_text("/tëst") clicked = durl.click(durl_dest) assert clicked.host == durl.host assert clicked.path == durl_dest.path - assert clicked.path == ('tëst',) + assert clicked.path == ("tëst",) diff --git a/src/hyperlink/test/test_parse.py b/src/hyperlink/test/test_parse.py index 8fdbf35..66b0270 100644 --- a/src/hyperlink/test/test_parse.py +++ b/src/hyperlink/test/test_parse.py @@ -5,29 +5,28 @@ from __future__ import unicode_literals from .common import HyperlinkTestCase from hyperlink import parse, EncodedURL, DecodedURL -BASIC_URL = 'http://example.com/#' +BASIC_URL = "http://example.com/#" TOTAL_URL = ( "https://%75%73%65%72:%00%00%00%00@xn--bcher-kva.ch:8080" "/a/nice%20nice/./path/?zot=23%25&zut#frég" ) -UNDECODABLE_FRAG_URL = TOTAL_URL + '%C3' +UNDECODABLE_FRAG_URL = TOTAL_URL + "%C3" # the %C3 above percent-decodes to an unpaired \xc3 byte which makes this # invalid utf8 class TestURL(HyperlinkTestCase): - def test_parse(self): # type: () -> None purl = parse(TOTAL_URL) assert isinstance(purl, DecodedURL) - assert purl.user == 'user' - assert purl.get('zot') == ['23%'] - assert purl.fragment == 'frég' + assert purl.user == "user" + assert purl.get("zot") == ["23%"] + assert purl.fragment == "frég" purl2 = parse(TOTAL_URL, decoded=False) assert isinstance(purl2, EncodedURL) - assert purl2.get('zot') == ['23%25'] + assert purl2.get("zot") == ["23%25"] with self.assertRaises(UnicodeDecodeError): purl3 = parse(UNDECODABLE_FRAG_URL) diff --git a/src/hyperlink/test/test_scheme_registration.py b/src/hyperlink/test/test_scheme_registration.py index a8bbbef..f98109a 100644 --- a/src/hyperlink/test/test_scheme_registration.py +++ b/src/hyperlink/test/test_scheme_registration.py @@ -9,7 +9,6 @@ from .._url import register_scheme, URL class TestSchemeRegistration(HyperlinkTestCase): - def setUp(self): # type: () -> None self._orig_scheme_port_map = dict(_url.SCHEME_PORT_MAP) @@ -22,52 +21,52 @@ class TestSchemeRegistration(HyperlinkTestCase): def test_register_scheme_basic(self): # type: () -> None - register_scheme('deltron', uses_netloc=True, default_port=3030) + register_scheme("deltron", uses_netloc=True, default_port=3030) - u1 = URL.from_text('deltron://example.com') - assert u1.scheme == 'deltron' + u1 = URL.from_text("deltron://example.com") + assert u1.scheme == "deltron" assert u1.port == 3030 assert u1.uses_netloc is True # test netloc works even when the original gives no indication - u2 = URL.from_text('deltron:') - u2 = u2.replace(host='example.com') - assert u2.to_text() == 'deltron://example.com' + u2 = URL.from_text("deltron:") + u2 = u2.replace(host="example.com") + assert u2.to_text() == "deltron://example.com" # test default port means no emission - u3 = URL.from_text('deltron://example.com:3030') - assert u3.to_text() == 'deltron://example.com' + u3 = URL.from_text("deltron://example.com:3030") + assert u3.to_text() == "deltron://example.com" - register_scheme('nonetron', default_port=3031) - u4 = URL(scheme='nonetron') - u4 = u4.replace(host='example.com') - assert u4.to_text() == 'nonetron://example.com' + register_scheme("nonetron", default_port=3031) + u4 = URL(scheme="nonetron") + u4 = u4.replace(host="example.com") + assert u4.to_text() == "nonetron://example.com" def test_register_no_netloc_scheme(self): # type: () -> None - register_scheme('noloctron', uses_netloc=False) - u4 = URL(scheme='noloctron') + register_scheme("noloctron", uses_netloc=False) + u4 = URL(scheme="noloctron") u4 = u4.replace(path=("example", "path")) - assert u4.to_text() == 'noloctron:example/path' + assert u4.to_text() == "noloctron:example/path" def test_register_no_netloc_with_port(self): # type: () -> None with self.assertRaises(ValueError): - register_scheme('badnetlocless', uses_netloc=False, default_port=7) + register_scheme("badnetlocless", uses_netloc=False, default_port=7) def test_invalid_uses_netloc(self): # type: () -> None with self.assertRaises(ValueError): - register_scheme('badnetloc', uses_netloc=cast(bool, None)) + register_scheme("badnetloc", uses_netloc=cast(bool, None)) with self.assertRaises(ValueError): - register_scheme('badnetloc', uses_netloc=cast(bool, object())) + register_scheme("badnetloc", uses_netloc=cast(bool, object())) def test_register_invalid_uses_netloc(self): # type: () -> None with self.assertRaises(ValueError): - register_scheme('lol', uses_netloc=cast(bool, object())) + register_scheme("lol", uses_netloc=cast(bool, object())) def test_register_invalid_port(self): # type: () -> None with self.assertRaises(ValueError): - register_scheme('nope', default_port=cast(bool, object())) + register_scheme("nope", default_port=cast(bool, object())) diff --git a/src/hyperlink/test/test_url.py b/src/hyperlink/test/test_url.py index fa5c7bf..159d6a5 100644 --- a/src/hyperlink/test/test_url.py +++ b/src/hyperlink/test/test_url.py @@ -14,65 +14,63 @@ from .. import URL, URLParseError from .._url import inet_pton, SCHEME_PORT_MAP -PY2 = (sys.version_info[0] == 2) -unicode = type(u'') +PY2 = sys.version_info[0] == 2 +unicode = type("") BASIC_URL = "http://www.foo.com/a/nice/path/?zot=23&zut" # Examples from RFC 3986 section 5.4, Reference Resolution Examples -relativeLinkBaseForRFC3986 = 'http://a/b/c/d;p?q' +relativeLinkBaseForRFC3986 = "http://a/b/c/d;p?q" relativeLinkTestsForRFC3986 = [ # "Normal" # ('g:h', 'g:h'), # can't click on a scheme-having url without an abs path - ('g', 'http://a/b/c/g'), - ('./g', 'http://a/b/c/g'), - ('g/', 'http://a/b/c/g/'), - ('/g', 'http://a/g'), - ('//g', 'http://g'), - ('?y', 'http://a/b/c/d;p?y'), - ('g?y', 'http://a/b/c/g?y'), - ('#s', 'http://a/b/c/d;p?q#s'), - ('g#s', 'http://a/b/c/g#s'), - ('g?y#s', 'http://a/b/c/g?y#s'), - (';x', 'http://a/b/c/;x'), - ('g;x', 'http://a/b/c/g;x'), - ('g;x?y#s', 'http://a/b/c/g;x?y#s'), - ('', 'http://a/b/c/d;p?q'), - ('.', 'http://a/b/c/'), - ('./', 'http://a/b/c/'), - ('..', 'http://a/b/'), - ('../', 'http://a/b/'), - ('../g', 'http://a/b/g'), - ('../..', 'http://a/'), - ('../../', 'http://a/'), - ('../../g', 'http://a/g'), - + ("g", "http://a/b/c/g"), + ("./g", "http://a/b/c/g"), + ("g/", "http://a/b/c/g/"), + ("/g", "http://a/g"), + ("//g", "http://g"), + ("?y", "http://a/b/c/d;p?y"), + ("g?y", "http://a/b/c/g?y"), + ("#s", "http://a/b/c/d;p?q#s"), + ("g#s", "http://a/b/c/g#s"), + ("g?y#s", "http://a/b/c/g?y#s"), + (";x", "http://a/b/c/;x"), + ("g;x", "http://a/b/c/g;x"), + ("g;x?y#s", "http://a/b/c/g;x?y#s"), + ("", "http://a/b/c/d;p?q"), + (".", "http://a/b/c/"), + ("./", "http://a/b/c/"), + ("..", "http://a/b/"), + ("../", "http://a/b/"), + ("../g", "http://a/b/g"), + ("../..", "http://a/"), + ("../../", "http://a/"), + ("../../g", "http://a/g"), # Abnormal examples # ".." cannot be used to change the authority component of a URI. - ('../../../g', 'http://a/g'), - ('../../../../g', 'http://a/g'), - + ("../../../g", "http://a/g"), + ("../../../../g", "http://a/g"), # Only include "." and ".." when they are only part of a larger segment, # not by themselves. - ('/./g', 'http://a/g'), - ('/../g', 'http://a/g'), - ('g.', 'http://a/b/c/g.'), - ('.g', 'http://a/b/c/.g'), - ('g..', 'http://a/b/c/g..'), - ('..g', 'http://a/b/c/..g'), + ("/./g", "http://a/g"), + ("/../g", "http://a/g"), + ("g.", "http://a/b/c/g."), + (".g", "http://a/b/c/.g"), + ("g..", "http://a/b/c/g.."), + ("..g", "http://a/b/c/..g"), # Unnecessary or nonsensical forms of "." and "..". - ('./../g', 'http://a/b/g'), - ('./g/.', 'http://a/b/c/g/'), - ('g/./h', 'http://a/b/c/g/h'), - ('g/../h', 'http://a/b/c/h'), - ('g;x=1/./y', 'http://a/b/c/g;x=1/y'), - ('g;x=1/../y', 'http://a/b/c/y'), + ("./../g", "http://a/b/g"), + ("./g/.", "http://a/b/c/g/"), + ("g/./h", "http://a/b/c/g/h"), + ("g/../h", "http://a/b/c/h"), + ("g;x=1/./y", "http://a/b/c/g;x=1/y"), + ("g;x=1/../y", "http://a/b/c/y"), # Separating the reference's query and fragment components from the path. - ('g?y/./x', 'http://a/b/c/g?y/./x'), - ('g?y/../x', 'http://a/b/c/g?y/../x'), - ('g#s/./x', 'http://a/b/c/g#s/./x'), - ('g#s/../x', 'http://a/b/c/g#s/../x') + ("g?y/./x", "http://a/b/c/g?y/./x"), + ("g?y/../x", "http://a/b/c/g?y/../x"), + ("g#s/./x", "http://a/b/c/g#s/./x"), + ("g#s/../x", "http://a/b/c/g#s/../x"), ] @@ -95,52 +93,50 @@ ROUNDTRIP_TESTS = ( "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)", "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)", "?sslrootcert=/Users/glyph/Downloads/rds-ca-2015-root.pem&sslmode=verify", - # from boltons.urlutils' tests - - 'http://googlewebsite.com/e-shops.aspx', - 'http://example.com:8080/search?q=123&business=Nothing%20Special', - 'http://hatnote.com:9000/?arg=1&arg=2&arg=3', - 'https://xn--bcher-kva.ch', - 'http://xn--ggbla1c4e.xn--ngbc5azd/', - 'http://tools.ietf.org/html/rfc3986#section-3.4', + "http://googlewebsite.com/e-shops.aspx", + "http://example.com:8080/search?q=123&business=Nothing%20Special", + "http://hatnote.com:9000/?arg=1&arg=2&arg=3", + "https://xn--bcher-kva.ch", + "http://xn--ggbla1c4e.xn--ngbc5azd/", + "http://tools.ietf.org/html/rfc3986#section-3.4", # 'http://wiki:pedia@hatnote.com', - 'ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz', - 'http://[1080:0:0:0:8:800:200C:417A]/index.html', - 'ssh://192.0.2.16:2222/', - 'https://[::101.45.75.219]:80/?hi=bye', - 'ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)', - 'mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org', - 'news:alt.rec.motorcycle', - 'tel:+1-800-867-5309', - 'urn:oasis:member:A00024:x', - ('magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%' - '20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&' - 'tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&' - 'tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337'), - + "ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz", + "http://[1080:0:0:0:8:800:200C:417A]/index.html", + "ssh://192.0.2.16:2222/", + "https://[::101.45.75.219]:80/?hi=bye", + "ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)", + "mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org", + "news:alt.rec.motorcycle", + "tel:+1-800-867-5309", + "urn:oasis:member:A00024:x", + ( + "magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%" + "20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&" + "tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&" + "tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337" + ), # percent-encoded delimiters in percent-encodable fields - - 'https://%3A@example.com/', # colon in username - 'https://%40@example.com/', # at sign in username - 'https://%2f@example.com/', # slash in username - 'https://a:%3a@example.com/', # colon in password - 'https://a:%40@example.com/', # at sign in password - 'https://a:%2f@example.com/', # slash in password - 'https://a:%3f@example.com/', # question mark in password - 'https://example.com/%2F/', # slash in path - 'https://example.com/%3F/', # question mark in path - 'https://example.com/%23/', # hash in path - 'https://example.com/?%23=b', # hash in query param name - 'https://example.com/?%3D=b', # equals in query param name - 'https://example.com/?%26=b', # ampersand in query param name - 'https://example.com/?a=%23', # hash in query param value - 'https://example.com/?a=%26', # ampersand in query param value - 'https://example.com/?a=%3D', # equals in query param value + "https://%3A@example.com/", # colon in username + "https://%40@example.com/", # at sign in username + "https://%2f@example.com/", # slash in username + "https://a:%3a@example.com/", # colon in password + "https://a:%40@example.com/", # at sign in password + "https://a:%2f@example.com/", # slash in password + "https://a:%3f@example.com/", # question mark in password + "https://example.com/%2F/", # slash in path + "https://example.com/%3F/", # question mark in path + "https://example.com/%23/", # hash in path + "https://example.com/?%23=b", # hash in query param name + "https://example.com/?%3D=b", # equals in query param name + "https://example.com/?%26=b", # ampersand in query param name + "https://example.com/?a=%23", # hash in query param value + "https://example.com/?a=%26", # ampersand in query param value + "https://example.com/?a=%3D", # equals in query param value # double-encoded percent sign in all percent-encodable positions: "http://(%2525):(%2525)@example.com/(%2525)/?(%2525)=(%2525)#(%2525)", # colon in first part of schemeless relative url - 'first_seg_rel_path__colon%3Anotok/second_seg__colon%3Aok', + "first_seg_rel_path__colon%3Anotok/second_seg__colon%3Aok", ) @@ -156,10 +152,10 @@ class TestURL(HyperlinkTestCase): @param u: The L{URL} to test. """ - self.assertTrue(isinstance(u.scheme, unicode) or u.scheme is None, - repr(u)) - self.assertTrue(isinstance(u.host, unicode) or u.host is None, - repr(u)) + self.assertTrue( + isinstance(u.scheme, unicode) or u.scheme is None, repr(u) + ) + self.assertTrue(isinstance(u.host, unicode) or u.host is None, repr(u)) for seg in u.path: self.assertEqual(type(seg), unicode, repr(u)) for (_k, v) in u.query: @@ -169,14 +165,14 @@ class TestURL(HyperlinkTestCase): def assertURL( self, - u, # type: URL - scheme, # type: Text - host, # type: Text - path, # type: Iterable[Text] - query, # type: Iterable[Tuple[Text, Optional[Text]]] - fragment, # type: Text - port, # type: Optional[int] - userinfo='', # type: Text + u, # type: URL + scheme, # type: Text + host, # type: Text + path, # type: Iterable[Text] + query, # type: Iterable[Tuple[Text, Optional[Text]]] + fragment, # type: Text + port, # type: Optional[int] + userinfo="", # type: Text ): # type: (...) -> None """ @@ -198,10 +194,24 @@ class TestURL(HyperlinkTestCase): @param userinfo: The expected userinfo. """ - actual = (u.scheme, u.host, u.path, u.query, - u.fragment, u.port, u.userinfo) - expected = (scheme, host, tuple(path), tuple(query), - fragment, port, u.userinfo) + actual = ( + u.scheme, + u.host, + u.path, + u.query, + u.fragment, + u.port, + u.userinfo, + ) + expected = ( + scheme, + host, + tuple(path), + tuple(query), + fragment, + port, + u.userinfo, + ) self.assertEqual(actual, expected) def test_initDefaults(self): @@ -209,42 +219,45 @@ class TestURL(HyperlinkTestCase): """ L{URL} should have appropriate default values. """ + def check(u): # type: (URL) -> None self.assertUnicoded(u) - self.assertURL(u, 'http', '', [], [], '', 80, '') + self.assertURL(u, "http", "", [], [], "", 80, "") - check(URL('http', '')) - check(URL('http', '', [], [])) - check(URL('http', '', [], [], '')) + check(URL("http", "")) + check(URL("http", "", [], [])) + check(URL("http", "", [], [], "")) def test_init(self): # type: () -> None """ L{URL} should accept L{unicode} parameters. """ - u = URL('s', 'h', ['p'], [('k', 'v'), ('k', None)], 'f') + u = URL("s", "h", ["p"], [("k", "v"), ("k", None)], "f") self.assertUnicoded(u) - self.assertURL(u, 's', 'h', ['p'], [('k', 'v'), ('k', None)], - 'f', None) + self.assertURL(u, "s", "h", ["p"], [("k", "v"), ("k", None)], "f", None) - self.assertURL(URL('http', '\xe0', ['\xe9'], - [('\u03bb', '\u03c0')], '\u22a5'), - 'http', '\xe0', ['\xe9'], - [('\u03bb', '\u03c0')], '\u22a5', 80) + self.assertURL( + URL("http", "\xe0", ["\xe9"], [("\u03bb", "\u03c0")], "\u22a5"), + "http", + "\xe0", + ["\xe9"], + [("\u03bb", "\u03c0")], + "\u22a5", + 80, + ) def test_initPercent(self): # type: () -> None """ L{URL} should accept (and not interpret) percent characters. """ - u = URL('s', '%68', ['%70'], [('%6B', '%76'), ('%6B', None)], - '%66') + u = URL("s", "%68", ["%70"], [("%6B", "%76"), ("%6B", None)], "%66") self.assertUnicoded(u) - self.assertURL(u, - 's', '%68', ['%70'], - [('%6B', '%76'), ('%6B', None)], - '%66', None) + self.assertURL( + u, "s", "%68", ["%70"], [("%6B", "%76"), ("%6B", None)], "%66", None + ) def test_repr(self): # type: () -> None @@ -254,10 +267,16 @@ class TestURL(HyperlinkTestCase): easy to read. """ self.assertEqual( - repr(URL(scheme='http', host='foo', path=['bar'], - query=[('baz', None), ('k', 'v')], - fragment='frob')), - "URL.from_text(%s)" % (repr(u"http://foo/bar?baz&k=v#frob"),) + repr( + URL( + scheme="http", + host="foo", + path=["bar"], + query=[("baz", None), ("k", "v")], + fragment="frob", + ) + ), + "URL.from_text(%s)" % (repr("http://foo/bar?baz&k=v#frob"),), ) def test_from_text(self): @@ -302,8 +321,9 @@ class TestURL(HyperlinkTestCase): self.assertEqual(urlpath, URL.from_text(BASIC_URL)) self.assertNotEqual( urlpath, - URL.from_text('ftp://www.anotherinvaliddomain.com/' - 'foo/bar/baz/?zot=21&zut') + URL.from_text( + "ftp://www.anotherinvaliddomain.com/" "foo/bar/baz/?zot=21&zut" + ), ) def test_fragmentEquality(self): @@ -312,9 +332,11 @@ class TestURL(HyperlinkTestCase): An URL created with the empty string for a fragment compares equal to an URL created with an unspecified fragment. """ - self.assertEqual(URL(fragment=''), URL()) - self.assertEqual(URL.from_text(u"http://localhost/#"), - URL.from_text(u"http://localhost/")) + self.assertEqual(URL(fragment=""), URL()) + self.assertEqual( + URL.from_text("http://localhost/#"), + URL.from_text("http://localhost/"), + ) def test_child(self): # type: () -> None @@ -323,17 +345,21 @@ class TestURL(HyperlinkTestCase): or fragment. """ urlpath = URL.from_text(BASIC_URL) - self.assertEqual("http://www.foo.com/a/nice/path/gong?zot=23&zut", - urlpath.child('gong').to_text()) - self.assertEqual("http://www.foo.com/a/nice/path/gong%2F?zot=23&zut", - urlpath.child('gong/').to_text()) + self.assertEqual( + "http://www.foo.com/a/nice/path/gong?zot=23&zut", + urlpath.child("gong").to_text(), + ) + self.assertEqual( + "http://www.foo.com/a/nice/path/gong%2F?zot=23&zut", + urlpath.child("gong/").to_text(), + ) self.assertEqual( "http://www.foo.com/a/nice/path/gong%2Fdouble?zot=23&zut", - urlpath.child('gong/double').to_text() + urlpath.child("gong/double").to_text(), ) self.assertEqual( "http://www.foo.com/a/nice/path/gong%2Fdouble%2F?zot=23&zut", - urlpath.child('gong/double/').to_text() + urlpath.child("gong/double/").to_text(), ) def test_multiChild(self): @@ -342,9 +368,10 @@ class TestURL(HyperlinkTestCase): L{URL.child} receives multiple segments as C{*args} and appends each in turn. """ - url = URL.from_text('http://example.com/a/b') - self.assertEqual(url.child('c', 'd', 'e').to_text(), - 'http://example.com/a/b/c/d/e') + url = URL.from_text("http://example.com/a/b") + self.assertEqual( + url.child("c", "d", "e").to_text(), "http://example.com/a/b/c/d/e" + ) def test_childInitRoot(self): # type: () -> None @@ -352,7 +379,7 @@ class TestURL(HyperlinkTestCase): L{URL.child} of a L{URL} without a path produces a L{URL} with a single path segment. """ - childURL = URL(host=u"www.foo.com").child(u"c") + childURL = URL(host="www.foo.com").child("c") self.assertTrue(childURL.rooted) self.assertEqual("http://www.foo.com/c", childURL.to_text()) @@ -361,7 +388,7 @@ class TestURL(HyperlinkTestCase): """ L{URL.child} without any new segments returns the original L{URL}. """ - url = URL(host=u"www.foo.com") + url = URL(host="www.foo.com") self.assertEqual(url.child(), url) def test_sibling(self): @@ -373,14 +400,14 @@ class TestURL(HyperlinkTestCase): urlpath = URL.from_text(BASIC_URL) self.assertEqual( "http://www.foo.com/a/nice/path/sister?zot=23&zut", - urlpath.sibling('sister').to_text() + urlpath.sibling("sister").to_text(), ) # Use an url without trailing '/' to check child removal. url_text = "http://www.foo.com/a/nice/path?zot=23&zut" urlpath = URL.from_text(url_text) self.assertEqual( "http://www.foo.com/a/nice/sister?zot=23&zut", - urlpath.sibling('sister').to_text() + urlpath.sibling("sister").to_text(), ) def test_click(self): @@ -391,47 +418,59 @@ class TestURL(HyperlinkTestCase): """ urlpath = URL.from_text(BASIC_URL) # A null uri should be valid (return here). - self.assertEqual("http://www.foo.com/a/nice/path/?zot=23&zut", - urlpath.click("").to_text()) + self.assertEqual( + "http://www.foo.com/a/nice/path/?zot=23&zut", + urlpath.click("").to_text(), + ) # A simple relative path remove the query. - self.assertEqual("http://www.foo.com/a/nice/path/click", - urlpath.click("click").to_text()) + self.assertEqual( + "http://www.foo.com/a/nice/path/click", + urlpath.click("click").to_text(), + ) # An absolute path replace path and query. - self.assertEqual("http://www.foo.com/click", - urlpath.click("/click").to_text()) + self.assertEqual( + "http://www.foo.com/click", urlpath.click("/click").to_text() + ) # Replace just the query. - self.assertEqual("http://www.foo.com/a/nice/path/?burp", - urlpath.click("?burp").to_text()) + self.assertEqual( + "http://www.foo.com/a/nice/path/?burp", + urlpath.click("?burp").to_text(), + ) # One full url to another should not generate '//' between authority. # and path - self.assertTrue("//foobar" not in - urlpath.click('http://www.foo.com/foobar').to_text()) + self.assertTrue( + "//foobar" + not in urlpath.click("http://www.foo.com/foobar").to_text() + ) # From a url with no query clicking a url with a query, the query # should be handled properly. - u = URL.from_text('http://www.foo.com/me/noquery') - self.assertEqual('http://www.foo.com/me/17?spam=158', - u.click('/me/17?spam=158').to_text()) + u = URL.from_text("http://www.foo.com/me/noquery") + self.assertEqual( + "http://www.foo.com/me/17?spam=158", + u.click("/me/17?spam=158").to_text(), + ) # Check that everything from the path onward is removed when the click # link has no path. - u = URL.from_text('http://localhost/foo?abc=def') - self.assertEqual(u.click('http://www.python.org').to_text(), - 'http://www.python.org') + u = URL.from_text("http://localhost/foo?abc=def") + self.assertEqual( + u.click("http://www.python.org").to_text(), "http://www.python.org" + ) # https://twistedmatrix.com/trac/ticket/8184 - u = URL.from_text('http://hatnote.com/a/b/../c/./d/e/..') - res = 'http://hatnote.com/a/c/d/' - self.assertEqual(u.click('').to_text(), res) + u = URL.from_text("http://hatnote.com/a/b/../c/./d/e/..") + res = "http://hatnote.com/a/c/d/" + self.assertEqual(u.click("").to_text(), res) # test click default arg is same as empty string above self.assertEqual(u.click().to_text(), res) # test click on a URL instance - u = URL.fromText('http://localhost/foo/?abc=def') - u2 = URL.from_text('bar') + u = URL.fromText("http://localhost/foo/?abc=def") + u2 = URL.from_text("bar") u3 = u.click(u2) - self.assertEqual(u3.to_text(), 'http://localhost/foo/bar') + self.assertEqual(u3.to_text(), "http://localhost/foo/bar") def test_clickRFC3986(self): # type: () -> None @@ -448,8 +487,8 @@ class TestURL(HyperlinkTestCase): L{URL.click} should not accept schemes with relative paths. """ base = URL.from_text(relativeLinkBaseForRFC3986) - self.assertRaises(NotImplementedError, base.click, 'g:h') - self.assertRaises(NotImplementedError, base.click, 'http:h') + self.assertRaises(NotImplementedError, base.click, "g:h") + self.assertRaises(NotImplementedError, base.click, "http:h") def test_cloneUnchanged(self): # type: () -> None @@ -457,14 +496,18 @@ class TestURL(HyperlinkTestCase): Verify that L{URL.replace} doesn't change any of the arguments it is passed. """ - urlpath = URL.from_text('https://x:1/y?z=1#A') - self.assertEqual(urlpath.replace(urlpath.scheme, - urlpath.host, - urlpath.path, - urlpath.query, - urlpath.fragment, - urlpath.port), - urlpath) + urlpath = URL.from_text("https://x:1/y?z=1#A") + self.assertEqual( + urlpath.replace( + urlpath.scheme, + urlpath.host, + urlpath.path, + urlpath.query, + urlpath.fragment, + urlpath.port, + ), + urlpath, + ) self.assertEqual(urlpath.replace(), urlpath) def test_clickCollapse(self): @@ -474,21 +517,27 @@ class TestURL(HyperlinkTestCase): 5.2.4. """ tests = [ - ['http://localhost/', '.', 'http://localhost/'], - ['http://localhost/', '..', 'http://localhost/'], - ['http://localhost/a/b/c', '.', 'http://localhost/a/b/'], - ['http://localhost/a/b/c', '..', 'http://localhost/a/'], - ['http://localhost/a/b/c', './d/e', 'http://localhost/a/b/d/e'], - ['http://localhost/a/b/c', '../d/e', 'http://localhost/a/d/e'], - ['http://localhost/a/b/c', '/./d/e', 'http://localhost/d/e'], - ['http://localhost/a/b/c', '/../d/e', 'http://localhost/d/e'], - ['http://localhost/a/b/c/', '../../d/e/', - 'http://localhost/a/d/e/'], - ['http://localhost/a/./c', '../d/e', 'http://localhost/d/e'], - ['http://localhost/a/./c/', '../d/e', 'http://localhost/a/d/e'], - ['http://localhost/a/b/c/d', './e/../f/../g', - 'http://localhost/a/b/c/g'], - ['http://localhost/a/b/c', 'd//e', 'http://localhost/a/b/d//e'], + ["http://localhost/", ".", "http://localhost/"], + ["http://localhost/", "..", "http://localhost/"], + ["http://localhost/a/b/c", ".", "http://localhost/a/b/"], + ["http://localhost/a/b/c", "..", "http://localhost/a/"], + ["http://localhost/a/b/c", "./d/e", "http://localhost/a/b/d/e"], + ["http://localhost/a/b/c", "../d/e", "http://localhost/a/d/e"], + ["http://localhost/a/b/c", "/./d/e", "http://localhost/d/e"], + ["http://localhost/a/b/c", "/../d/e", "http://localhost/d/e"], + [ + "http://localhost/a/b/c/", + "../../d/e/", + "http://localhost/a/d/e/", + ], + ["http://localhost/a/./c", "../d/e", "http://localhost/d/e"], + ["http://localhost/a/./c/", "../d/e", "http://localhost/a/d/e"], + [ + "http://localhost/a/b/c/d", + "./e/../f/../g", + "http://localhost/a/b/c/g", + ], + ["http://localhost/a/b/c", "d//e", "http://localhost/a/b/d//e"], ] for start, click, expected in tests: actual = URL.from_text(start).click(click).to_text() @@ -500,7 +549,7 @@ class TestURL(HyperlinkTestCase): click=repr(click), actual=actual, expected=expected, - ) + ), ) def test_queryAdd(self): @@ -511,30 +560,36 @@ class TestURL(HyperlinkTestCase): self.assertEqual( "http://www.foo.com/a/nice/path/?foo=bar", URL.from_text("http://www.foo.com/a/nice/path/") - .add(u"foo", u"bar").to_text()) + .add("foo", "bar") + .to_text(), + ) self.assertEqual( "http://www.foo.com/?foo=bar", - URL(host=u"www.foo.com").add(u"foo", u"bar") - .to_text()) + URL(host="www.foo.com").add("foo", "bar").to_text(), + ) urlpath = URL.from_text(BASIC_URL) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp", - urlpath.add(u"burp").to_text()) + urlpath.add("burp").to_text(), + ) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx", - urlpath.add(u"burp", u"xxx").to_text()) + urlpath.add("burp", "xxx").to_text(), + ) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zing", - urlpath.add(u"burp", u"xxx").add(u"zing").to_text()) + urlpath.add("burp", "xxx").add("zing").to_text(), + ) # Note the inversion! self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&zing&burp=xxx", - urlpath.add(u"zing").add(u"burp", u"xxx").to_text()) + urlpath.add("zing").add("burp", "xxx").to_text(), + ) # Note the two values for the same name. self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zot=32", - urlpath.add(u"burp", u"xxx").add(u"zot", '32') - .to_text()) + urlpath.add("burp", "xxx").add("zot", "32").to_text(), + ) def test_querySet(self): # type: () -> None @@ -544,17 +599,18 @@ class TestURL(HyperlinkTestCase): urlpath = URL.from_text(BASIC_URL) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=32&zut", - urlpath.set(u"zot", '32').to_text()) + urlpath.set("zot", "32").to_text(), + ) # Replace name without value with name/value and vice-versa. self.assertEqual( "http://www.foo.com/a/nice/path/?zot&zut=itworked", - urlpath.set(u"zot").set(u"zut", u"itworked").to_text() + urlpath.set("zot").set("zut", "itworked").to_text(), ) # Q: what happens when the query has two values and we replace? # A: we replace both values with a single one self.assertEqual( "http://www.foo.com/a/nice/path/?zot=32&zut", - urlpath.add(u"zot", u"xxx").set(u"zot", '32').to_text() + urlpath.add("zot", "xxx").set("zot", "32").to_text(), ) def test_queryRemove(self): @@ -562,25 +618,24 @@ class TestURL(HyperlinkTestCase): """ L{URL.remove} removes instances of a query parameter. """ - url = URL.from_text(u"https://example.com/a/b/?foo=1&bar=2&foo=3") + url = URL.from_text("https://example.com/a/b/?foo=1&bar=2&foo=3") self.assertEqual( - url.remove(u"foo"), - URL.from_text(u"https://example.com/a/b/?bar=2") + url.remove("foo"), URL.from_text("https://example.com/a/b/?bar=2") ) self.assertEqual( - url.remove(name=u"foo", value=u"1"), - URL.from_text(u"https://example.com/a/b/?bar=2&foo=3") + url.remove(name="foo", value="1"), + URL.from_text("https://example.com/a/b/?bar=2&foo=3"), ) self.assertEqual( - url.remove(name=u"foo", limit=1), - URL.from_text(u"https://example.com/a/b/?bar=2&foo=3") + url.remove(name="foo", limit=1), + URL.from_text("https://example.com/a/b/?bar=2&foo=3"), ) self.assertEqual( - url.remove(name=u"foo", value=u"1", limit=0), - URL.from_text(u"https://example.com/a/b/?foo=1&bar=2&foo=3") + url.remove(name="foo", value="1", limit=0), + URL.from_text("https://example.com/a/b/?foo=1&bar=2&foo=3"), ) def test_parseEqualSignInParamValue(self): @@ -589,42 +644,42 @@ class TestURL(HyperlinkTestCase): Every C{=}-sign after the first in a query parameter is simply included in the value of the parameter. """ - u = URL.from_text('http://localhost/?=x=x=x') - self.assertEqual(u.get(''), ['x=x=x']) - self.assertEqual(u.to_text(), 'http://localhost/?=x=x=x') - u = URL.from_text('http://localhost/?foo=x=x=x&bar=y') - self.assertEqual(u.query, (('foo', 'x=x=x'), ('bar', 'y'))) - self.assertEqual(u.to_text(), 'http://localhost/?foo=x=x=x&bar=y') + u = URL.from_text("http://localhost/?=x=x=x") + self.assertEqual(u.get(""), ["x=x=x"]) + self.assertEqual(u.to_text(), "http://localhost/?=x=x=x") + u = URL.from_text("http://localhost/?foo=x=x=x&bar=y") + self.assertEqual(u.query, (("foo", "x=x=x"), ("bar", "y"))) + self.assertEqual(u.to_text(), "http://localhost/?foo=x=x=x&bar=y") u = URL.from_text( - 'https://example.com/?argument=3&argument=4&operator=%3D' + "https://example.com/?argument=3&argument=4&operator=%3D" ) iri = u.to_iri() - self.assertEqual(iri.get('operator'), ['=']) + self.assertEqual(iri.get("operator"), ["="]) # assert that the equals is not unnecessarily escaped - self.assertEqual(iri.to_uri().get('operator'), ['=']) + self.assertEqual(iri.to_uri().get("operator"), ["="]) def test_empty(self): # type: () -> None """ An empty L{URL} should serialize as the empty string. """ - self.assertEqual(URL().to_text(), '') + self.assertEqual(URL().to_text(), "") def test_justQueryText(self): # type: () -> None """ An L{URL} with query text should serialize as just query text. """ - u = URL(query=[(u"hello", u"world")]) - self.assertEqual(u.to_text(), '?hello=world') + u = URL(query=[("hello", "world")]) + self.assertEqual(u.to_text(), "?hello=world") def test_identicalEqual(self): # type: () -> None """ L{URL} compares equal to itself. """ - u = URL.from_text('http://localhost/') + u = URL.from_text("http://localhost/") self.assertEqual(u, u) def test_similarEqual(self): @@ -632,8 +687,8 @@ class TestURL(HyperlinkTestCase): """ URLs with equivalent components should compare equal. """ - u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') - u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') + u1 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") + u2 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") self.assertEqual(u1, u2) def test_differentNotEqual(self): @@ -642,8 +697,8 @@ class TestURL(HyperlinkTestCase): L{URL}s that refer to different resources are both unequal (C{!=}) and also not equal (not C{==}). """ - u1 = URL.from_text('http://localhost/a') - u2 = URL.from_text('http://localhost/b') + u1 = URL.from_text("http://localhost/a") + u2 = URL.from_text("http://localhost/b") self.assertFalse(u1 == u2, "%r != %r" % (u1, u2)) self.assertNotEqual(u1, u2) @@ -652,7 +707,7 @@ class TestURL(HyperlinkTestCase): """ L{URL} is not equal (C{==}) to other types. """ - u = URL.from_text('http://localhost/') + u = URL.from_text("http://localhost/") self.assertFalse(u == 42, "URL must not equal a number.") self.assertFalse(u == object(), "URL must not equal an object.") self.assertNotEqual(u, 42) @@ -663,7 +718,7 @@ class TestURL(HyperlinkTestCase): """ Identical L{URL}s are not unequal (C{!=}) to each other. """ - u = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') + u = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") self.assertFalse(u != u, "%r == itself" % u) def test_similarNotUnequal(self): @@ -671,8 +726,8 @@ class TestURL(HyperlinkTestCase): """ Structurally similar L{URL}s are not unequal (C{!=}) to each other. """ - u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') - u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') + u1 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") + u2 = URL.from_text("http://u@localhost:8080/p/a/t/h?q=p#f") self.assertFalse(u1 != u2, "%r == %r" % (u1, u2)) def test_differentUnequal(self): @@ -680,8 +735,8 @@ class TestURL(HyperlinkTestCase): """ Structurally different L{URL}s are unequal (C{!=}) to each other. """ - u1 = URL.from_text('http://localhost/a') - u2 = URL.from_text('http://localhost/b') + u1 = URL.from_text("http://localhost/a") + u2 = URL.from_text("http://localhost/b") self.assertTrue(u1 != u2, "%r == %r" % (u1, u2)) def test_otherTypesUnequal(self): @@ -689,7 +744,7 @@ class TestURL(HyperlinkTestCase): """ L{URL} is unequal (C{!=}) to other types. """ - u = URL.from_text('http://localhost/') + u = URL.from_text("http://localhost/") self.assertTrue(u != 42, "URL must differ from a number.") self.assertTrue(u != object(), "URL must be differ from an object.") @@ -699,21 +754,25 @@ class TestURL(HyperlinkTestCase): L{URL.asURI} produces an URI which converts any URI unicode encoding into pure US-ASCII and returns a new L{URL}. """ - unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' - '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}' - '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=' - '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}' - '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}') + unicodey = ( + "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/" + "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}" + "?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=" + "\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}" + "#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}" + ) iri = URL.from_text(unicodey) uri = iri.asURI() - self.assertEqual(iri.host, '\N{LATIN SMALL LETTER E WITH ACUTE}.com') - self.assertEqual(iri.path[0], - '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}') + self.assertEqual(iri.host, "\N{LATIN SMALL LETTER E WITH ACUTE}.com") + self.assertEqual( + iri.path[0], "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}" + ) self.assertEqual(iri.to_text(), unicodey) - expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA' + expectedURI = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA" actualURI = uri.to_text() - self.assertEqual(actualURI, expectedURI, - '%r != %r' % (actualURI, expectedURI)) + self.assertEqual( + actualURI, expectedURI, "%r != %r" % (actualURI, expectedURI) + ) def test_asIRI(self): # type: () -> None @@ -721,20 +780,23 @@ class TestURL(HyperlinkTestCase): L{URL.asIRI} decodes any percent-encoded text in the URI, making it more suitable for reading by humans, and returns a new L{URL}. """ - asciiish = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA' + asciiish = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA" uri = URL.from_text(asciiish) iri = uri.asIRI() - self.assertEqual(uri.host, 'xn--9ca.com') - self.assertEqual(uri.path[0], '%C3%A9') + self.assertEqual(uri.host, "xn--9ca.com") + self.assertEqual(uri.path[0], "%C3%A9") self.assertEqual(uri.to_text(), asciiish) - expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' - '\N{LATIN SMALL LETTER E WITH ACUTE}' - '?\N{LATIN SMALL LETTER A WITH ACUTE}=' - '\N{LATIN SMALL LETTER I WITH ACUTE}' - '#\N{LATIN SMALL LETTER U WITH ACUTE}') + expectedIRI = ( + "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/" + "\N{LATIN SMALL LETTER E WITH ACUTE}" + "?\N{LATIN SMALL LETTER A WITH ACUTE}=" + "\N{LATIN SMALL LETTER I WITH ACUTE}" + "#\N{LATIN SMALL LETTER U WITH ACUTE}" + ) actualIRI = iri.to_text() - self.assertEqual(actualIRI, expectedIRI, - '%r != %r' % (actualIRI, expectedIRI)) + self.assertEqual( + actualIRI, expectedIRI, "%r != %r" % (actualIRI, expectedIRI) + ) def test_badUTF8AsIRI(self): # type: () -> None @@ -742,26 +804,31 @@ class TestURL(HyperlinkTestCase): Bad UTF-8 in a path segment, query parameter, or fragment results in that portion of the URI remaining percent-encoded in the IRI. """ - urlWithBinary = 'http://xn--9ca.com/%00%FF/%C3%A9' + urlWithBinary = "http://xn--9ca.com/%00%FF/%C3%A9" uri = URL.from_text(urlWithBinary) iri = uri.asIRI() - expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' - '%00%FF/' - '\N{LATIN SMALL LETTER E WITH ACUTE}') + expectedIRI = ( + "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/" + "%00%FF/" + "\N{LATIN SMALL LETTER E WITH ACUTE}" + ) actualIRI = iri.to_text() - self.assertEqual(actualIRI, expectedIRI, - '%r != %r' % (actualIRI, expectedIRI)) + self.assertEqual( + actualIRI, expectedIRI, "%r != %r" % (actualIRI, expectedIRI) + ) def test_alreadyIRIAsIRI(self): # type: () -> None """ A L{URL} composed of non-ASCII text will result in non-ASCII text. """ - unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' - '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}' - '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=' - '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}' - '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}') + unicodey = ( + "http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/" + "\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}" + "?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=" + "\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}" + "#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}" + ) iri = URL.from_text(unicodey) alsoIRI = iri.asIRI() self.assertEqual(alsoIRI.to_text(), unicodey) @@ -771,7 +838,7 @@ class TestURL(HyperlinkTestCase): """ A L{URL} composed of encoded text will remain encoded. """ - expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA' + expectedURI = "http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA" uri = URL.from_text(expectedURI) actualURI = uri.asURI().to_text() self.assertEqual(actualURI, expectedURI) @@ -783,18 +850,20 @@ class TestURL(HyperlinkTestCase): separately from the host and port. """ url = URL.from_text( - 'http://someuser:somepassword@example.com/some-segment@ignore' + "http://someuser:somepassword@example.com/some-segment@ignore" ) - self.assertEqual(url.authority(True), - 'someuser:somepassword@example.com') - self.assertEqual(url.authority(False), 'someuser:@example.com') - self.assertEqual(url.userinfo, 'someuser:somepassword') - self.assertEqual(url.user, 'someuser') - self.assertEqual(url.to_text(), - 'http://someuser:@example.com/some-segment@ignore') self.assertEqual( - url.replace(userinfo=u"someuser").to_text(), - 'http://someuser@example.com/some-segment@ignore' + url.authority(True), "someuser:somepassword@example.com" + ) + self.assertEqual(url.authority(False), "someuser:@example.com") + self.assertEqual(url.userinfo, "someuser:somepassword") + self.assertEqual(url.user, "someuser") + self.assertEqual( + url.to_text(), "http://someuser:@example.com/some-segment@ignore" + ) + self.assertEqual( + url.replace(userinfo="someuser").to_text(), + "http://someuser@example.com/some-segment@ignore", ) def test_portText(self): @@ -802,9 +871,9 @@ class TestURL(HyperlinkTestCase): """ L{URL.from_text} parses custom port numbers as integers. """ - portURL = URL.from_text(u"http://www.example.com:8080/") + portURL = URL.from_text("http://www.example.com:8080/") self.assertEqual(portURL.port, 8080) - self.assertEqual(portURL.to_text(), u"http://www.example.com:8080/") + self.assertEqual(portURL.to_text(), "http://www.example.com:8080/") def test_mailto(self): # type: () -> None @@ -814,8 +883,22 @@ class TestURL(HyperlinkTestCase): L{URL.from_text}/L{URL.to_text} round-trips cleanly for a C{mailto:} URL representing an email address. """ - self.assertEqual(URL.from_text(u"mailto:user@example.com").to_text(), - u"mailto:user@example.com") + self.assertEqual( + URL.from_text("mailto:user@example.com").to_text(), + "mailto:user@example.com", + ) + + def test_httpWithoutHost(self): + # type: () -> None + """ + An HTTP URL without a hostname, but with a path, should also round-trip + cleanly. + """ + without_host = URL.from_text("http:relative-path") + self.assertEqual(without_host.host, "") + self.assertEqual(without_host.path, ("relative-path",)) + self.assertEqual(without_host.uses_netloc, False) + self.assertEqual(without_host.to_text(), "http:relative-path") def test_queryIterable(self): # type: () -> None @@ -824,10 +907,10 @@ class TestURL(HyperlinkTestCase): argument is converted into an N-tuple of 2-tuples, sensibly handling dictionaries. """ - expected = (('alpha', 'beta'),) - url = URL(query=[('alpha', 'beta')]) + expected = (("alpha", "beta"),) + url = URL(query=[("alpha", "beta")]) self.assertEqual(url.query, expected) - url = URL(query={'alpha': 'beta'}) + url = URL(query={"alpha": "beta"}) self.assertEqual(url.query, expected) def test_pathIterable(self): @@ -836,8 +919,8 @@ class TestURL(HyperlinkTestCase): When a L{URL} is created with a C{path} argument, the C{path} is converted into a tuple. """ - url = URL(path=['hello', 'world']) - self.assertEqual(url.path, ('hello', 'world')) + url = URL(path=["hello", "world"]) + self.assertEqual(url.path, ("hello", "world")) def test_invalidArguments(self): # type: () -> None @@ -850,6 +933,7 @@ class TestURL(HyperlinkTestCase): bad data crops up in a method call long after the code that called the constructor is off the stack. """ + class Unexpected(object): def __str__(self): # type: () -> str @@ -863,10 +947,12 @@ class TestURL(HyperlinkTestCase): def assertRaised(raised, expectation, name): # type: (Any, Text, Text) -> None - self.assertEqual(str(raised.exception), - "expected {0} for {1}, got {2}".format( - expectation, - name, "")) + self.assertEqual( + str(raised.exception), + "expected {0} for {1}, got {2}".format( + expectation, name, "" + ), + ) def check(param, expectation=defaultExpectation): # type: (Any, str) -> None @@ -888,13 +974,14 @@ class TestURL(HyperlinkTestCase): assertRaised(raised, defaultExpectation, "path segment") with self.assertRaises(TypeError) as raised: - URL(query=[(u"name", cast(Text, Unexpected()))]) + URL(query=[("name", cast(Text, Unexpected()))]) - assertRaised(raised, defaultExpectation + " or NoneType", - "query parameter value") + assertRaised( + raised, defaultExpectation + " or NoneType", "query parameter value" + ) with self.assertRaises(TypeError) as raised: - URL(query=[(cast(Text, Unexpected()), u"value")]) + URL(query=[(cast(Text, Unexpected()), "value")]) assertRaised(raised, defaultExpectation, "query parameter name") # No custom error message for this one, just want to make sure @@ -904,10 +991,10 @@ class TestURL(HyperlinkTestCase): URL(query=[cast(Tuple[Text, Text], Unexpected())]) with self.assertRaises(ValueError): - URL(query=[cast(Tuple[Text, Text], ('k', 'v', 'vv'))]) + URL(query=[cast(Tuple[Text, Text], ("k", "v", "vv"))]) with self.assertRaises(ValueError): - URL(query=[cast(Tuple[Text, Text], ('k',))]) + URL(query=[cast(Tuple[Text, Text], ("k",))]) url = URL.from_text("https://valid.example.com/") with self.assertRaises(TypeError) as raised: @@ -928,154 +1015,175 @@ class TestURL(HyperlinkTestCase): you want. """ with self.assertRaises(TypeError) as raised: - URL(path='foo') + URL(path="foo") self.assertEqual( str(raised.exception), - "expected iterable of text for path, not: {0}".format(repr('foo')) + "expected iterable of text for path, not: {0}".format(repr("foo")), ) def test_netloc(self): # type: () -> None - url = URL(scheme='https') + url = URL(scheme="https") self.assertEqual(url.uses_netloc, True) + self.assertEqual(url.to_text(), "https://") + # scheme, no host, no path, no netloc hack + self.assertEqual(URL.from_text("https:").uses_netloc, False) + # scheme, no host, absolute path, no netloc hack + self.assertEqual(URL.from_text("https:/").uses_netloc, False) + # scheme, no host, no path, netloc hack to indicate :// syntax + self.assertEqual(URL.from_text("https://").uses_netloc, True) - url = URL(scheme='git+https') - self.assertEqual(url.uses_netloc, True) - - url = URL(scheme='mailto') + url = URL(scheme="https", uses_netloc=False) self.assertEqual(url.uses_netloc, False) + self.assertEqual(url.to_text(), "https:") - url = URL(scheme='ztp') + url = URL(scheme="git+https") + self.assertEqual(url.uses_netloc, True) + self.assertEqual(url.to_text(), "git+https://") + + url = URL(scheme="mailto") + self.assertEqual(url.uses_netloc, False) + self.assertEqual(url.to_text(), "mailto:") + + url = URL(scheme="ztp") self.assertEqual(url.uses_netloc, None) + self.assertEqual(url.to_text(), "ztp:") - url = URL.from_text('ztp://test.com') + url = URL.from_text("ztp://test.com") self.assertEqual(url.uses_netloc, True) - url = URL.from_text('ztp:test:com') + url = URL.from_text("ztp:test:com") self.assertEqual(url.uses_netloc, False) def test_ipv6_with_port(self): # type: () -> None - t = 'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:80/' + t = "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:80/" url = URL.from_text(t) - assert url.host == '2001:0db8:85a3:0000:0000:8a2e:0370:7334' + assert url.host == "2001:0db8:85a3:0000:0000:8a2e:0370:7334" assert url.port == 80 assert SCHEME_PORT_MAP[url.scheme] != url.port def test_basic(self): # type: () -> None - text = 'https://user:pass@example.com/path/to/here?k=v#nice' + text = "https://user:pass@example.com/path/to/here?k=v#nice" url = URL.from_text(text) - assert url.scheme == 'https' - assert url.userinfo == 'user:pass' - assert url.host == 'example.com' - assert url.path == ('path', 'to', 'here') - assert url.fragment == 'nice' + assert url.scheme == "https" + assert url.userinfo == "user:pass" + assert url.host == "example.com" + assert url.path == ("path", "to", "here") + assert url.fragment == "nice" - text = 'https://user:pass@127.0.0.1/path/to/here?k=v#nice' + text = "https://user:pass@127.0.0.1/path/to/here?k=v#nice" url = URL.from_text(text) - assert url.scheme == 'https' - assert url.userinfo == 'user:pass' - assert url.host == '127.0.0.1' - assert url.path == ('path', 'to', 'here') + assert url.scheme == "https" + assert url.userinfo == "user:pass" + assert url.host == "127.0.0.1" + assert url.path == ("path", "to", "here") - text = 'https://user:pass@[::1]/path/to/here?k=v#nice' + text = "https://user:pass@[::1]/path/to/here?k=v#nice" url = URL.from_text(text) - assert url.scheme == 'https' - assert url.userinfo == 'user:pass' - assert url.host == '::1' - assert url.path == ('path', 'to', 'here') + assert url.scheme == "https" + assert url.userinfo == "user:pass" + assert url.host == "::1" + assert url.path == ("path", "to", "here") def test_invalid_url(self): # type: () -> None - self.assertRaises(URLParseError, URL.from_text, '#\n\n') + self.assertRaises(URLParseError, URL.from_text, "#\n\n") def test_invalid_authority_url(self): # type: () -> None - self.assertRaises(URLParseError, URL.from_text, 'http://abc:\n\n/#') + self.assertRaises(URLParseError, URL.from_text, "http://abc:\n\n/#") def test_invalid_ipv6(self): # type: () -> None - invalid_ipv6_ips = ['2001::0234:C1ab::A0:aabc:003F', - '2001::1::3F', - ':', - '::::', - '::256.0.0.1'] + invalid_ipv6_ips = [ + "2001::0234:C1ab::A0:aabc:003F", + "2001::1::3F", + ":", + "::::", + "::256.0.0.1", + ] for ip in invalid_ipv6_ips: - url_text = 'http://[' + ip + ']' + url_text = "http://[" + ip + "]" self.assertRaises(socket.error, inet_pton, socket.AF_INET6, ip) self.assertRaises(URLParseError, URL.from_text, url_text) def test_invalid_port(self): # type: () -> None + self.assertRaises(URLParseError, URL.from_text, "ftp://portmouth:smash") self.assertRaises( - URLParseError, URL.from_text, 'ftp://portmouth:smash' + ValueError, + URL.from_text, + "http://reader.googlewebsite.com:neverforget", ) - self.assertRaises(ValueError, URL.from_text, - 'http://reader.googlewebsite.com:neverforget') def test_idna(self): # type: () -> None - u1 = URL.from_text('http://bücher.ch') - self.assertEqual(u1.host, 'bücher.ch') - self.assertEqual(u1.to_text(), 'http://bücher.ch') - self.assertEqual(u1.to_uri().to_text(), 'http://xn--bcher-kva.ch') + u1 = URL.from_text("http://bücher.ch") + self.assertEqual(u1.host, "bücher.ch") + self.assertEqual(u1.to_text(), "http://bücher.ch") + self.assertEqual(u1.to_uri().to_text(), "http://xn--bcher-kva.ch") - u2 = URL.from_text('https://xn--bcher-kva.ch') - self.assertEqual(u2.host, 'xn--bcher-kva.ch') - self.assertEqual(u2.to_text(), 'https://xn--bcher-kva.ch') - self.assertEqual(u2.to_iri().to_text(), u'https://bücher.ch') + u2 = URL.from_text("https://xn--bcher-kva.ch") + self.assertEqual(u2.host, "xn--bcher-kva.ch") + self.assertEqual(u2.to_text(), "https://xn--bcher-kva.ch") + self.assertEqual(u2.to_iri().to_text(), "https://bücher.ch") def test_netloc_slashes(self): # type: () -> None # basic sanity checks - url = URL.from_text('mailto:mahmoud@hatnote.com') - self.assertEqual(url.scheme, 'mailto') - self.assertEqual(url.to_text(), 'mailto:mahmoud@hatnote.com') + url = URL.from_text("mailto:mahmoud@hatnote.com") + self.assertEqual(url.scheme, "mailto") + self.assertEqual(url.to_text(), "mailto:mahmoud@hatnote.com") - url = URL.from_text('http://hatnote.com') - self.assertEqual(url.scheme, 'http') - self.assertEqual(url.to_text(), 'http://hatnote.com') + url = URL.from_text("http://hatnote.com") + self.assertEqual(url.scheme, "http") + self.assertEqual(url.to_text(), "http://hatnote.com") # test that unrecognized schemes stay consistent with '//' - url = URL.from_text('newscheme:a:b:c') - self.assertEqual(url.scheme, 'newscheme') - self.assertEqual(url.to_text(), 'newscheme:a:b:c') + url = URL.from_text("newscheme:a:b:c") + self.assertEqual(url.scheme, "newscheme") + self.assertEqual(url.to_text(), "newscheme:a:b:c") - url = URL.from_text('newerscheme://a/b/c') - self.assertEqual(url.scheme, 'newerscheme') - self.assertEqual(url.to_text(), 'newerscheme://a/b/c') + url = URL.from_text("newerscheme://a/b/c") + self.assertEqual(url.scheme, "newerscheme") + self.assertEqual(url.to_text(), "newerscheme://a/b/c") # test that reasonable guesses are made - url = URL.from_text('git+ftp://gitstub.biz/glyph/lefkowitz') - self.assertEqual(url.scheme, 'git+ftp') - self.assertEqual(url.to_text(), - 'git+ftp://gitstub.biz/glyph/lefkowitz') + url = URL.from_text("git+ftp://gitstub.biz/glyph/lefkowitz") + self.assertEqual(url.scheme, "git+ftp") + self.assertEqual(url.to_text(), "git+ftp://gitstub.biz/glyph/lefkowitz") - url = URL.from_text('what+mailto:freerealestate@enotuniq.org') - self.assertEqual(url.scheme, 'what+mailto') - self.assertEqual(url.to_text(), - 'what+mailto:freerealestate@enotuniq.org') + url = URL.from_text("what+mailto:freerealestate@enotuniq.org") + self.assertEqual(url.scheme, "what+mailto") + self.assertEqual( + url.to_text(), "what+mailto:freerealestate@enotuniq.org" + ) - url = URL(scheme='ztp', path=('x', 'y', 'z'), rooted=True) - self.assertEqual(url.to_text(), 'ztp:/x/y/z') + url = URL(scheme="ztp", path=("x", "y", "z"), rooted=True) + self.assertEqual(url.to_text(), "ztp:/x/y/z") # also works when the input doesn't include '//' - url = URL(scheme='git+ftp', path=('x', 'y', 'z', ''), - rooted=True, uses_netloc=True) + url = URL( + scheme="git+ftp", + path=("x", "y", "z", ""), + rooted=True, + uses_netloc=True, + ) # broken bc urlunsplit - self.assertEqual(url.to_text(), 'git+ftp:///x/y/z/') + self.assertEqual(url.to_text(), "git+ftp:///x/y/z/") # really why would this ever come up but ok - url = URL.from_text('file:///path/to/heck') - url2 = url.replace(scheme='mailto') - self.assertEqual(url2.to_text(), 'mailto:/path/to/heck') + url = URL.from_text("file:///path/to/heck") + url2 = url.replace(scheme="mailto") + self.assertEqual(url2.to_text(), "mailto:/path/to/heck") - url_text = 'unregisteredscheme:///a/b/c' + url_text = "unregisteredscheme:///a/b/c" url = URL.from_text(url_text) no_netloc_url = url.replace(uses_netloc=False) - self.assertEqual(no_netloc_url.to_text(), 'unregisteredscheme:/a/b/c') + self.assertEqual(no_netloc_url.to_text(), "unregisteredscheme:/a/b/c") netloc_url = url.replace(uses_netloc=True) self.assertEqual(netloc_url.to_text(), url_text) @@ -1087,10 +1195,10 @@ class TestURL(HyperlinkTestCase): On host-relative URLs, the C{rooted} flag can be updated to indicate that the path should no longer be treated as absolute. """ - a = URL(path=['hello']) - self.assertEqual(a.to_text(), 'hello') + a = URL(path=["hello"]) + self.assertEqual(a.to_text(), "hello") b = a.replace(rooted=True) - self.assertEqual(b.to_text(), '/hello') + self.assertEqual(b.to_text(), "/hello") self.assertNotEqual(a, b) def test_autorooted(self): @@ -1104,18 +1212,45 @@ class TestURL(HyperlinkTestCase): elided and it becomes rooted, because these cases are syntactically indistinguisable in real URL text. """ - relative_path_rooted = URL(path=['', 'foo'], rooted=False) + relative_path_rooted = URL(path=["", "foo"], rooted=False) self.assertEqual(relative_path_rooted.rooted, True) - relative_flag_rooted = URL(path=['foo'], rooted=True) + relative_flag_rooted = URL(path=["foo"], rooted=True) self.assertEqual(relative_flag_rooted.rooted, True) self.assertEqual(relative_path_rooted, relative_flag_rooted) - attempt_unrooted_absolute = URL(host="foo", path=['bar'], rooted=False) + attempt_unrooted_absolute = URL(host="foo", path=["bar"], rooted=False) normal_absolute = URL(host="foo", path=["bar"]) self.assertEqual(attempt_unrooted_absolute, normal_absolute) self.assertEqual(normal_absolute.rooted, True) self.assertEqual(attempt_unrooted_absolute.rooted, True) + def test_rooted_with_port_but_no_host(self): + # type: () -> None + """ + URLs which include a ``://`` netloc-separator for any reason are + inherently rooted, regardless of the value or presence of the + ``rooted`` constructor argument. + + They may include a netloc-separator because their constructor was + directly invoked with an explicit host or port, or because they were + parsed from a string which included the literal ``://`` separator. + """ + directly_constructed = URL(scheme="udp", port=4900, rooted=False) + directly_constructed_implict = URL(scheme="udp", port=4900) + directly_constructed_rooted = URL(scheme="udp", port=4900, rooted=True) + self.assertEqual(directly_constructed.rooted, True) + self.assertEqual(directly_constructed_implict.rooted, True) + self.assertEqual(directly_constructed_rooted.rooted, True) + parsed = URL.from_text("udp://:4900") + self.assertEqual(str(directly_constructed), str(parsed)) + self.assertEqual(str(directly_constructed_implict), str(parsed)) + self.assertEqual(directly_constructed.asText(), parsed.asText()) + self.assertEqual(directly_constructed, parsed) + self.assertEqual(directly_constructed, directly_constructed_implict) + self.assertEqual(directly_constructed, directly_constructed_rooted) + self.assertEqual(directly_constructed_implict, parsed) + self.assertEqual(directly_constructed_rooted, parsed) + def test_wrong_constructor(self): # type: () -> None with self.assertRaises(ValueError): @@ -1123,37 +1258,33 @@ class TestURL(HyperlinkTestCase): URL(BASIC_URL) with self.assertRaises(ValueError): # explicitly bad scheme not allowed - URL('HTTP_____more_like_imHoTTeP') + URL("HTTP_____more_like_imHoTTeP") def test_encoded_userinfo(self): # type: () -> None - url = URL.from_text('http://user:pass@example.com') - assert url.userinfo == 'user:pass' - url = url.replace(userinfo='us%20her:pass') + url = URL.from_text("http://user:pass@example.com") + assert url.userinfo == "user:pass" + url = url.replace(userinfo="us%20her:pass") iri = url.to_iri() assert ( - iri.to_text(with_password=True) == - 'http://us her:pass@example.com' + iri.to_text(with_password=True) == "http://us her:pass@example.com" ) + assert iri.to_text(with_password=False) == "http://us her:@example.com" assert ( - iri.to_text(with_password=False) == - 'http://us her:@example.com' - ) - assert ( - iri.to_uri().to_text(with_password=True) == - 'http://us%20her:pass@example.com' + iri.to_uri().to_text(with_password=True) + == "http://us%20her:pass@example.com" ) def test_hash(self): # type: () -> None url_map = {} - url1 = URL.from_text('http://blog.hatnote.com/ask?utm_source=geocity') + url1 = URL.from_text("http://blog.hatnote.com/ask?utm_source=geocity") assert hash(url1) == hash(url1) # sanity url_map[url1] = 1 - url2 = URL.from_text('http://blog.hatnote.com/ask') - url2 = url2.set('utm_source', 'geocity') + url2 = URL.from_text("http://blog.hatnote.com/ask") + url2 = url2.set("utm_source", "geocity") url_map[url2] = 2 @@ -1169,26 +1300,26 @@ class TestURL(HyperlinkTestCase): assert len(res) > 15 # twisted compat - assert 'fromText' not in res - assert 'asText' not in res - assert 'asURI' not in res - assert 'asIRI' not in res + assert "fromText" not in res + assert "asText" not in res + assert "asURI" not in res + assert "asIRI" not in res def test_twisted_compat(self): # type: () -> None - url = URL.fromText(u'http://example.com/a%20té%C3%A9st') - assert url.asText() == 'http://example.com/a%20té%C3%A9st' - assert url.asURI().asText() == 'http://example.com/a%20t%C3%A9%C3%A9st' + url = URL.fromText("http://example.com/a%20té%C3%A9st") + assert url.asText() == "http://example.com/a%20té%C3%A9st" + assert url.asURI().asText() == "http://example.com/a%20t%C3%A9%C3%A9st" # TODO: assert url.asIRI().asText() == u'http://example.com/a%20téést' def test_set_ordering(self): # type: () -> None # TODO - url = URL.from_text('http://example.com/?a=b&c') - url = url.set(u'x', u'x') - url = url.add(u'x', u'y') - assert url.to_text() == u'http://example.com/?a=b&x=x&c&x=y' + url = URL.from_text("http://example.com/?a=b&c") + url = url.set("x", "x") + url = url.add("x", "y") + assert url.to_text() == "http://example.com/?a=b&x=x&c&x=y" # Would expect: # assert url.to_text() == u'http://example.com/?a=b&c&x=x&x=y' @@ -1206,7 +1337,7 @@ class TestURL(HyperlinkTestCase): # test that colons are ok past the first segment u4 = URL.from_text("first-segment/urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob") u5 = u4.to_iri() - assert u5.to_text() == u'first-segment/urn:ietf:wg:oauth:2.0:oob' + assert u5.to_text() == "first-segment/urn:ietf:wg:oauth:2.0:oob" u6 = URL.from_text(u5.to_text()).to_uri() assert u5 == u6 # colons stay decoded bc they're not in the first seg @@ -1214,7 +1345,7 @@ class TestURL(HyperlinkTestCase): def test_emoji_domain(self): # type: () -> None "See issue #7, affecting only narrow builds (2.6-3.3)" - url = URL.from_text('https://xn--vi8hiv.ws') + url = URL.from_text("https://xn--vi8hiv.ws") iri = url.to_iri() iri.to_text() # as long as we don't get ValueErrors, we're good @@ -1222,118 +1353,120 @@ class TestURL(HyperlinkTestCase): def test_delim_in_param(self): # type: () -> None "Per issue #6 and #8" - self.assertRaises(ValueError, URL, scheme=u'http', host=u'a/c') - self.assertRaises(ValueError, URL, path=(u"?",)) - self.assertRaises(ValueError, URL, path=(u"#",)) - self.assertRaises(ValueError, URL, query=((u"&", "test"))) + self.assertRaises(ValueError, URL, scheme="http", host="a/c") + self.assertRaises(ValueError, URL, path=("?",)) + self.assertRaises(ValueError, URL, path=("#",)) + self.assertRaises(ValueError, URL, query=(("&", "test"))) def test_empty_paths_eq(self): # type: () -> None - u1 = URL.from_text('http://example.com/') - u2 = URL.from_text('http://example.com') + u1 = URL.from_text("http://example.com/") + u2 = URL.from_text("http://example.com") assert u1 == u2 - u1 = URL.from_text('http://example.com') - u2 = URL.from_text('http://example.com') + u1 = URL.from_text("http://example.com") + u2 = URL.from_text("http://example.com") assert u1 == u2 - u1 = URL.from_text('http://example.com') - u2 = URL.from_text('http://example.com/') + u1 = URL.from_text("http://example.com") + u2 = URL.from_text("http://example.com/") assert u1 == u2 - u1 = URL.from_text('http://example.com/') - u2 = URL.from_text('http://example.com/') + u1 = URL.from_text("http://example.com/") + u2 = URL.from_text("http://example.com/") assert u1 == u2 def test_from_text_type(self): # type: () -> None - assert URL.from_text(u'#ok').fragment == u'ok' # sanity - self.assertRaises(TypeError, URL.from_text, b'bytes://x.y.z') + assert URL.from_text("#ok").fragment == "ok" # sanity + self.assertRaises(TypeError, URL.from_text, b"bytes://x.y.z") self.assertRaises(TypeError, URL.from_text, object()) def test_from_text_bad_authority(self): # type: () -> None # bad ipv6 brackets - self.assertRaises(URLParseError, URL.from_text, 'http://[::1/') - self.assertRaises(URLParseError, URL.from_text, 'http://::1]/') - self.assertRaises(URLParseError, URL.from_text, 'http://[[::1]/') - self.assertRaises(URLParseError, URL.from_text, 'http://[::1]]/') + self.assertRaises(URLParseError, URL.from_text, "http://[::1/") + self.assertRaises(URLParseError, URL.from_text, "http://::1]/") + self.assertRaises(URLParseError, URL.from_text, "http://[[::1]/") + self.assertRaises(URLParseError, URL.from_text, "http://[::1]]/") # empty port - self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1:') + self.assertRaises(URLParseError, URL.from_text, "http://127.0.0.1:") # non-integer port - self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1:hi') + self.assertRaises(URLParseError, URL.from_text, "http://127.0.0.1:hi") # extra port colon (makes for an invalid host) - self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1::80') + self.assertRaises(URLParseError, URL.from_text, "http://127.0.0.1::80") def test_normalize(self): # type: () -> None - url = URL.from_text('HTTP://Example.com/A%61/./../A%61?B%62=C%63#D%64') - assert url.get('Bb') == [] - assert url.get('B%62') == ['C%63'] + url = URL.from_text("HTTP://Example.com/A%61/./../A%61?B%62=C%63#D%64") + assert url.get("Bb") == [] + assert url.get("B%62") == ["C%63"] assert len(url.path) == 4 # test that most expected normalizations happen norm_url = url.normalize() - assert norm_url.scheme == 'http' - assert norm_url.host == 'example.com' - assert norm_url.path == ('Aa',) - assert norm_url.get('Bb') == ['Cc'] - assert norm_url.fragment == 'Dd' - assert norm_url.to_text() == 'http://example.com/Aa?Bb=Cc#Dd' + assert norm_url.scheme == "http" + assert norm_url.host == "example.com" + assert norm_url.path == ("Aa",) + assert norm_url.get("Bb") == ["Cc"] + assert norm_url.fragment == "Dd" + assert norm_url.to_text() == "http://example.com/Aa?Bb=Cc#Dd" # test that flags work - noop_norm_url = url.normalize(scheme=False, host=False, - path=False, query=False, fragment=False) + noop_norm_url = url.normalize( + scheme=False, host=False, path=False, query=False, fragment=False + ) assert noop_norm_url == url # test that empty paths get at least one slash - slashless_url = URL.from_text('http://example.io') + slashless_url = URL.from_text("http://example.io") slashful_url = slashless_url.normalize() - assert slashful_url.to_text() == 'http://example.io/' + assert slashful_url.to_text() == "http://example.io/" # test case normalization for percent encoding - delimited_url = URL.from_text('/a%2fb/cd%3f?k%3d=v%23#test') + delimited_url = URL.from_text("/a%2fb/cd%3f?k%3d=v%23#test") norm_delimited_url = delimited_url.normalize() - assert norm_delimited_url.to_text() == '/a%2Fb/cd%3F?k%3D=v%23#test' + assert norm_delimited_url.to_text() == "/a%2Fb/cd%3F?k%3D=v%23#test" # test invalid percent encoding during normalize assert ( - URL(path=('', '%te%sts')).normalize(percents=False).to_text() == - '/%te%sts' - ) - assert ( - URL(path=('', '%te%sts')).normalize().to_text() == '/%25te%25sts' + URL(path=("", "%te%sts")).normalize(percents=False).to_text() + == "/%te%sts" ) + assert URL(path=("", "%te%sts")).normalize().to_text() == "/%25te%25sts" percenty_url = URL( - scheme='ftp', path=['%%%', '%a%b'], query=[('%', '%%')], - fragment='%', userinfo='%:%', + scheme="ftp", + path=["%%%", "%a%b"], + query=[("%", "%%")], + fragment="%", + userinfo="%:%", ) assert ( - percenty_url.to_text(with_password=True) == - 'ftp://%:%@/%%%/%a%b?%=%%#%' + percenty_url.to_text(with_password=True) + == "ftp://%:%@/%%%/%a%b?%=%%#%" ) assert ( - percenty_url.normalize().to_text(with_password=True) == - 'ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25' + percenty_url.normalize().to_text(with_password=True) + == "ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25" ) def test_str(self): # type: () -> None # see also issue #49 - text = u'http://example.com/á/y%20a%20y/?b=%25' + text = "http://example.com/á/y%20a%20y/?b=%25" url = URL.from_text(text) assert unicode(url) == text - assert bytes(url) == b'http://example.com/%C3%A1/y%20a%20y/?b=%25' + assert bytes(url) == b"http://example.com/%C3%A1/y%20a%20y/?b=%25" if PY2: assert isinstance(str(url), bytes) @@ -1344,18 +1477,17 @@ class TestURL(HyperlinkTestCase): def test_idna_corners(self): # type: () -> None - url = URL.from_text(u'http://abé.com/') - assert url.to_iri().host == u'abé.com' - assert url.to_uri().host == u'xn--ab-cja.com' + url = URL.from_text("http://abé.com/") + assert url.to_iri().host == "abé.com" + assert url.to_uri().host == "xn--ab-cja.com" url = URL.from_text("http://ドメイン.テスト.co.jp#test") - assert url.to_iri().host == u'ドメイン.テスト.co.jp' - assert url.to_uri().host == u'xn--eckwd4c7c.xn--zckzah.co.jp' + assert url.to_iri().host == "ドメイン.テスト.co.jp" + assert url.to_uri().host == "xn--eckwd4c7c.xn--zckzah.co.jp" - assert url.to_uri().get_decoded_url().host == u'ドメイン.テスト.co.jp' + assert url.to_uri().get_decoded_url().host == "ドメイン.テスト.co.jp" - text = 'http://Example.com' + text = "http://Example.com" assert ( - URL.from_text(text).to_uri().get_decoded_url().host == - 'example.com' + URL.from_text(text).to_uri().get_decoded_url().host == "example.com" ) diff --git a/tox.ini b/tox.ini index bedef7f..610a618 100644 --- a/tox.ini +++ b/tox.ini @@ -1,17 +1,30 @@ [tox] envlist = - flake8, mypy - test-py{26,27,34,35,36,37,38,py,py3} + flake8, mypy, black + test-py{26,27,34,35,36,37,38,py2,py3} coverage_report - packaging docs + packaging skip_missing_interpreters = {tty:True:False} +[default] + +basepython = python3.8 + +deps = + idna==2.9 + +setenv = + PY_MODULE=hyperlink + + PYTHONPYCACHEPREFIX={envtmpdir}/pycache + + ## -# Build (default environment) +# Default environment: unit tests ## [testenv] @@ -31,52 +44,62 @@ basepython = pypy3: pypy3 deps = - test: coverage==4.5.4 # rq.filter: <5 - test-py27: hypothesis==4.43.3 + {[default]deps} + + {py26,py27}: typing==3.7.4.1 + + {py26,py27,py34}: pytest==4.6.9 + {py35,py36,py37,py38}: pytest==5.2.4 + py27: mock==3.0.5 + + {[testenv:coverage_report]deps} + pytest-cov==2.8.1 + # py34 isn't supported by hypothesis - test-py35: hypothesis==4.43.3 - test-py36: hypothesis==4.43.3 - test-py37: hypothesis==4.43.3 - test-py38: hypothesis==4.43.3 - test-py39: hypothesis==4.43.3 - test: idna==2.9 - test-py27: mock==3.0.5 - test: typing==3.7.4.1 - test: {py26,py27,py34}: pytest==4.6.9 - test: {py35,py36,py37,py38}: pytest==5.2.4 - test: pytest-cov==2.8.1 - -passenv = - # For Hypothesis settings - test: CI - - # See https://github.com/codecov/codecov-python/blob/master/README.md#using-tox - # And CI-specific docs: - # https://help.github.com/en/articles/virtual-environments-for-github-actions#default-environment-variables - # https://docs.travis-ci.com/user/environment-variables#default-environment-variables - # https://www.appveyor.com/docs/environment-variables/ - codecov: TOXENV CODECOV_* CI - codecov: GITHUB_* - codecov: TRAVIS TRAVIS_* - codecov: APPVEYOR APPVEYOR_* - - # Used in our AppVeyor config - codecov: OS + py27: hypothesis==4.43.3 # rq.filter: <4.44 + {py35,py36,py37,py38,py39,py2,py3}: hypothesis==5.8.4 setenv = - PY_MODULE=hyperlink - test: HYPOTHESIS_STORAGE_DIRECTORY={toxworkdir}/hypothesis - test: PYTHONPYCACHEPREFIX={envtmpdir}/pycache + {[default]setenv} test: COVERAGE_FILE={toxworkdir}/coverage.{envname} - {coverage_report,codecov}: COVERAGE_FILE={toxworkdir}/coverage - codecov: COVERAGE_XML={envlogdir}/coverage_report.xml commands = test: pytest --cov={env:PY_MODULE} --cov-report=term-missing:skip-covered --doctest-modules {posargs:src/{env:PY_MODULE}} +## +# Black code formatting +## + +[testenv:black] + +description = run Black (linter) + +basepython = {[default]basepython} + +skip_install = True + +deps = + black==19.10b0 + +setenv = + BLACK_LINT_ARGS=--check + +commands = + black {env:BLACK_LINT_ARGS:} setup.py src + + +[testenv:black-reformat] + +description = {[testenv:black]description} and reformat +basepython = {[testenv:black]basepython} +skip_install = {[testenv:black]skip_install} +deps = {[testenv:black]deps} +commands = {[testenv:black]commands} + + ## # Flake8 linting ## @@ -85,16 +108,16 @@ commands = description = run Flake8 (linter) -basepython = python3.8 +basepython = {[default]basepython} skip_install = True deps = flake8-bugbear==20.1.4 - #flake8-docstrings==1.5.0 flake8==3.7.9 mccabe==0.6.1 pep8-naming==0.10.0 + pycodestyle==2.5.0 pydocstyle==5.0.2 # pin pyflakes pending a release with https://github.com/PyCQA/pyflakes/pull/455 git+git://github.com/PyCQA/pyflakes@ffe9386#egg=pyflakes @@ -111,6 +134,8 @@ select = A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z show-source = True doctests = True +max-line-length = 80 + # Codes: http://flake8.pycqa.org/en/latest/user/error-codes.html ignore = # syntax error in type comment @@ -131,8 +156,8 @@ ignore = # variable in global scope should not be mixedCase N816, - # line break after binary operator - W504, + # line break before binary operator + W503, # End of list (allows last item to end with trailing ',') EOL @@ -149,13 +174,13 @@ application-import-names = deploy description = run Mypy (static type checker) -basepython = python3.8 - -skip_install = True +basepython = {[default]basepython} deps = mypy==0.770 + {[default]deps} + commands = mypy \ --config-file="{toxinidir}/tox.ini" \ @@ -182,11 +207,7 @@ warn_return_any = True warn_unreachable = True warn_unused_ignores = True -[mypy-hyperlink._url] # Don't complain about dependencies known to lack type hints -# 4 at time of writing (2020-20-01), so maybe disable this soon -allow_untyped_defs = True - [mypy-idna] ignore_missing_imports = True @@ -210,12 +231,19 @@ ignore_missing_imports = True description = generate coverage report -basepython = python +depends = test-py{26,27,34,35,36,37,38,py,py3} + +basepython = {[default]basepython} skip_install = True deps = - coverage==4.5.4 + coverage==4.5.4 # rq.filter: <5 # coverage 5.0 drops Python 3.4 support + +setenv = + {[default]setenv} + + COVERAGE_FILE={toxworkdir}/coverage commands = coverage combine @@ -231,17 +259,34 @@ commands = description = upload coverage to Codecov +depends = {[coverage_report]depends} + basepython = python skip_install = True deps = - coverage==4.5.4 + {[testenv:coverage_report]deps} codecov==2.0.22 -commands = - # Note documentation for CI variables in default environment's passenv +passenv = + # See https://github.com/codecov/codecov-python/blob/master/README.md#using-tox + # And CI-specific docs: + # https://help.github.com/en/articles/virtual-environments-for-github-actions#default-environment-variables + # https://docs.travis-ci.com/user/environment-variables#default-environment-variables + # https://www.appveyor.com/docs/environment-variables/ + TOXENV CODECOV_* CI + GITHUB_* + TRAVIS TRAVIS_* + APPVEYOR APPVEYOR_* +setenv = + {[testenv:coverage_report]setenv} + + COVERAGE_XML={envlogdir}/coverage_report.xml + +commands = + # Note documentation for CI variables in passenv above coverage combine coverage xml -o "{env:COVERAGE_XML}" codecov --file="{env:COVERAGE_XML}" --env \ @@ -261,28 +306,27 @@ commands = description = build documentation -basepython = python3.8 +basepython = {[default]basepython} deps = - Sphinx==2.3.1 + Sphinx==2.4.4 sphinx-rtd-theme==0.4.3 commands = sphinx-build \ -b html -d "{envtmpdir}/doctrees" \ "{toxinidir}/docs" \ - "{toxworkdir}/docs/html" + "{toxinidir}/htmldocs" [testenv:docs-auto] description = build documentation and rebuild automatically -basepython = python3.8 +basepython = {[default]basepython} deps = - Sphinx==2.2.2 - sphinx-rtd-theme==0.4.3 + {[testenv:docs]deps} sphinx-autobuild==0.7.1 commands = @@ -290,7 +334,7 @@ commands = -b html -d "{envtmpdir}/doctrees" \ --host=localhost \ "{toxinidir}/docs" \ - "{toxworkdir}/docs/html" + "{toxinidir}/htmldocs" ## @@ -301,7 +345,9 @@ commands = description = check for potential packaging problems -basepython = python +basepython = {[default]basepython} + +skip_install = True deps = check-manifest==0.41