Merge pull request #84 from python-hyper/strategies
Provide Hypothesis strategies
This commit is contained in:
commit
d31fb08609
|
@ -31,6 +31,7 @@ pip-log.txt
|
|||
|
||||
# Testing
|
||||
/.tox/
|
||||
/.hypothesis/
|
||||
nosetests.xml
|
||||
|
||||
# Coverage
|
||||
|
|
1
LICENSE
1
LICENSE
|
@ -5,6 +5,7 @@ Jean Paul Calderone
|
|||
Adi Roiban
|
||||
Amber Hawkie Brown
|
||||
Mahmoud Hashemi
|
||||
Wilfredo Sanchez Vega
|
||||
|
||||
and others that have contributed code to the public domain.
|
||||
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
include README.md LICENSE CHANGELOG.md tox.ini pyproject.toml .coveragerc Makefile pytest.ini .tox-coveragerc
|
||||
exclude TODO.md .appveyor.yml
|
||||
include README.md LICENSE CHANGELOG.md
|
||||
include tox.ini pytest.ini .coveragerc
|
||||
exclude TODO.md
|
||||
exclude .appveyor.yml
|
||||
|
||||
include src/hyperlink/idna-tables-properties.csv.gz
|
||||
|
||||
graft docs
|
||||
prune docs/_build
|
||||
|
|
|
@ -0,0 +1,320 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Hypothesis strategies.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
try:
|
||||
import hypothesis
|
||||
|
||||
del hypothesis
|
||||
except ImportError:
|
||||
from typing import Tuple
|
||||
|
||||
__all__ = () # type: Tuple[str, ...]
|
||||
else:
|
||||
from csv import reader as csv_reader
|
||||
from os.path import dirname, join
|
||||
from string import ascii_letters, digits
|
||||
from sys import maxunicode
|
||||
from typing import (
|
||||
Callable,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Text,
|
||||
TypeVar,
|
||||
cast,
|
||||
)
|
||||
from gzip import open as open_gzip
|
||||
|
||||
from . import DecodedURL, EncodedURL
|
||||
|
||||
from hypothesis import assume
|
||||
from hypothesis.strategies import (
|
||||
composite,
|
||||
integers,
|
||||
lists,
|
||||
sampled_from,
|
||||
text,
|
||||
)
|
||||
|
||||
from idna import IDNAError, check_label, encode as idna_encode
|
||||
|
||||
__all__ = (
|
||||
"decoded_urls",
|
||||
"encoded_urls",
|
||||
"hostname_labels",
|
||||
"hostnames",
|
||||
"idna_text",
|
||||
"paths",
|
||||
"port_numbers",
|
||||
)
|
||||
|
||||
T = TypeVar("T")
|
||||
DrawCallable = Callable[[Callable[..., T]], T]
|
||||
|
||||
try:
|
||||
unichr
|
||||
except NameError: # Py3
|
||||
unichr = chr # type: Callable[[int], Text]
|
||||
|
||||
def idna_characters():
    # type: () -> Text
    """
    Returns a string containing IDNA characters.

    Only code points whose derived property is C{PVALID} are included.  The
    table is read from the gzipped CSV file stored alongside this module, and
    the joined text is cached in the module-level C{_idnaCharacters}; the
    file is parsed again only while that cache is still empty.
    """
    global _idnaCharacters

    if _idnaCharacters:
        return _idnaCharacters

    # Data source "IDNA Derived Properties":
    # https://www.iana.org/assignments/idna-tables-6.3.0/
    # idna-tables-6.3.0.xhtml#idna-tables-properties
    table_path = join(dirname(__file__), "idna-tables-properties.csv.gz")
    characters = []

    with open_gzip(table_path) as table_file:
        # The gzip stream yields bytes; decode each line for the CSV reader.
        rows = csv_reader(
            (raw_line.decode("utf-8") for raw_line in table_file),
            delimiter=",",
        )
        next(rows)  # Skip header row

        for code_points, prop, _description in rows:
            if prop != "PVALID":
                # CONTEXTO or CONTEXTJ are also allowed, but they come
                # with rules, so we're punting on those here.
                # See: https://tools.ietf.org/html/rfc5892
                continue

            # A row holds either one hexadecimal code point or an inclusive
            # "start-end" range; a lone value is its own end of range.
            bounds = [int(bound, 16) for bound in code_points.split("-", 1)]
            first, last = bounds[0], bounds[-1]

            # Code points above maxunicode cannot be represented (happens
            # using Py2 on Windows), so the range stops there.
            last = min(last, maxunicode)
            characters.extend(
                unichr(point) for point in range(first, last + 1)
            )

    _idnaCharacters = u"".join(characters)
    return _idnaCharacters
|
||||
|
||||
_idnaCharacters = "" # type: Text
|
||||
|
||||
@composite
def idna_text(draw, min_size=1, max_size=None):
    # type: (DrawCallable, int, Optional[int]) -> Text
    """
    A strategy which generates IDNA-encodable text.

    @param min_size: The minimum number of characters in the text.
        Must be at least C{1}.

    @param max_size: The maximum number of characters in the text.
        Must be at least C{1} when given.
        Use C{None} for an unbounded size.

    @raise AssertionError: When a value is drawn and C{min_size} or
        C{max_size} is less than C{1}.
    """
    alphabet = idna_characters()

    # Callers (and the test suite) rely on AssertionError here, so these stay
    # as assertions rather than becoming ValueError.
    assert min_size >= 1, "min_size must be at least 1"

    if max_size is not None:
        assert max_size >= 1, "max_size must be at least 1"

    result = cast(
        Text,
        draw(text(min_size=min_size, max_size=max_size, alphabet=alphabet)),
    )

    # FIXME: There should be a more efficient way to ensure we produce
    # valid IDNA text.
    try:
        idna_encode(result)
    except IDNAError:
        # Individually valid characters can still form text that does not
        # encode; have Hypothesis discard this example.
        assume(False)

    return result
|
||||
|
||||
@composite
def port_numbers(draw, allow_zero=False):
    # type: (DrawCallable, bool) -> int
    """
    A strategy which generates integer port numbers up to C{65535}.

    @param allow_zero: When true, C{0} may be generated as well; otherwise
        the smallest generated value is C{1}.
    """
    lowest = 0 if allow_zero else 1
    port = draw(integers(min_value=lowest, max_value=65535))
    return cast(int, port)
|
||||
|
||||
@composite
def hostname_labels(draw, allow_idn=True):
    # type: (DrawCallable, bool) -> Text
    """
    A strategy which generates single host name labels of 1 to 63 characters.

    @param allow_idn: Whether to allow non-ASCII characters as allowed by
        internationalized domain names (IDNs).  When false, labels are built
        from ASCII letters, digits and hyphens only.
    """
    if not allow_idn:
        ascii_alphabet = Text(ascii_letters + digits + u"-")
        label = cast(
            Text,
            draw(text(min_size=1, max_size=63, alphabet=ascii_alphabet)),
        )
    else:
        label = cast(Text, draw(idna_text(min_size=1, max_size=63)))

        try:
            label.encode("ascii")
        except UnicodeEncodeError:
            # A non-ASCII label is limited by its punycode form plus the
            # "xn--" prefix, not by its character count.
            punycode_budget = 63 - len("xn--")

            # Trim from the end until it fits, rather than rejecting the
            # example, so hypothesis doesn't have to generate new data.
            while len(label.encode("punycode")) > punycode_budget:
                label = label[:-1]

    # Discard labels that are still invalid.  Avoiding them at generation
    # time would be better, but is hard to do reliably.
    try:
        check_label(label)
    except UnicodeError:  # pragma: no cover (not always drawn)
        assume(False)

    return label
|
||||
|
||||
@composite
def hostnames(draw, allow_leading_digit=True, allow_idn=True):
    # type: (DrawCallable, bool, bool) -> Text
    """
    A strategy which generates dot-separated host names of two to five
    labels.

    @param allow_leading_digit: Whether to allow a leading digit in host
        names; they were not allowed prior to RFC 1123.

    @param allow_idn: Whether to allow non-ASCII characters as allowed by
        internationalized domain names (IDNs).
    """

    def leading_character_ok(candidate):
        # type: (Text) -> bool
        if allow_leading_digit:
            return True
        return candidate[0] not in digits

    # The first label is the only one subject to the leading-digit rule.
    first_label = cast(
        Text,
        draw(
            hostname_labels(allow_idn=allow_idn).filter(leading_character_ok)
        ),
    )

    # One to four further labels follow it.
    other_labels = cast(
        List[Text],
        draw(
            lists(
                hostname_labels(allow_idn=allow_idn),
                min_size=1,
                max_size=4,
            )
        ),
    )

    labels = [first_label] + other_labels

    # Drop trailing labels until the joined host name fits in 252
    # characters.  This avoids having to filter the data.
    while len(u".".join(labels)) > 252:
        labels.pop()

    return u".".join(labels)
|
||||
|
||||
def path_characters():
    # type: () -> str
    """
    Returns a string containing valid URL path characters.

    Every code point from C{0} through C{sys.maxunicode}, inclusive, is
    considered; the reserved characters C{#}, C{/} and C{?} and anything
    that cannot be encoded as UTF-8 are left out.  The result is computed on
    the first call and cached in the module-level C{_path_characters}.
    """
    global _path_characters

    if _path_characters is None:

        def chars():
            # type: () -> Iterable[Text]
            # maxunicode is itself a valid code point, hence the "+ 1";
            # without it the last code point would never be considered.
            for i in range(maxunicode + 1):
                c = unichr(i)

                # Exclude reserved characters
                if c in "#/?":
                    continue

                # Exclude anything not UTF-8 compatible
                try:
                    c.encode("utf-8")
                except UnicodeEncodeError:
                    continue

                yield c

        _path_characters = "".join(chars())

    return _path_characters
|
||||
|
||||
_path_characters = None # type: Optional[str]
|
||||
|
||||
@composite
def paths(draw):
    # type: (DrawCallable) -> Sequence[Text]
    """
    A strategy which generates URL paths as lists of path segments.

    Each path has at most ten segments, and every segment is non-empty text
    drawn from the characters returned by L{path_characters}.
    """
    segments = text(min_size=1, alphabet=path_characters())
    path = draw(lists(segments, max_size=10))
    return cast(List[Text], path)
|
||||
|
||||
@composite
def encoded_urls(draw):
    # type: (DrawCallable) -> EncodedURL
    """
    A strategy which generates L{EncodedURL}s with an C{http} or C{https}
    scheme, a generated host name, an optional port and a generated path.

    Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
    protocol-friendly URI.
    """
    # Port 0 is drawn as a stand-in for "no port given".
    drawn_port = cast(int, draw(port_numbers(allow_zero=True)))
    host = cast(Text, draw(hostnames()))
    path = cast(Sequence[Text], draw(paths()))
    scheme = cast(Text, draw(sampled_from((u"http", u"https"))))

    port = drawn_port or None  # type: Optional[int]

    return EncodedURL(scheme=scheme, host=host, port=port, path=path)
|
||||
|
||||
@composite
def decoded_urls(draw):
    # type: (DrawCallable) -> DecodedURL
    """
    A strategy which generates L{DecodedURL}s, each wrapping an
    L{EncodedURL} drawn from L{encoded_urls}.

    Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
    protocol-friendly URI.
    """
    encoded = draw(encoded_urls())
    return DecodedURL(encoded)
|
Binary file not shown.
|
@ -0,0 +1,31 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Tests for hyperlink
|
||||
"""
|
||||
|
||||
# This package exports no public names.
__all__ = ()
|
||||
|
||||
|
||||
def _init_hypothesis():
    # type: () -> None
    """
    Register and load a more lenient Hypothesis profile when running on CI.

    Does nothing unless the C{CI} environment variable is set, and also does
    nothing if Hypothesis cannot be imported.
    """
    from os import environ

    if "CI" not in environ:
        return

    try:
        from hypothesis import HealthCheck, settings
    except ImportError:
        return

    # Silence the health checks for slow data generation and heavy filtering.
    patience = settings(
        suppress_health_check=[
            HealthCheck.too_slow,
            HealthCheck.filter_too_much,
        ]
    )
    settings.register_profile("patience", patience)
    settings.load_profile("patience")


_init_hypothesis()
|
|
@ -0,0 +1,214 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Tests for hyperlink.hypothesis.
|
||||
"""
|
||||
|
||||
try:
|
||||
import hypothesis
|
||||
|
||||
del hypothesis
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
from string import digits
|
||||
from typing import Sequence, Text
|
||||
|
||||
try:
|
||||
from unittest.mock import patch
|
||||
except ImportError:
|
||||
from mock import patch # type: ignore[misc]
|
||||
|
||||
from hypothesis import given, settings
|
||||
from hypothesis.strategies import SearchStrategy, data
|
||||
|
||||
from idna import IDNAError, check_label, encode as idna_encode
|
||||
|
||||
from .common import HyperlinkTestCase
|
||||
from .. import DecodedURL, EncodedURL
|
||||
from ..hypothesis import (
|
||||
DrawCallable,
|
||||
composite,
|
||||
decoded_urls,
|
||||
encoded_urls,
|
||||
hostname_labels,
|
||||
hostnames,
|
||||
idna_text,
|
||||
paths,
|
||||
port_numbers,
|
||||
)
|
||||
|
||||
class TestHypothesisStrategies(HyperlinkTestCase):
    """
    Tests for hyperlink.hypothesis.
    """

    @given(idna_text())
    def test_idna_text_valid(self, text):
        # type: (Text) -> None
        """
        idna_text() generates IDNA-encodable text.
        """
        try:
            idna_encode(text)
        except IDNAError:  # pragma: no cover
            raise AssertionError("Invalid IDNA text: {!r}".format(text))

    @given(data())
    def test_idna_text_min_max(self, data):
        # type: (SearchStrategy) -> None
        """
        idna_text() raises AssertionError if min_size or max_size is < 1.
        """
        self.assertRaises(AssertionError, data.draw, idna_text(min_size=0))
        self.assertRaises(AssertionError, data.draw, idna_text(max_size=0))

    @given(port_numbers())
    def test_port_numbers_bounds(self, port):
        # type: (int) -> None
        """
        port_numbers() generates integers between 1 and 65535, inclusive.
        """
        self.assertGreaterEqual(port, 1)
        self.assertLessEqual(port, 65535)

    @given(port_numbers(allow_zero=True))
    def test_port_numbers_bounds_allow_zero(self, port):
        # type: (int) -> None
        """
        port_numbers(allow_zero=True) generates integers between 0 and
        65535, inclusive.
        """
        self.assertGreaterEqual(port, 0)
        self.assertLessEqual(port, 65535)

    @given(hostname_labels())
    def test_hostname_labels_valid_idn(self, label):
        # type: (Text) -> None
        """
        hostname_labels() generates IDN host name labels.
        """
        try:
            check_label(label)
            idna_encode(label)
        except UnicodeError:  # pragma: no cover
            raise AssertionError("Invalid IDN label: {!r}".format(label))

    @given(data())
    @settings(max_examples=10)
    def test_hostname_labels_long_idn_punycode(self, data):
        # type: (SearchStrategy) -> None
        """
        hostname_labels() handles case where idna_text() generates text
        that encoded to punycode ends up as longer than allowed.
        """

        @composite
        def mock_idna_text(draw, min_size, max_size):
            # type: (DrawCallable, int, int) -> Text
            # We want a string that does not exceed max_size, but when
            # encoded to punycode, does exceed max_size.
            # So use a unicode character that is larger when encoded,
            # "á" being a great example, and use it max_size times, which
            # will be max_size * 3 in size when encoded.
            return u"\N{LATIN SMALL LETTER A WITH ACUTE}" * max_size

        with patch("hyperlink.hypothesis.idna_text", mock_idna_text):
            label = data.draw(hostname_labels())
            try:
                check_label(label)
                idna_encode(label)
            except UnicodeError:  # pragma: no cover
                raise AssertionError(
                    "Invalid IDN label: {!r}".format(label)
                )

    @given(hostname_labels(allow_idn=False))
    def test_hostname_labels_valid_ascii(self, label):
        # type: (Text) -> None
        """
        hostname_labels(allow_idn=False) generates ASCII host name labels.
        """
        try:
            check_label(label)
            label.encode("ascii")
        except UnicodeError:  # pragma: no cover
            raise AssertionError("Invalid ASCII label: {!r}".format(label))

    @given(hostnames())
    def test_hostnames_idn(self, hostname):
        # type: (Text) -> None
        """
        hostnames() generates IDN host names.
        """
        try:
            for label in hostname.split(u"."):
                check_label(label)
            idna_encode(hostname)
        except UnicodeError:  # pragma: no cover
            raise AssertionError(
                "Invalid IDN host name: {!r}".format(hostname)
            )

    @given(hostnames(allow_leading_digit=False))
    def test_hostnames_idn_nolead(self, hostname):
        # type: (Text) -> None
        """
        hostnames(allow_leading_digit=False) generates IDN host names
        without leading digits.
        """
        # assertEqual reports both values on failure, unlike assertTrue.
        self.assertEqual(hostname, hostname.lstrip(digits))

    @given(hostnames(allow_idn=False))
    def test_hostnames_ascii(self, hostname):
        # type: (Text) -> None
        """
        hostnames(allow_idn=False) generates ASCII host names.
        """
        try:
            for label in hostname.split(u"."):
                check_label(label)
            hostname.encode("ascii")
        except UnicodeError:  # pragma: no cover
            raise AssertionError(
                "Invalid ASCII host name: {!r}".format(hostname)
            )

    @given(hostnames(allow_leading_digit=False, allow_idn=False))
    def test_hostnames_ascii_nolead(self, hostname):
        # type: (Text) -> None
        """
        hostnames(allow_leading_digit=False, allow_idn=False) generates
        ASCII host names without leading digits.
        """
        self.assertEqual(hostname, hostname.lstrip(digits))

    @given(paths())
    def test_paths(self, path):
        # type: (Sequence[Text]) -> None
        """
        paths() generates sequences of URL path components, none of which
        contains a reserved character.
        """
        text = u"/".join(path)
        try:
            text.encode("utf-8")
        except UnicodeError:  # pragma: no cover
            raise AssertionError("Invalid URL path: {!r}".format(path))

        # Check each reserved character on its own; testing for the
        # three-character substring "#/?" would let a lone "#", "/" or "?"
        # slip through.
        for segment in path:
            for reserved in u"#/?":
                self.assertNotIn(reserved, segment)

    @given(encoded_urls())
    def test_encoded_urls(self, url):
        # type: (EncodedURL) -> None
        """
        encoded_urls() generates EncodedURLs.
        """
        self.assertIsInstance(url, EncodedURL)

    @given(decoded_urls())
    def test_decoded_urls(self, url):
        # type: (DecodedURL) -> None
        """
        decoded_urls() generates DecodedURLs.
        """
        self.assertIsInstance(url, DecodedURL)
|
22
tox.ini
22
tox.ini
|
@ -58,10 +58,18 @@ deps =
|
|||
{[testenv:coverage_report]deps}
|
||||
pytest-cov==2.8.1
|
||||
|
||||
# For hypothesis. Note Python 3.4 isn't supported by hypothesis.
|
||||
py27: hypothesis==4.43.3 # rq.filter: <4.44
|
||||
{py35,py36,py37,py38,py39,py2,py3}: hypothesis==5.8.4
|
||||
py27: mock==3.0.5
|
||||
|
||||
setenv =
|
||||
{[default]setenv}
|
||||
|
||||
COVERAGE_FILE={toxworkdir}/coverage.{envname}
|
||||
HYPOTHESIS_STORAGE_DIRECTORY={toxworkdir}/hypothesis
|
||||
|
||||
passenv = CI
|
||||
|
||||
commands =
|
||||
pytest --cov={env:PY_MODULE} --cov-report=term-missing:skip-covered --doctest-modules {posargs:src/{env:PY_MODULE}}
|
||||
|
@ -205,8 +213,20 @@ warn_return_any = True
|
|||
warn_unreachable = True
|
||||
warn_unused_ignores = True
|
||||
|
||||
# DrawCallable is generic
|
||||
|
||||
[mypy-hyperlink.hypothesis]
|
||||
disallow_any_generics = False
|
||||
[mypy-hyperlink.test.test_hypothesis]
|
||||
disallow_any_generics = False
|
||||
|
||||
# Don't complain about dependencies known to lack type hints
|
||||
|
||||
[mypy-hypothesis]
|
||||
ignore_missing_imports = True
|
||||
[mypy-hypothesis.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-idna]
|
||||
ignore_missing_imports = True
|
||||
|
||||
|
@ -255,7 +275,7 @@ skip_install = True
|
|||
|
||||
deps =
|
||||
{[testenv:coverage_report]deps}
|
||||
codecov==2.0.22
|
||||
codecov==2.1.4
|
||||
|
||||
passenv =
|
||||
# See https://github.com/codecov/codecov-python/blob/master/README.md#using-tox
|
||||
|
|
Loading…
Reference in New Issue