Merge pull request #84 from python-hyper/strategies

Provide Hypothesis strategies
This commit is contained in:
Mahmoud Hashemi 2020-06-07 14:32:45 -07:00 committed by GitHub
commit d31fb08609
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 594 additions and 3 deletions

1
.gitignore vendored
View File

@ -31,6 +31,7 @@ pip-log.txt
# Testing
/.tox/
/.hypothesis/
nosetests.xml
# Coverage

View File

@ -5,6 +5,7 @@ Jean Paul Calderone
Adi Roiban
Amber Hawkie Brown
Mahmoud Hashemi
Wilfredo Sanchez Vega
and others that have contributed code to the public domain.

View File

@ -1,5 +1,9 @@
include README.md LICENSE CHANGELOG.md tox.ini pyproject.toml .coveragerc Makefile pytest.ini .tox-coveragerc
exclude TODO.md .appveyor.yml
include README.md LICENSE CHANGELOG.md
include tox.ini pytest.ini .coveragerc
exclude TODO.md
exclude .appveyor.yml
include src/hyperlink/idna-tables-properties.csv.gz
graft docs
prune docs/_build

320
src/hyperlink/hypothesis.py Normal file
View File

@ -0,0 +1,320 @@
# -*- coding: utf-8 -*-
"""
Hypothesis strategies.
"""
from __future__ import absolute_import
try:
import hypothesis
del hypothesis
except ImportError:
from typing import Tuple
__all__ = () # type: Tuple[str, ...]
else:
from csv import reader as csv_reader
from os.path import dirname, join
from string import ascii_letters, digits
from sys import maxunicode
from typing import (
Callable,
Iterable,
List,
Optional,
Sequence,
Text,
TypeVar,
cast,
)
from gzip import open as open_gzip
from . import DecodedURL, EncodedURL
from hypothesis import assume
from hypothesis.strategies import (
composite,
integers,
lists,
sampled_from,
text,
)
from idna import IDNAError, check_label, encode as idna_encode
__all__ = (
"decoded_urls",
"encoded_urls",
"hostname_labels",
"hostnames",
"idna_text",
"paths",
"port_numbers",
)
# Generic type variable for the value a drawn strategy produces.
T = TypeVar("T")
# Signature used in this file's type comments for the ``draw`` argument of
# the @composite strategy functions below.
DrawCallable = Callable[[Callable[..., T]], T]
# Python 2/3 compatibility: when the name ``unichr`` is not defined, alias it
# to ``chr`` so the code below can call ``unichr`` unconditionally.
try:
unichr
except NameError: # Py3
unichr = chr # type: Callable[[int], Text]
def idna_characters():
    # type: () -> Text
    """
    Returns a string containing IDNA characters.

    The characters are read from the gzipped "IDNA Derived Properties" CSV
    table that lives next to this module.  Only code points whose property
    is C{PVALID} are included.  The resulting string is cached in a module
    global, so the table is parsed at most once per process (as long as it
    yields at least one character).
    """
    global _idnaCharacters

    if _idnaCharacters:
        return _idnaCharacters

    # Data source "IDNA Derived Properties":
    # https://www.iana.org/assignments/idna-tables-6.3.0/
    # idna-tables-6.3.0.xhtml#idna-tables-properties
    tablePath = join(dirname(__file__), "idna-tables-properties.csv.gz")

    characters = []
    with open_gzip(tablePath) as tableFile:
        rows = csv_reader(
            (rawLine.decode("utf-8") for rawLine in tableFile),
            delimiter=",",
        )
        next(rows)  # Skip header row

        for row in rows:
            # Every row must have exactly three columns.
            codePoints, prop, _description = row

            if prop != "PVALID":
                # CONTEXTO or CONTEXTJ are also allowed, but they come
                # with rules, so we're punting on those here.
                # See: https://tools.ietf.org/html/rfc5892
                continue

            # A cell is either a single code point ("0061") or an
            # inclusive range ("0061-007A"); with no end given, the
            # start doubles as the end.
            bounds = codePoints.split("-", 1)
            first = int(bounds[0], 16)
            last = int(bounds[-1], 16)

            # Code points above maxunicode cannot be represented
            # (happens using Py2 on Windows), so stop there.
            for codePoint in range(first, min(last, maxunicode) + 1):
                characters.append(unichr(codePoint))

    _idnaCharacters = u"".join(characters)
    return _idnaCharacters


_idnaCharacters = ""  # type: Text
@composite
def idna_text(draw, min_size=1, max_size=None):
    # type: (DrawCallable, int, Optional[int]) -> Text
    """
    A strategy which generates IDNA-encodable text.

    Text is drawn from the characters returned by L{idna_characters}; any
    draw that the C{idna} encoder rejects is discarded.

    @param min_size: The minimum number of characters in the text.
        Must be at least C{1}.

    @param max_size: The maximum number of characters in the text.
        Use C{None} for an unbounded size; otherwise must be at least C{1}.

    @raise AssertionError: If C{min_size} or C{max_size} is less than C{1}.
    """
    alphabet = idna_characters()

    assert min_size >= 1
    assert max_size is None or max_size >= 1

    candidate = cast(
        Text,
        draw(text(alphabet=alphabet, min_size=min_size, max_size=max_size)),
    )

    # FIXME: There should be a more efficient way to ensure we produce
    # valid IDNA text.
    try:
        idna_encode(candidate)
    except IDNAError:
        # Reject this example and let Hypothesis draw another one.
        assume(False)

    return candidate
@composite
def port_numbers(draw, allow_zero=False):
    # type: (DrawCallable, bool) -> int
    """
    A strategy which generates port numbers.

    Generated values lie between the lower bound (C{0} or C{1}) and
    C{65535}, inclusive.

    @param allow_zero: Whether to allow port C{0} as a possible value.
    """
    lowest = 0 if allow_zero else 1
    ports = integers(min_value=lowest, max_value=65535)
    return cast(int, draw(ports))
@composite
def hostname_labels(draw, allow_idn=True):
    # type: (DrawCallable, bool) -> Text
    """
    A strategy which generates host name labels.

    Labels that C{idna.check_label} rejects are discarded.

    @param allow_idn: Whether to allow non-ASCII characters as allowed by
        internationalized domain names (IDNs).
    """
    if not allow_idn:
        asciiAlphabet = Text(ascii_letters + digits + u"-")
        label = cast(
            Text, draw(text(min_size=1, max_size=63, alphabet=asciiAlphabet))
        )
    else:
        label = cast(Text, draw(idna_text(min_size=1, max_size=63)))

        try:
            label.encode("ascii")
        except UnicodeEncodeError:
            # If the label doesn't encode to ASCII, then we need to check
            # the length of the label after encoding to punycode and adding
            # the xn-- prefix.
            punycodeBudget = 63 - len("xn--")

            # Rather than bombing out, just trim from the end until it
            # is short enough, so hypothesis doesn't have to generate
            # new data.
            while len(label.encode("punycode")) > punycodeBudget:
                label = label[:-1]

    # Filter invalid labels.
    # It would be better to reliably avoid generation of bogus labels in
    # the first place, but it's hard...
    try:
        check_label(label)
    except UnicodeError:  # pragma: no cover (not always drawn)
        assume(False)

    return label
@composite
def hostnames(draw, allow_leading_digit=True, allow_idn=True):
    # type: (DrawCallable, bool, bool) -> Text
    """
    A strategy which generates host names.

    A host name consists of two to five labels joined by C{"."}.  Trailing
    labels are dropped as needed so that the I{encoded} host name (the form
    in which non-ASCII labels are punycode-encoded and prefixed with
    C{xn--}) does not exceed 252 characters.

    @param allow_leading_digit: Whether to allow a leading digit in host
        names; they were not allowed prior to RFC 1123.

    @param allow_idn: Whether to allow non-ASCII characters as allowed by
        internationalized domain names (IDNs).
    """

    def encoded_label_length(label):
        # type: (Text) -> int
        # Length of the label as it appears in the encoded host name.
        try:
            label.encode("ascii")
        except UnicodeEncodeError:
            return len("xn--") + len(label.encode("punycode"))
        return len(label)

    def encoded_length(parts):
        # type: (List[Text]) -> int
        # Encoded labels plus one separating dot between each pair.
        return sum(encoded_label_length(p) for p in parts) + len(parts) - 1

    # Draw first label, filtering out labels with leading digits if needed
    first_label = hostname_labels(allow_idn=allow_idn)
    if not allow_leading_digit:
        first_label = first_label.filter(lambda label: label[0] not in digits)

    labels = [cast(Text, draw(first_label))]

    # Draw remaining labels
    labels += cast(
        List[Text],
        draw(
            lists(
                hostname_labels(allow_idn=allow_idn), min_size=1, max_size=4,
            )
        ),
    )

    # Trim off labels until the total host name length fits in 252
    # characters.  This avoids having to filter the data.
    #
    # The length limit applies to the encoded name, not the Unicode text:
    # an IDN label can be much longer once punycode-encoded, so measuring
    # the Unicode length would let through names that idna.encode() rejects
    # as too long.  A single label never exceeds 63 encoded characters, so
    # this loop always stops with at least one label left.
    while encoded_length(labels) > 252:
        labels = labels[:-1]

    return u".".join(labels)
def path_characters():
    # type: () -> str
    """
    Returns a string containing valid URL path characters.

    That is every code point from C{0} through C{sys.maxunicode},
    inclusive, except the reserved characters C{#}, C{/} and C{?} and any
    code point that cannot be encoded as UTF-8.  The string is computed on
    first use and cached in a module global.
    """
    global _path_characters

    if _path_characters is None:

        def chars():
            # type: () -> Iterable[Text]
            # maxunicode is itself a valid code point, so the range must
            # include it (hence the + 1).
            for i in range(maxunicode + 1):
                c = unichr(i)

                # Exclude reserved characters
                if c in "#/?":
                    continue

                # Exclude anything not UTF-8 compatible
                try:
                    c.encode("utf-8")
                except UnicodeEncodeError:
                    continue

                yield c

        _path_characters = "".join(chars())

    return _path_characters


_path_characters = None  # type: Optional[str]
@composite
def paths(draw):
    # type: (DrawCallable) -> Sequence[Text]
    """
    A strategy which generates URL paths as sequences of path segments.

    Each path has at most ten segments; each segment is non-empty text
    drawn from the characters returned by L{path_characters}.
    """
    segment = text(min_size=1, alphabet=path_characters())
    segments = lists(segment, max_size=10)
    return cast(List[Text], draw(segments))
@composite
def encoded_urls(draw):
    # type: (DrawCallable) -> EncodedURL
    """
    A strategy which generates L{EncodedURL}s.

    The scheme is C{http} or C{https}; a drawn port of C{0} is turned into
    "no port" (C{None}).

    Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
    protocol-friendly URI.
    """
    # Port 0 stands for "no explicit port".
    port = cast(
        Optional[int], draw(port_numbers(allow_zero=True)) or None
    )
    host = cast(Text, draw(hostnames()))
    path = cast(Sequence[Text], draw(paths()))
    scheme = cast(Text, draw(sampled_from((u"http", u"https"))))

    return EncodedURL(scheme=scheme, host=host, port=port, path=path)
@composite
def decoded_urls(draw):
    # type: (DrawCallable) -> DecodedURL
    """
    A strategy which generates L{DecodedURL}s.

    Each one wraps a URL drawn from L{encoded_urls}.

    Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
    protocol-friendly URI.
    """
    encoded = draw(encoded_urls())
    return DecodedURL(encoded)

Binary file not shown.

View File

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
"""
Tests for hyperlink
"""
# This package exports no public names.
__all__ = ()
def _init_hypothesis():
    # type: () -> None
    """
    Configure Hypothesis for continuous-integration runs.

    When the C{CI} environment variable is set and Hypothesis can be
    imported, register and load a "patience" settings profile that
    suppresses the C{too_slow} and C{filter_too_much} health checks.
    Otherwise do nothing.
    """
    from os import environ

    if "CI" not in environ:
        return

    try:
        from hypothesis import HealthCheck, settings
    except ImportError:
        # Hypothesis is optional; nothing to configure without it.
        return

    patient = settings(
        suppress_health_check=[
            HealthCheck.too_slow,
            HealthCheck.filter_too_much,
        ]
    )
    settings.register_profile("patience", patient)
    settings.load_profile("patience")


_init_hypothesis()

View File

@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
"""
Tests for hyperlink.hypothesis.
"""
try:
import hypothesis
del hypothesis
except ImportError:
pass
else:
from string import digits
from typing import Sequence, Text
try:
from unittest.mock import patch
except ImportError:
from mock import patch # type: ignore[misc]
from hypothesis import given, settings
from hypothesis.strategies import SearchStrategy, data
from idna import IDNAError, check_label, encode as idna_encode
from .common import HyperlinkTestCase
from .. import DecodedURL, EncodedURL
from ..hypothesis import (
DrawCallable,
composite,
decoded_urls,
encoded_urls,
hostname_labels,
hostnames,
idna_text,
paths,
port_numbers,
)
class TestHypothesisStrategies(HyperlinkTestCase):
    """
    Tests for hyperlink.hypothesis.
    """

    @given(idna_text())
    def test_idna_text_valid(self, text):
        # type: (Text) -> None
        """
        idna_text() generates IDNA-encodable text.
        """
        try:
            idna_encode(text)
        except IDNAError:  # pragma: no cover
            raise AssertionError("Invalid IDNA text: {!r}".format(text))

    @given(data())
    def test_idna_text_min_max(self, data):
        # type: (SearchStrategy) -> None
        """
        idna_text() raises AssertionError if min_size or max_size is < 1.
        """
        self.assertRaises(AssertionError, data.draw, idna_text(min_size=0))
        self.assertRaises(AssertionError, data.draw, idna_text(max_size=0))

    @given(port_numbers())
    def test_port_numbers_bounds(self, port):
        # type: (int) -> None
        """
        port_numbers() generates integers between 1 and 65535, inclusive.
        """
        self.assertGreaterEqual(port, 1)
        self.assertLessEqual(port, 65535)

    @given(port_numbers(allow_zero=True))
    def test_port_numbers_bounds_allow_zero(self, port):
        # type: (int) -> None
        """
        port_numbers(allow_zero=True) generates integers between 0 and
        65535, inclusive.
        """
        self.assertGreaterEqual(port, 0)
        self.assertLessEqual(port, 65535)

    @given(hostname_labels())
    def test_hostname_labels_valid_idn(self, label):
        # type: (Text) -> None
        """
        hostname_labels() generates IDN host name labels.
        """
        try:
            check_label(label)
            idna_encode(label)
        except UnicodeError:  # pragma: no cover
            raise AssertionError("Invalid IDN label: {!r}".format(label))

    @given(data())
    @settings(max_examples=10)
    def test_hostname_labels_long_idn_punycode(self, data):
        # type: (SearchStrategy) -> None
        """
        hostname_labels() handles the case where idna_text() generates text
        that, once encoded to punycode, ends up longer than allowed.
        """

        @composite
        def mock_idna_text(draw, min_size, max_size):
            # type: (DrawCallable, int, int) -> Text
            # We want a string that does not exceed max_size, but when
            # encoded to punycode, does exceed max_size.
            # So use a unicode character that is larger when encoded,
            # "á" being a great example, and use it max_size times, which
            # will be max_size * 3 in size when encoded.
            return u"\N{LATIN SMALL LETTER A WITH ACUTE}" * max_size

        with patch("hyperlink.hypothesis.idna_text", mock_idna_text):
            label = data.draw(hostname_labels())
            try:
                check_label(label)
                idna_encode(label)
            except UnicodeError:  # pragma: no cover
                raise AssertionError(
                    "Invalid IDN label: {!r}".format(label)
                )

    @given(hostname_labels(allow_idn=False))
    def test_hostname_labels_valid_ascii(self, label):
        # type: (Text) -> None
        """
        hostname_labels(allow_idn=False) generates ASCII host name labels.
        """
        try:
            check_label(label)
            label.encode("ascii")
        except UnicodeError:  # pragma: no cover
            raise AssertionError("Invalid ASCII label: {!r}".format(label))

    @given(hostnames())
    def test_hostnames_idn(self, hostname):
        # type: (Text) -> None
        """
        hostnames() generates IDN host names.
        """
        try:
            for label in hostname.split(u"."):
                check_label(label)
            idna_encode(hostname)
        except UnicodeError:  # pragma: no cover
            raise AssertionError(
                "Invalid IDN host name: {!r}".format(hostname)
            )

    @given(hostnames(allow_leading_digit=False))
    def test_hostnames_idn_nolead(self, hostname):
        # type: (Text) -> None
        """
        hostnames(allow_leading_digit=False) generates IDN host names
        without leading digits.
        """
        self.assertEqual(hostname, hostname.lstrip(digits))

    @given(hostnames(allow_idn=False))
    def test_hostnames_ascii(self, hostname):
        # type: (Text) -> None
        """
        hostnames(allow_idn=False) generates ASCII host names.
        """
        try:
            for label in hostname.split(u"."):
                check_label(label)
            hostname.encode("ascii")
        except UnicodeError:  # pragma: no cover
            raise AssertionError(
                "Invalid ASCII host name: {!r}".format(hostname)
            )

    @given(hostnames(allow_leading_digit=False, allow_idn=False))
    def test_hostnames_ascii_nolead(self, hostname):
        # type: (Text) -> None
        """
        hostnames(allow_leading_digit=False, allow_idn=False) generates
        ASCII host names without leading digits.
        """
        self.assertEqual(hostname, hostname.lstrip(digits))

    @given(paths())
    def test_paths(self, path):
        # type: (Sequence[Text]) -> None
        """
        paths() generates sequences of URL path components, none of which
        contains a reserved character.
        """
        text = u"/".join(path)
        try:
            text.encode("utf-8")
        except UnicodeError:  # pragma: no cover
            raise AssertionError("Invalid URL path: {!r}".format(path))

        for segment in path:
            # Check each reserved character on its own; testing for the
            # substring "#/?" would pass for segments that contain only
            # one of them.
            for reserved in u"#/?":
                self.assertNotIn(reserved, segment)

    @given(encoded_urls())
    def test_encoded_urls(self, url):
        # type: (EncodedURL) -> None
        """
        encoded_urls() generates EncodedURLs.
        """
        self.assertIsInstance(url, EncodedURL)

    @given(decoded_urls())
    def test_decoded_urls(self, url):
        # type: (DecodedURL) -> None
        """
        decoded_urls() generates DecodedURLs.
        """
        self.assertIsInstance(url, DecodedURL)

22
tox.ini
View File

@ -58,10 +58,18 @@ deps =
{[testenv:coverage_report]deps}
pytest-cov==2.8.1
# For hypothesis. Note Python 3.4 isn't supported by hypothesis.
py27: hypothesis==4.43.3 # rq.filter: <4.44
{py35,py36,py37,py38,py39,py2,py3}: hypothesis==5.8.4
py27: mock==3.0.5
setenv =
{[default]setenv}
COVERAGE_FILE={toxworkdir}/coverage.{envname}
HYPOTHESIS_STORAGE_DIRECTORY={toxworkdir}/hypothesis
passenv = CI
commands =
pytest --cov={env:PY_MODULE} --cov-report=term-missing:skip-covered --doctest-modules {posargs:src/{env:PY_MODULE}}
@ -205,8 +213,20 @@ warn_return_any = True
warn_unreachable = True
warn_unused_ignores = True
# DrawCallable is generic
[mypy-hyperlink.hypothesis]
disallow_any_generics = False
[mypy-hyperlink.test.test_hypothesis]
disallow_any_generics = False
# Don't complain about dependencies known to lack type hints
[mypy-hypothesis]
ignore_missing_imports = True
[mypy-hypothesis.*]
ignore_missing_imports = True
[mypy-idna]
ignore_missing_imports = True
@ -255,7 +275,7 @@ skip_install = True
deps =
{[testenv:coverage_report]deps}
codecov==2.0.22
codecov==2.1.4
passenv =
# See https://github.com/codecov/codecov-python/blob/master/README.md#using-tox