initial commit

This commit is contained in:
Davidson Gomes
2024-10-30 11:19:09 -03:00
commit 8654a31a4d
3744 changed files with 585542 additions and 0 deletions

View File

@@ -0,0 +1,53 @@
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
An implementation of semantics and validations described in RFC 3986.
See http://rfc3986.readthedocs.io/ for detailed documentation.
:copyright: (c) 2014 Rackspace
:license: Apache v2.0, see LICENSE for details
"""
from .api import iri_reference
from .api import IRIReference
from .api import is_valid_uri
from .api import normalize_uri
from .api import uri_reference
from .api import URIReference
from .api import urlparse
from .parseresult import ParseResult
# Package metadata, exposed as module-level dunder attributes.
__title__ = "rfc3986"
__author__ = "Ian Stapleton Cordasco"
__author_email__ = "graffatcolmingov@gmail.com"
__license__ = "Apache v2.0"
__copyright__ = "Copyright 2014 Rackspace; 2016 Ian Stapleton Cordasco"
__version__ = "2.0.0"
# Names exported by ``from rfc3986 import *``: first the classes and
# functions imported at the top of this module, then the metadata attributes
# defined just above.
__all__ = (
    "ParseResult",
    "URIReference",
    "IRIReference",
    "is_valid_uri",
    "normalize_uri",
    "uri_reference",
    "iri_reference",
    "urlparse",
    "__title__",
    "__author__",
    "__author_email__",
    "__license__",
    "__copyright__",
    "__version__",
)

View File

@@ -0,0 +1,373 @@
"""Module containing the implementation of the URIMixin class."""
import warnings
from . import exceptions as exc
from . import misc
from . import normalizers
from . import validators
class URIMixin:
    """Mixin with all shared methods for URIs and IRIs.

    The methods below read ``scheme``, ``authority``, ``path``, ``query``,
    ``fragment`` and ``encoding`` from ``self`` and call ``normalize()``,
    ``from_string()`` and ``_replace()``. None of those are defined in this
    mixin, so the concrete class has to provide them.
    """

    # Reuse the tuple hash implementation for instances.
    __hash__ = tuple.__hash__

    def authority_info(self):
        """Return a dictionary with the ``userinfo``, ``host``, and ``port``.

        If the authority is not valid, it will raise a
        :class:`~rfc3986.exceptions.InvalidAuthority` Exception.

        :returns:
            ``{'userinfo': 'username:password', 'host': 'www.example.com',
            'port': '80'}``
        :rtype: dict
        :raises rfc3986.exceptions.InvalidAuthority:
            If the authority is not ``None`` and can not be parsed.
        """
        if not self.authority:
            return {"userinfo": None, "host": None, "port": None}

        match = self._match_subauthority()

        if match is None:
            # In this case, we have an authority that was parsed from the URI
            # Reference, but it cannot be further parsed by our
            # misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid
            # authority.
            raise exc.InvalidAuthority(self.authority.encode(self.encoding))

        # We had a match, now let's ensure that it is actually a valid host
        # address if it is IPv4
        matches = match.groupdict()
        host = matches.get("host")

        if (
            host
            and misc.IPv4_MATCHER.match(host)
            and not validators.valid_ipv4_host_address(host)
        ):
            # If we have a host, it appears to be IPv4 and it does not have
            # valid bytes, it is an InvalidAuthority.
            raise exc.InvalidAuthority(self.authority.encode(self.encoding))

        return matches

    def _match_subauthority(self):
        """Match ``self.authority`` against ``misc.SUBAUTHORITY_MATCHER``."""
        return misc.SUBAUTHORITY_MATCHER.match(self.authority)

    @property
    def host(self):
        """If present, a string representing the host.

        ``None`` is returned when the authority cannot be parsed.
        """
        try:
            authority = self.authority_info()
        except exc.InvalidAuthority:
            return None
        return authority["host"]

    @property
    def port(self):
        """If present, the port extracted from the authority.

        ``None`` is returned when the authority cannot be parsed.
        """
        try:
            authority = self.authority_info()
        except exc.InvalidAuthority:
            return None
        return authority["port"]

    @property
    def userinfo(self):
        """If present, the userinfo extracted from the authority.

        ``None`` is returned when the authority cannot be parsed.
        """
        try:
            authority = self.authority_info()
        except exc.InvalidAuthority:
            return None
        return authority["userinfo"]

    def is_absolute(self):
        """Determine if this URI Reference is an absolute URI.

        See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation.

        :returns: ``True`` if it is an absolute URI, ``False`` otherwise.
        :rtype: bool
        """
        return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit()))

    def is_valid(self, **kwargs):
        """Determine if the URI is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require_scheme: Set to ``True`` if you wish to require the
            presence of the scheme component.
        :param bool require_authority: Set to ``True`` if you wish to require
            the presence of the authority component.
        :param bool require_path: Set to ``True`` if you wish to require the
            presence of the path component.
        :param bool require_query: Set to ``True`` if you wish to require the
            presence of the query component.
        :param bool require_fragment: Set to ``True`` if you wish to require
            the presence of the fragment component.
        :returns: ``True`` if the URI is valid. ``False`` otherwise.
        :rtype: bool
        """
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this module.
        warnings.warn(
            "Please use rfc3986.validators.Validator instead. "
            "This method will be eventually removed.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Named ``checks`` so the imported ``validators`` module is not
        # shadowed inside this method.
        checks = (
            (self.scheme_is_valid, kwargs.get("require_scheme", False)),
            (self.authority_is_valid, kwargs.get("require_authority", False)),
            (self.path_is_valid, kwargs.get("require_path", False)),
            (self.query_is_valid, kwargs.get("require_query", False)),
            (self.fragment_is_valid, kwargs.get("require_fragment", False)),
        )
        return all(check(required) for check, required in checks)

    def authority_is_valid(self, require=False):
        """Determine if the authority component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require:
            Set to ``True`` to require the presence of this component.
        :returns:
            ``True`` if the authority is valid. ``False`` otherwise.
        :rtype:
            bool
        """
        warnings.warn(
            "Please use rfc3986.validators.Validator instead. "
            "This method will be eventually removed.",
            DeprecationWarning,
            stacklevel=2,
        )
        try:
            self.authority_info()
        except exc.InvalidAuthority:
            return False

        return validators.authority_is_valid(
            self.authority,
            host=self.host,
            require=require,
        )

    def scheme_is_valid(self, require=False):
        """Determine if the scheme component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the scheme is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn(
            "Please use rfc3986.validators.Validator instead. "
            "This method will be eventually removed.",
            DeprecationWarning,
            stacklevel=2,
        )
        return validators.scheme_is_valid(self.scheme, require)

    def path_is_valid(self, require=False):
        """Determine if the path component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the path is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn(
            "Please use rfc3986.validators.Validator instead. "
            "This method will be eventually removed.",
            DeprecationWarning,
            stacklevel=2,
        )
        return validators.path_is_valid(self.path, require)

    def query_is_valid(self, require=False):
        """Determine if the query component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the query is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn(
            "Please use rfc3986.validators.Validator instead. "
            "This method will be eventually removed.",
            DeprecationWarning,
            stacklevel=2,
        )
        return validators.query_is_valid(self.query, require)

    def fragment_is_valid(self, require=False):
        """Determine if the fragment component is valid.

        .. deprecated:: 1.1.0

            Use the :class:`~rfc3986.validators.Validator` object instead.

        :param bool require: Set to ``True`` to require the presence of this
            component.
        :returns: ``True`` if the fragment is valid. ``False`` otherwise.
        :rtype: bool
        """
        warnings.warn(
            "Please use rfc3986.validators.Validator instead. "
            "This method will be eventually removed.",
            DeprecationWarning,
            stacklevel=2,
        )
        return validators.fragment_is_valid(self.fragment, require)

    def normalized_equality(self, other_ref):
        """Compare this URIReference to another URIReference.

        Both references are normalized and then compared as tuples.

        :param URIReference other_ref: (required), The reference with which
            we're comparing.
        :returns: ``True`` if the references are equal, ``False`` otherwise.
        :rtype: bool
        """
        return tuple(self.normalize()) == tuple(other_ref.normalize())

    def resolve_with(self, base_uri, strict=False):
        """Use an absolute URI Reference to resolve this relative reference.

        Assuming this is a relative reference that you would like to resolve,
        use the provided base URI to resolve it.

        See http://tools.ietf.org/html/rfc3986#section-5 for more information.

        :param base_uri: Either a string or URIReference. It must be an
            absolute URI or it will raise an exception.
        :param bool strict: When ``False`` (the default), a scheme on this
            reference that equals the base URI's scheme is dropped before
            resolving, so the reference is resolved as if it had no scheme.
            When ``True`` the scheme is always kept.
        :returns: A new URIReference which is the result of resolving this
            reference using ``base_uri``.
        :rtype: :class:`URIReference`
        :raises rfc3986.exceptions.ResolutionError:
            If ``base_uri`` does not pass ``is_valid(require_scheme=True)``.
        """
        if not isinstance(base_uri, URIMixin):
            base_uri = type(self).from_string(base_uri)

        if not base_uri.is_valid(require_scheme=True):
            raise exc.ResolutionError(base_uri)

        # This is optional per
        # http://tools.ietf.org/html/rfc3986#section-5.2.1
        base_uri = base_uri.normalize()

        # The reference we're resolving
        resolving = self

        if not strict and resolving.scheme == base_uri.scheme:
            resolving = resolving.copy_with(scheme=None)

        # http://tools.ietf.org/html/rfc3986#page-32
        if resolving.scheme is not None:
            # The reference carries its own scheme: only its path is
            # normalized, nothing is taken from the base.
            return resolving.copy_with(
                path=normalizers.normalize_path(resolving.path)
            )

        if resolving.authority is not None:
            # Own authority: only the scheme comes from the base.
            return resolving.copy_with(
                scheme=base_uri.scheme,
                path=normalizers.normalize_path(resolving.path),
            )

        if resolving.path is None:
            # No path: use the base path, and the base query unless the
            # reference has a query of its own.
            if resolving.query is not None:
                query = resolving.query
            else:
                query = base_uri.query
            return resolving.copy_with(
                scheme=base_uri.scheme,
                authority=base_uri.authority,
                path=base_uri.path,
                query=query,
            )

        if resolving.path.startswith("/"):
            path = normalizers.normalize_path(resolving.path)
        else:
            # A path without a leading "/" is merged with the base path
            # before normalizing.
            path = normalizers.normalize_path(
                misc.merge_paths(base_uri, resolving.path)
            )
        return resolving.copy_with(
            scheme=base_uri.scheme,
            authority=base_uri.authority,
            path=path,
            query=resolving.query,
        )

    def unsplit(self):
        """Create a URI string from the components.

        Scheme, authority and path are emitted only when truthy; query and
        fragment are emitted whenever they are not ``None``, so an empty
        query still produces a trailing ``?``.

        :returns: The URI Reference reconstituted as a string.
        :rtype: str
        """
        # See http://tools.ietf.org/html/rfc3986#section-5.3
        result_list = []
        if self.scheme:
            result_list.extend([self.scheme, ":"])
        if self.authority:
            result_list.extend(["//", self.authority])
        if self.path:
            result_list.append(self.path)
        if self.query is not None:
            result_list.extend(["?", self.query])
        if self.fragment is not None:
            result_list.extend(["#", self.fragment])
        return "".join(result_list)

    def copy_with(
        self,
        scheme=misc.UseExisting,
        authority=misc.UseExisting,
        path=misc.UseExisting,
        query=misc.UseExisting,
        fragment=misc.UseExisting,
    ):
        """Create a copy of this reference with the new components.

        ``misc.UseExisting`` is the "not provided" sentinel, which lets
        ``None`` be passed explicitly to clear a component.

        :param str scheme:
            (optional) The scheme to use for the new reference.
        :param str authority:
            (optional) The authority to use for the new reference.
        :param str path:
            (optional) The path to use for the new reference.
        :param str query:
            (optional) The query to use for the new reference.
        :param str fragment:
            (optional) The fragment to use for the new reference.
        :returns:
            New URIReference with provided components.
        :rtype:
            URIReference
        """
        candidates = {
            "scheme": scheme,
            "authority": authority,
            "path": path,
            "query": query,
            "fragment": fragment,
        }
        # Only forward the components the caller actually supplied.
        replacements = {
            name: value
            for name, value in candidates.items()
            if value is not misc.UseExisting
        }
        uri = self._replace(**replacements)
        # The encoding is copied by hand onto the object ``_replace`` returns.
        uri.encoding = self.encoding
        return uri

View File

@@ -0,0 +1,275 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for the regular expressions crafted from ABNF."""
import sys
# https://tools.ietf.org/html/rfc3986#page-13
GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@"
GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS)
# https://tools.ietf.org/html/rfc3986#page-13
SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
# Escape the '*' for use in regular expressions
SUB_DELIMITERS_RE = r"!$&'()\*+,;="
RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)
ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
DIGIT = "0123456789"
# https://tools.ietf.org/html/rfc3986#section-2.3
# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
# "!" is a sub-delim (see SUB_DELIMS above), not an unreserved character, so
# it is deliberately absent here; this also keeps the literal set in step
# with UNRESERVED_RE below.
UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + r"._-~"
UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
# Every character that may appear without percent-encoding: reserved plus
# unreserved.
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
# We need to escape the '-' in this case:
UNRESERVED_RE = r"A-Za-z0-9._~\-"
# Percent encoded character values
PERCENT_ENCODED = PCT_ENCODED = "%[A-Fa-f0-9]{2}"
# One path character: an unreserved character, a sub-delim, ":" or "@", or
# a percent-encoded triplet.
PCHAR = "([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":@]|%s)" % PCT_ENCODED
# NOTE(sigmavirus24): We're going to use more strict regular expressions
# than appear in Appendix B for scheme. This will prevent over-eager
# consuming of items that aren't schemes.
SCHEME_RE = "[a-zA-Z][a-zA-Z0-9+.-]*"
# The loose component patterns below only split a reference into its parts.
# The authority pattern additionally refuses a backslash.
_AUTHORITY_RE = "[^\\\\/?#]*"
_PATH_RE = "[^?#]*"
_QUERY_RE = "[^#]*"
_FRAGMENT_RE = ".*"
# Loose per-component patterns, taken from
# http://tools.ietf.org/html/rfc3986#appendix-B
COMPONENT_PATTERN_DICT = dict(
    scheme=SCHEME_RE,
    authority=_AUTHORITY_RE,
    path=_PATH_RE,
    query=_QUERY_RE,
    fragment=_FRAGMENT_RE,
)
# Also based on http://tools.ietf.org/html/rfc3986#appendix-B
# Each component that matters gets a named group, so callers can read the
# result through either ``groupdict()`` or ``groups()``. The surrounding
# delimiters sit in non-capturing groups and therefore never show up in the
# match results.
URL_PARSING_RE = "".join(
    (
        r"(?:(?P<scheme>{scheme}):)?",
        r"(?://(?P<authority>{authority}))?",
        r"(?P<path>{path})",
        r"(?:\?(?P<query>{query}))?",
        r"(?:#(?P<fragment>{fragment}))?",
    )
).format_map(COMPONENT_PATTERN_DICT)
# #########################
# Authority Matcher Section
# #########################
# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
# The pattern for a regular name, e.g., www.google.com, api.github.com
# It accepts any run (possibly empty) of percent-encoded triplets,
# sub-delims and unreserved characters.
REGULAR_NAME_RE = REG_NAME = "((?:{}|[{}])*)".format(
    "%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + UNRESERVED_RE
)
# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1
# Only the shape is checked (four dot-separated groups of one to three
# digits); the pattern itself does not limit a group to 0-255.
IPv4_RE = r"([0-9]{1,3}\.){3}[0-9]{1,3}"
# Hexadecimal characters used in each piece of an IPv6 address
HEXDIG_RE = "[0-9A-Fa-f]{1,4}"
# Least-significant 32 bits of an IPv6 address: either two hex groups joined
# by ":" or an embedded IPv4 address.
LS32_RE = "({hex}:{hex}|{ipv4})".format(hex=HEXDIG_RE, ipv4=IPv4_RE)
# Substitutions into the following patterns for IPv6 patterns defined
# http://tools.ietf.org/html/rfc3986#page-20
_subs = {"hex": HEXDIG_RE, "ls32": LS32_RE}
# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details
# about ABNF (Augmented Backus-Naur Form) use in the comments
# Each entry is the regex for the ABNF alternative quoted in the comment
# directly above it.
variations = [
    # 6( h16 ":" ) ls32
    "(%(hex)s:){6}%(ls32)s" % _subs,
    # "::" 5( h16 ":" ) ls32
    "::(%(hex)s:){5}%(ls32)s" % _subs,
    # [ h16 ] "::" 4( h16 ":" ) ls32
    "(%(hex)s)?::(%(hex)s:){4}%(ls32)s" % _subs,
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s" % _subs,
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s" % _subs,
    # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
    "((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s" % _subs,
    # [ *4( h16 ":" ) h16 ] "::" ls32
    "((%(hex)s:){0,4}%(hex)s)?::%(ls32)s" % _subs,
    # [ *5( h16 ":" ) h16 ] "::" h16
    "((%(hex)s:){0,5}%(hex)s)?::%(hex)s" % _subs,
    # [ *6( h16 ":" ) h16 ] "::"
    "((%(hex)s:){0,6}%(hex)s)?::" % _subs,
]
# An IPv6 address is any one of the nine variations above, tried in order.
IPv6_RE = "(({})|({})|({})|({})|({})|({})|({})|({})|({}))".format(*variations)
# IPvFuture: "v", one or more hex digits, ".", then one or more unreserved
# characters, sub-delims or ":".
IPv_FUTURE_RE = r"v[0-9A-Fa-f]+\.[%s]+" % (
    UNRESERVED_RE + SUB_DELIMITERS_RE + ":"
)
# RFC 6874 Zone ID ABNF
ZONE_ID = "(?:[" + UNRESERVED_RE + "]|" + PCT_ENCODED + ")+"
# The RFC4007 variant introduces the optional zone id with either "%25" or
# a bare "%"; the plain variant below it accepts "%25" only.
IPv6_ADDRZ_RFC4007_RE = IPv6_RE + "(?:(?:%25|%)" + ZONE_ID + ")?"
IPv6_ADDRZ_RE = IPv6_RE + "(?:%25" + ZONE_ID + ")?"
# A bracketed IP literal: an IPv6 address (RFC4007 zone id variant) or an
# IPvFuture address.
IP_LITERAL_RE = r"\[({}|{})\]".format(
    IPv6_ADDRZ_RFC4007_RE,
    IPv_FUTURE_RE,
)
# Pattern for matching the host piece of the authority
HOST_RE = HOST_PATTERN = "({}|{}|{})".format(
    REG_NAME,
    IPv4_RE,
    IP_LITERAL_RE,
)
# Userinfo: one or more unreserved characters, sub-delims, ":" or
# percent-encoded triplets. The pattern is anchored at the start only.
USERINFO_RE = (
    "^([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":]|%s)+" % (PCT_ENCODED)
)
# One to five decimal digits; the numeric range is not checked here.
PORT_RE = "[0-9]{1,5}"
# ####################
# Path Matcher Section
# ####################
# The path patterns below follow the grammar in
# http://tools.ietf.org/html/rfc3986#section-3.3
segments = dict(
    [
        # Segment that may be empty
        ("segment", PCHAR + "*"),
        # Segment with at least one character
        ("segment-nz", PCHAR + "+"),
        # Segment with at least one character and no ":" (the colon is
        # removed from the PCHAR pattern text)
        ("segment-nz-nc", PCHAR.replace(":", "") + "+"),
    ]
)
# The path types of Section 3.3 (linked above)
PATH_EMPTY = "^$"
PATH_ROOTLESS = segments["segment-nz"] + "(/" + segments["segment"] + ")*"
PATH_NOSCHEME = segments["segment-nz-nc"] + "(/" + segments["segment"] + ")*"
PATH_ABSOLUTE = "/(" + PATH_ROOTLESS + ")?"
PATH_ABEMPTY = "(/" + segments["segment"] + ")*"
# A whole path is any one of the path types, anchored at both ends.
PATH_RE = (
    "^("
    + "|".join(
        (
            PATH_ABEMPTY,
            PATH_ABSOLUTE,
            PATH_NOSCHEME,
            PATH_ROOTLESS,
            PATH_EMPTY,
        )
    )
    + ")$"
)
# Query and fragment share one pattern: "/", "?", ":", "@", unreserved
# characters, sub-delims or percent-encoded triplets.
FRAGMENT_RE = QUERY_RE = "".join(
    ("^([/?:@", UNRESERVED_RE, SUB_DELIMITERS_RE, "]|", PCT_ENCODED, ")*$")
)
# ##########################
# Relative reference matcher
# ##########################
# relative-part, see http://tools.ietf.org/html/rfc3986#section-4.2
# "//" authority path-abempty, or one of the path types; unlike hier-part
# below, the rootless alternative is the no-scheme form.
RELATIVE_PART_RE = (
    "("
    + "|".join(
        (
            "//" + COMPONENT_PATTERN_DICT["authority"] + PATH_ABEMPTY,
            PATH_ABSOLUTE,
            PATH_NOSCHEME,
            PATH_EMPTY,
        )
    )
    + ")"
)
# hier-part, see http://tools.ietf.org/html/rfc3986#section-3
HIER_PART_RE = (
    "("
    + "|".join(
        (
            "//" + COMPONENT_PATTERN_DICT["authority"] + PATH_ABEMPTY,
            PATH_ABSOLUTE,
            PATH_ROOTLESS,
            PATH_EMPTY,
        )
    )
    + ")"
)
# ###############
# IRIs / RFC 3987
# ###############
# Character-class ranges for private-use and UCS characters. The ranges
# within the Basic Multilingual Plane are always present.
IPRIVATE = "\uE000-\uF8FF"
UCSCHAR_RE = "\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"
# Only wide-unicode gets the high-ranges of UCSCHAR
if sys.maxunicode > 0xFFFF:  # pragma: no cover
    IPRIVATE += "\U000F0000-\U000FFFFD\U00100000-\U0010FFFD"
    UCSCHAR_RE += (
        "\U00010000-\U0001FFFD\U00020000-\U0002FFFD"
        "\U00030000-\U0003FFFD\U00040000-\U0004FFFD"
        "\U00050000-\U0005FFFD\U00060000-\U0006FFFD"
        "\U00070000-\U0007FFFD\U00080000-\U0008FFFD"
        "\U00090000-\U0009FFFD\U000A0000-\U000AFFFD"
        "\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD"
        "\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD"
    )
# IRI counterparts of the URI patterns: the same shapes, but built on
# IUNRESERVED_RE, which appends the UCSCHAR ranges to the ASCII unreserved
# characters.
IUNRESERVED_RE = "A-Za-z0-9\\._~\\-" + UCSCHAR_RE
# One IRI path character: an iunreserved character, a sub-delim, ":" or "@",
# or a percent-encoded triplet.
IPCHAR = "([" + IUNRESERVED_RE + SUB_DELIMITERS_RE + ":@]|%s)" % PCT_ENCODED
isegments = {
    "isegment": IPCHAR + "*",
    # Non-zero length segment
    "isegment-nz": IPCHAR + "+",
    # Non-zero length segment without ":"
    "isegment-nz-nc": IPCHAR.replace(":", "") + "+",
}
IPATH_ROOTLESS = "%(isegment-nz)s(/%(isegment)s)*" % isegments
IPATH_NOSCHEME = "%(isegment-nz-nc)s(/%(isegment)s)*" % isegments
IPATH_ABSOLUTE = "/(?:%s)?" % IPATH_ROOTLESS
IPATH_ABEMPTY = "(?:/%(isegment)s)*" % isegments
# A whole IRI path is any one of the path types, anchored at both ends. The
# empty-path alternative is the ASCII PATH_EMPTY pattern.
IPATH_RE = "^(?:{}|{}|{}|{}|{})$".format(
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_NOSCHEME,
    IPATH_ROOTLESS,
    PATH_EMPTY,
)
# Regular name for IRIs: any run (possibly empty) of percent-encoded
# triplets, sub-delims and iunreserved characters.
IREGULAR_NAME_RE = IREG_NAME = "(?:{}|[{}])*".format(
    "%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + IUNRESERVED_RE
)
# The IRI host reuses the ASCII IPv4 and IP-literal patterns; only the
# regular-name alternative differs.
IHOST_RE = IHOST_PATTERN = "({}|{}|{})".format(
    IREG_NAME,
    IPv4_RE,
    IP_LITERAL_RE,
)
# Anchored at the start only.
IUSERINFO_RE = (
    "^(?:[" + IUNRESERVED_RE + SUB_DELIMITERS_RE + ":]|%s)+" % (PCT_ENCODED)
)
IFRAGMENT_RE = (
    "^(?:[/?:@" + IUNRESERVED_RE + SUB_DELIMITERS_RE + "]|%s)*$" % PCT_ENCODED
)
# Same character class as IFRAGMENT_RE plus the IPRIVATE (private-use)
# ranges.
IQUERY_RE = (
    "^(?:[/?:@"
    + IUNRESERVED_RE
    + SUB_DELIMITERS_RE
    + IPRIVATE
    + "]|%s)*$" % PCT_ENCODED
)
# IRI versions of the relative-part and hier-part patterns. The authority
# piece is the loose component pattern from COMPONENT_PATTERN_DICT.
IRELATIVE_PART_RE = "(//{}{}|{}|{}|{})".format(
    COMPONENT_PATTERN_DICT["authority"],
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_NOSCHEME,
    PATH_EMPTY,
)
IHIER_PART_RE = "(//{}{}|{}|{}|{})".format(
    COMPONENT_PATTERN_DICT["authority"],
    IPATH_ABEMPTY,
    IPATH_ABSOLUTE,
    IPATH_ROOTLESS,
    PATH_EMPTY,
)

View File

@@ -0,0 +1,104 @@
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module containing the simple and functional API for rfc3986.
This module defines functions and provides access to the public attributes
and classes of rfc3986.
"""
from .iri import IRIReference
from .parseresult import ParseResult
from .uri import URIReference
def uri_reference(uri, encoding="utf-8"):
    """Build a :class:`URIReference` from a URI string.

    Shorthand for ``URIReference.from_string(uri, encoding)``.

    :param str uri: The URI string to parse.
    :param str encoding: The encoding of ``uri``.
    :returns: The parsed reference.
    :rtype: :class:`URIReference`
    """
    reference = URIReference.from_string(uri, encoding)
    return reference
def iri_reference(iri, encoding="utf-8"):
    """Build an :class:`IRIReference` from an IRI string.

    Shorthand for ``IRIReference.from_string(iri, encoding)``.

    :param str iri: The IRI string to parse.
    :param str encoding: The encoding of ``iri``.
    :returns: The parsed reference.
    :rtype: :class:`IRIReference`
    """
    reference = IRIReference.from_string(iri, encoding)
    return reference
def is_valid_uri(uri, encoding="utf-8", **kwargs):
    """Report whether the given URI string is valid.

    Shorthand for ``URIReference.from_string(uri, encoding).is_valid(...)``;
    every extra keyword argument is passed on to ``is_valid`` unchanged.

    :param str uri: The URI to be validated.
    :param str encoding: The encoding of ``uri``.
    :param bool require_scheme: Set to ``True`` to require the presence of
        the scheme component.
    :param bool require_authority: Set to ``True`` to require the presence
        of the authority component.
    :param bool require_path: Set to ``True`` to require the presence of
        the path component.
    :param bool require_query: Set to ``True`` to require the presence of
        the query component.
    :param bool require_fragment: Set to ``True`` to require the presence
        of the fragment component.
    :returns: ``True`` if the URI is valid, ``False`` otherwise.
    :rtype: bool
    """
    reference = URIReference.from_string(uri, encoding)
    return reference.is_valid(**kwargs)
def normalize_uri(uri, encoding="utf-8"):
    """Return the normalized form of the given URI string.

    Shorthand for
    ``URIReference.from_string(uri, encoding).normalize().unsplit()``.

    :param str uri: The URI to be normalized.
    :param str encoding: The encoding of ``uri``.
    :returns: The normalized URI.
    :rtype: str
    """
    return URIReference.from_string(uri, encoding).normalize().unsplit()
def urlparse(uri, encoding="utf-8"):
    """Parse the given URI into a ParseResult.

    Intended as a partial stand-in for the standard library's ``urlparse``.
    The string is parsed with ``strict=False``.

    :param str uri: The URI to be parsed.
    :param str encoding: The encoding of ``uri``.
    :returns: The parsed URI.
    :rtype: :class:`~rfc3986.parseresult.ParseResult`
    """
    parsed = ParseResult.from_string(uri, encoding, strict=False)
    return parsed

View File

@@ -0,0 +1,388 @@
# Copyright (c) 2017 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the logic for the URIBuilder object."""
from . import compat
from . import normalizers
from . import uri
from . import uri_reference
class URIBuilder:
"""Object to aid in building up a URI Reference from parts.
.. note::
This object should be instantiated by the user, but it's recommended
that it is not provided with arguments. Instead, use the available
method to populate the fields.
"""
def __init__(
self,
scheme=None,
userinfo=None,
host=None,
port=None,
path=None,
query=None,
fragment=None,
):
"""Initialize our URI builder.
:param str scheme:
(optional)
:param str userinfo:
(optional)
:param str host:
(optional)
:param int port:
(optional)
:param str path:
(optional)
:param str query:
(optional)
:param str fragment:
(optional)
"""
self.scheme = scheme
self.userinfo = userinfo
self.host = host
self.port = port
self.path = path
self.query = query
self.fragment = fragment
def __repr__(self):
"""Provide a convenient view of our builder object."""
formatstr = (
"URIBuilder(scheme={b.scheme}, userinfo={b.userinfo}, "
"host={b.host}, port={b.port}, path={b.path}, "
"query={b.query}, fragment={b.fragment})"
)
return formatstr.format(b=self)
@classmethod
def from_uri(cls, reference):
"""Initialize the URI builder from another URI.
Takes the given URI reference and creates a new URI builder instance
populated with the values from the reference. If given a string it will
try to convert it to a reference before constructing the builder.
"""
if not isinstance(reference, uri.URIReference):
reference = uri_reference(reference)
return cls(
scheme=reference.scheme,
userinfo=reference.userinfo,
host=reference.host,
port=reference.port,
path=reference.path,
query=reference.query,
fragment=reference.fragment,
)
def add_scheme(self, scheme):
"""Add a scheme to our builder object.
After normalizing, this will generate a new URIBuilder instance with
the specified scheme and all other attributes the same.
.. code-block:: python
>>> URIBuilder().add_scheme('HTTPS')
URIBuilder(scheme='https', userinfo=None, host=None, port=None,
path=None, query=None, fragment=None)
"""
scheme = normalizers.normalize_scheme(scheme)
return URIBuilder(
scheme=scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_credentials(self, username, password):
"""Add credentials as the userinfo portion of the URI.
.. code-block:: python
>>> URIBuilder().add_credentials('root', 's3crete')
URIBuilder(scheme=None, userinfo='root:s3crete', host=None,
port=None, path=None, query=None, fragment=None)
>>> URIBuilder().add_credentials('root', None)
URIBuilder(scheme=None, userinfo='root', host=None,
port=None, path=None, query=None, fragment=None)
"""
if username is None:
raise ValueError("Username cannot be None")
userinfo = normalizers.normalize_username(username)
if password is not None:
userinfo = "{}:{}".format(
userinfo,
normalizers.normalize_password(password),
)
return URIBuilder(
scheme=self.scheme,
userinfo=userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_host(self, host):
"""Add hostname to the URI.
.. code-block:: python
>>> URIBuilder().add_host('google.com')
URIBuilder(scheme=None, userinfo=None, host='google.com',
port=None, path=None, query=None, fragment=None)
"""
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=normalizers.normalize_host(host),
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_port(self, port):
"""Add port to the URI.
.. code-block:: python
>>> URIBuilder().add_port(80)
URIBuilder(scheme=None, userinfo=None, host=None, port='80',
path=None, query=None, fragment=None)
>>> URIBuilder().add_port(443)
URIBuilder(scheme=None, userinfo=None, host=None, port='443',
path=None, query=None, fragment=None)
"""
port_int = int(port)
if port_int < 0:
raise ValueError(
"ports are not allowed to be negative. You provided {}".format(
port_int,
)
)
if port_int > 65535:
raise ValueError(
"ports are not allowed to be larger than 65535. "
"You provided {}".format(
port_int,
)
)
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=f"{port_int}",
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_path(self, path):
"""Add a path to the URI.
.. code-block:: python
>>> URIBuilder().add_path('sigmavirus24/rfc3985')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/sigmavirus24/rfc3986', query=None, fragment=None)
>>> URIBuilder().add_path('/checkout.php')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/checkout.php', query=None, fragment=None)
"""
if not path.startswith("/"):
path = f"/{path}"
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=normalizers.normalize_path(path),
query=self.query,
fragment=self.fragment,
)
def extend_path(self, path):
"""Extend the existing path value with the provided value.
.. versionadded:: 1.5.0
.. code-block:: python
>>> URIBuilder(path="/users").extend_path("/sigmavirus24")
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/users/sigmavirus24', query=None, fragment=None)
>>> URIBuilder(path="/users/").extend_path("/sigmavirus24")
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/users/sigmavirus24', query=None, fragment=None)
>>> URIBuilder(path="/users/").extend_path("sigmavirus24")
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/users/sigmavirus24', query=None, fragment=None)
>>> URIBuilder(path="/users").extend_path("sigmavirus24")
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/users/sigmavirus24', query=None, fragment=None)
"""
existing_path = self.path or ""
path = "{}/{}".format(existing_path.rstrip("/"), path.lstrip("/"))
return self.add_path(path)
def add_query_from(self, query_items):
"""Generate and add a query a dictionary or list of tuples.
.. code-block:: python
>>> URIBuilder().add_query_from({'a': 'b c'})
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c', fragment=None)
>>> URIBuilder().add_query_from([('a', 'b c')])
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c', fragment=None)
"""
query = normalizers.normalize_query(compat.urlencode(query_items))
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=query,
fragment=self.fragment,
)
def extend_query_with(self, query_items):
"""Extend the existing query string with the new query items.
.. versionadded:: 1.5.0
.. code-block:: python
>>> URIBuilder(query='a=b+c').extend_query_with({'a': 'b c'})
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c&a=b+c', fragment=None)
>>> URIBuilder(query='a=b+c').extend_query_with([('a', 'b c')])
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c&a=b+c', fragment=None)
"""
original_query_items = compat.parse_qsl(self.query or "")
if not isinstance(query_items, list):
query_items = list(query_items.items())
return self.add_query_from(original_query_items + query_items)
def add_query(self, query):
    """Add a pre-formatted query string to the URI.

    The string is run through ``normalizers.normalize_query`` and stored
    on a new builder; this builder is left untouched.

    .. code-block:: python

        >>> URIBuilder().add_query('a=b&c=d')
        URIBuilder(scheme=None, userinfo=None, host=None, port=None,
                path=None, query='a=b&c=d', fragment=None)

    """
    normalized_query = normalizers.normalize_query(query)
    components = {
        "scheme": self.scheme,
        "userinfo": self.userinfo,
        "host": self.host,
        "port": self.port,
        "path": self.path,
        "query": normalized_query,
        "fragment": self.fragment,
    }
    return URIBuilder(**components)
def add_fragment(self, fragment):
    """Add a fragment to the URI.

    The fragment is run through ``normalizers.normalize_fragment`` and
    stored on a new builder; this builder is left untouched.

    .. code-block:: python

        >>> URIBuilder().add_fragment('section-2.6.1')
        URIBuilder(scheme=None, userinfo=None, host=None, port=None,
                path=None, query=None, fragment='section-2.6.1')

    """
    normalized_fragment = normalizers.normalize_fragment(fragment)
    components = {
        "scheme": self.scheme,
        "userinfo": self.userinfo,
        "host": self.host,
        "port": self.port,
        "path": self.path,
        "query": self.query,
        "fragment": normalized_fragment,
    }
    return URIBuilder(**components)
def finalize(self):
    """Create a URIReference from our builder.

    The userinfo, host and port are combined into a single authority
    string by ``normalizers.normalize_authority``; the remaining
    components are handed over unchanged.

    .. code-block:: python

        >>> URIBuilder().add_scheme('https').add_host('github.com'
        ... ).add_path('sigmavirus24/rfc3986').finalize().unsplit()
        'https://github.com/sigmavirus24/rfc3986'

        >>> URIBuilder().add_scheme('https').add_host('github.com'
        ... ).add_path('sigmavirus24/rfc3986').add_credentials(
        ... 'sigmavirus24', 'not-re@l').finalize().unsplit()
        'https://sigmavirus24:not-re%40l@github.com/sigmavirus24/rfc3986'

    """
    authority_parts = (self.userinfo, self.host, self.port)
    authority = normalizers.normalize_authority(authority_parts)
    return uri.URIReference(
        self.scheme, authority, self.path, self.query, self.fragment
    )
def geturl(self):
    """Generate the URL from this builder.

    .. versionadded:: 1.5.0

    Shortcut for ``finalize().unsplit()`` when the intermediate
    :class:`rfc3986.uri.URIReference` is not needed.
    """
    reference = self.finalize()
    return reference.unsplit()

View File

@@ -0,0 +1,59 @@
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compatibility module for Python 2 and 3 support."""
import sys
try:
from urllib.parse import quote as urlquote
except ImportError: # Python 2.x
from urllib import quote as urlquote
try:
from urllib.parse import parse_qsl
except ImportError: # Python 2.x
from urlparse import parse_qsl
try:
from urllib.parse import urlencode
except ImportError: # Python 2.x
from urllib import urlencode
# Public names of the compat module: the two text/bytes coercion helpers
# defined below plus the urllib functions imported above.
__all__ = (
    "to_bytes",
    "to_str",
    "urlquote",
    "urlencode",
    "parse_qsl",
)
# Interpreter-version flags derived from ``sys.version_info``.
PY3 = (3, 0) <= sys.version_info < (4, 0)
PY2 = (2, 6) <= sys.version_info < (2, 8)

if PY3:
    # Give the text type the name ``unicode`` so to_str can refer to it.
    unicode = str  # Python 3.x


def to_str(b, encoding="utf-8"):
    """Return ``b`` as text, decoding it with ``encoding`` when needed.

    Only objects that expose ``decode`` and are not already text are
    decoded; anything else (text, ``None``, numbers, ...) is returned
    unchanged.
    """
    needs_decoding = hasattr(b, "decode") and not isinstance(b, unicode)
    return b.decode(encoding) if needs_decoding else b
def to_bytes(s, encoding="utf-8"):
    """Return ``s`` as bytes, encoding it with ``encoding`` when needed.

    Only objects that expose ``encode`` and are not already ``bytes`` are
    encoded; anything else is returned unchanged.
    """
    needs_encoding = hasattr(s, "encode") and not isinstance(s, bytes)
    return s.encode(encoding) if needs_encoding else s

View File

@@ -0,0 +1,120 @@
"""Exceptions module for rfc3986."""
from . import compat
class RFC3986Exception(Exception):
    """Root of the rfc3986 exception hierarchy."""
class InvalidAuthority(RFC3986Exception):
    """Raised when an authority string is not valid."""

    def __init__(self, authority):
        """Build the message from the offending authority.

        The authority is passed through ``compat.to_str`` before it is
        interpolated into the message.
        """
        authority_text = compat.to_str(authority)
        message = f"The authority ({authority_text}) is not valid."
        super().__init__(message)
class InvalidPort(RFC3986Exception):
    """Raised when a port is not valid."""

    def __init__(self, port):
        """Build the message from the offending port value."""
        message = 'The port ("{}") is not valid.'.format(port)
        super().__init__(message)
class ResolutionError(RFC3986Exception):
    """Raised when a URI cannot be used for reference resolution."""

    def __init__(self, uri):
        """Build the message from ``uri.unsplit()``."""
        rendered = uri.unsplit()
        super().__init__(
            f"{rendered} does not meet the requirements for resolution."
        )
class ValidationError(RFC3986Exception):
    """Base class for errors raised while validating a URI."""
class MissingComponentError(ValidationError):
    """Raised when one or more required components are missing."""

    def __init__(self, uri, *component_names):
        """Record the URI and the sorted names of the missing components.

        The exception args are the message, the URI and the sorted
        component list.
        """
        self.uri = uri
        self.components = sorted(component_names)
        # Grammatical agreement: "was" for a single name, "were" otherwise.
        verb = "were" if len(component_names) > 1 else "was"
        listed = ", ".join(self.components)
        message = f"{listed} {verb} required but missing"
        super().__init__(message, uri, self.components)
class UnpermittedComponentError(ValidationError):
    """Raised when a component holds a value outside the allowed set."""

    def __init__(self, component_name, component_value, allowed_values):
        """Record the component, its value and the permitted values.

        The exception args are the message followed by the three
        arguments; the message lists the allowed values in sorted order.
        """
        permitted = sorted(allowed_values)
        message = (
            f"{component_name} was required to be one of {permitted!r} "
            f"but was {component_value!r}"
        )
        super().__init__(
            message, component_name, component_value, allowed_values
        )
        self.component_name = component_name
        self.component_value = component_value
        self.allowed_values = allowed_values
class PasswordForbidden(ValidationError):
    """Raised when userinfo carries a password and that is forbidden."""

    def __init__(self, uri):
        """Record the URI that failed validation.

        ``uri`` may be an object with an ``unsplit()`` method or anything
        else (e.g. a plain string), which is then shown as-is.
        """
        # Fall back to the object itself when it cannot be unsplit.
        render = getattr(uri, "unsplit", lambda: uri)
        rendered = render()
        super().__init__(
            f'"{rendered}" contained a password when validation forbade it'
        )
        self.uri = uri
class InvalidComponentsError(ValidationError):
    """Raised when one or more components fail validation."""

    def __init__(self, uri, *component_names):
        """Record the URI and the sorted names of the invalid components.

        The exception args are the message, the URI and the sorted
        component list.
        """
        self.uri = uri
        self.components = sorted(component_names)
        # Grammatical agreement: "was" for a single name, "were" otherwise.
        verb = "were" if len(component_names) > 1 else "was"
        listed = ", ".join(self.components)
        message = f"{listed} {verb} found to be invalid"
        super().__init__(message, uri, self.components)
class MissingDependencyError(RFC3986Exception):
    """Exception raised when an IRI is encoded without the 'idna' module.

    Carries no behaviour of its own; the message is supplied by the code
    that raises it.
    """

View File

@@ -0,0 +1,161 @@
"""Module containing the implementation of the IRIReference class."""
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import exceptions
from . import misc
from . import normalizers
from . import uri
try:
import idna
except ImportError: # pragma: no cover
idna = None
class IRIReference(
    namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin
):
    """Immutable object representing a parsed IRI Reference.

    Can be encoded into an URIReference object via the procedure
    specified in RFC 3987 Section 3.1

    .. note::
        The IRI submodule is a new interface and may possibly change in
        the future. Check for changes to the interface when upgrading.
    """

    # NOTE(review): an ordinary class attribute named ``slots`` (not
    # ``__slots__``); kept as-is to preserve behaviour.
    slots = ()

    def __new__(
        cls, scheme, authority, path, query, fragment, encoding="utf-8"
    ):
        """Create a new IRIReference.

        Empty ``scheme``, ``authority`` and ``path`` values are stored as
        ``None``; ``query`` and ``fragment`` are stored unchanged. The
        encoding is kept as the ``encoding`` attribute of the instance.
        """
        ref = super().__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment,
        )
        ref.encoding = encoding
        return ref

    # Defining __eq__ in a class body makes Python set __hash__ to None
    # unless it is given explicitly, which made instances unhashable.
    # Equality below compares the component tuples, so the tuple hash is
    # consistent with it (and matches what URIReference does).
    __hash__ = tuple.__hash__

    def __eq__(self, other):
        """Compare this reference to another.

        ``other`` may be a tuple of components (including another
        reference) or anything ``from_string`` accepts.

        :raises TypeError: if ``other`` cannot be parsed into a reference.
        """
        other_ref = other
        if isinstance(other, tuple):
            other_ref = self.__class__(*other)
        elif not isinstance(other, IRIReference):
            try:
                other_ref = self.__class__.from_string(other)
            except TypeError:
                raise TypeError(
                    "Unable to compare {}() to {}()".format(
                        type(self).__name__, type(other).__name__
                    )
                )

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        return tuple(self) == tuple(other_ref)

    def _match_subauthority(self):
        """Match the authority against the IRI sub-authority pattern."""
        return misc.ISUBAUTHORITY_MATCHER.match(self.authority)

    @classmethod
    def from_string(cls, iri_string, encoding="utf-8"):
        """Parse an IRI reference from the given unicode IRI string.

        :param str iri_string: Unicode IRI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`IRIReference` or subclass thereof
        """
        iri_string = compat.to_str(iri_string, encoding)

        split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
        # Scheme and authority are stored as matched; path, query and
        # fragment go through encode_component.
        return cls(
            split_iri["scheme"],
            split_iri["authority"],
            normalizers.encode_component(split_iri["path"], encoding),
            normalizers.encode_component(split_iri["query"], encoding),
            normalizers.encode_component(split_iri["fragment"], encoding),
            encoding,
        )

    def encode(self, idna_encoder=None):  # noqa: C901
        """Encode an IRIReference into a URIReference instance.

        If the ``idna`` module is installed or the ``rfc3986[idna]``
        extra is used then unicode characters in the IRI host
        component will be encoded with IDNA2008.

        :param idna_encoder:
            Function that encodes each part of the host component
            If not given will raise an exception if the IRI
            contains a host component.
        :raises exceptions.MissingDependencyError:
            if no encoder is given, the reference has an authority and
            the ``idna`` module could not be imported.
        :raises exceptions.InvalidAuthority:
            if the default encoder fails to IDNA-encode a host label.
        :rtype: uri.URIReference
        :returns: A URI reference
        """
        authority = self.authority
        if authority:
            if idna_encoder is None:
                if idna is None:  # pragma: no cover
                    raise exceptions.MissingDependencyError(
                        "Could not import the 'idna' module "
                        "and the IRI hostname requires encoding"
                    )

                def idna_encoder(name):
                    # ASCII ends at code point 127, so anything above it
                    # needs IDNA encoding (the old ``> 128`` test let
                    # U+0080 through unencoded).
                    if any(ord(c) > 127 for c in name):
                        try:
                            return idna.encode(
                                name.lower(), strict=True, std3_rules=True
                            )
                        except idna.IDNAError:
                            raise exceptions.InvalidAuthority(self.authority)
                    return name

            # Rebuild the authority from its encoded sub-components.
            authority = ""
            if self.host:
                authority = ".".join(
                    [
                        compat.to_str(idna_encoder(part))
                        for part in self.host.split(".")
                    ]
                )

            if self.userinfo is not None:
                authority = (
                    normalizers.encode_component(self.userinfo, self.encoding)
                    + "@"
                    + authority
                )

            if self.port is not None:
                authority += ":" + str(self.port)

        return uri.URIReference(
            self.scheme,
            authority,
            path=self.path,
            query=self.query,
            fragment=self.fragment,
            encoding=self.encoding,
        )

View File

@@ -0,0 +1,131 @@
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module containing compiled regular expressions and constants.
This module contains important constants, patterns, and compiled regular
expressions for parsing and validating URIs and their components.
"""
import re
from . import abnf_regexp
# These are enumerated for the named tuple used as a superclass of
# URIReference
URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"]

# Character classes re-exported from abnf_regexp, keyed by name.
important_characters = {
    "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS,
    "sub_delimiters": abnf_regexp.SUB_DELIMITERS,
    # We need to escape the '*' in this case
    "re_sub_delimiters": abnf_regexp.SUB_DELIMITERS_RE,
    "unreserved_chars": abnf_regexp.UNRESERVED_CHARS,
    # We need to escape the '-' in this case:
    "re_unreserved": abnf_regexp.UNRESERVED_RE,
}

# For details about delimiters and reserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET
# For details about unreserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET

# Splits a URI string into the named groups consumed via ``groupdict()``.
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)

# Splits an authority into optional userinfo, host and optional port.
SUBAUTHORITY_MATCHER = re.compile(
    (
        "^(?:(?P<userinfo>{})@)?"  # userinfo
        "(?P<host>{})"  # host
        ":?(?P<port>{})?$"  # port
    ).format(
        abnf_regexp.USERINFO_RE, abnf_regexp.HOST_PATTERN, abnf_regexp.PORT_RE
    )
)

# Anchored (whole-string) matchers for the individual host forms.
HOST_MATCHER = re.compile("^" + abnf_regexp.HOST_RE + "$")
IPv4_MATCHER = re.compile("^" + abnf_regexp.IPv4_RE + "$")
# Bracketed IPv6 literal; built from the RFC 4007 zone-id variant.
IPv6_MATCHER = re.compile(r"^\[" + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r"\]$")

# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(r"^\[%s\]$" % (abnf_regexp.IPv6_ADDRZ_RE))

# Matcher used to validate path components
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)

# ##################################
# Query and Fragment Matcher Section
# ##################################

QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE)

# Fragments are checked with the very same compiled pattern as queries.
FRAGMENT_MATCHER = QUERY_MATCHER

# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
SCHEME_MATCHER = re.compile(f"^{abnf_regexp.SCHEME_RE}$")

# Relative part followed by an optional "?query" and optional "#fragment".
RELATIVE_REF_MATCHER = re.compile(
    r"^%s(\?%s)?(#%s)?$"
    % (
        abnf_regexp.RELATIVE_PART_RE,
        abnf_regexp.QUERY_RE,
        abnf_regexp.FRAGMENT_RE,
    )
)

# See http://tools.ietf.org/html/rfc3986#section-4.3
# NOTE(review): QUERY_RE is sliced with [1:-1] here but used whole in
# RELATIVE_REF_MATCHER above -- presumably to strip anchors; confirm
# against abnf_regexp.
ABSOLUTE_URI_MATCHER = re.compile(
    r"^%s:%s(\?%s)?$"
    % (
        abnf_regexp.COMPONENT_PATTERN_DICT["scheme"],
        abnf_regexp.HIER_PART_RE,
        abnf_regexp.QUERY_RE[1:-1],
    )
)

# ###############
# IRIs / RFC 3987
# ###############

# Same parsing pattern as URI_MATCHER, compiled with re.UNICODE.
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)

# IRI counterpart of SUBAUTHORITY_MATCHER (iuserinfo / ihost patterns).
ISUBAUTHORITY_MATCHER = re.compile(
    (
        "^(?:(?P<userinfo>{})@)?"  # iuserinfo
        "(?P<host>{})"  # ihost
        ":?(?P<port>{})?$"  # port
    ).format(
        abnf_regexp.IUSERINFO_RE, abnf_regexp.IHOST_RE, abnf_regexp.PORT_RE
    ),
    re.UNICODE,
)
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative URI's path.

    Implements the "Merge Paths" routine of RFC 3986, Section 5.2.3:

    * if the base has an authority and no path, the result is
      ``"/" + relative_path``;
    * otherwise the result is the base path up to and including its
      right-most ``"/"`` followed by ``relative_path``; a base path
      without any ``"/"`` is dropped entirely.

    :param base_uri: object exposing ``path`` and ``authority`` attributes.
    :param str relative_path: path of the reference being resolved.
    :returns: the merged path (dot segments are *not* removed here).
    :rtype: str
    """
    if base_uri.path is None and base_uri.authority is not None:
        return "/" + relative_path

    path = base_uri.path or ""
    last_slash = path.rfind("/")
    if last_slash == -1:
        # No "/" at all: the RFC excludes the whole base path. Slicing
        # with the -1 from rfind() used to drop only the last character
        # and keep the rest ("foo" merged with "bar" gave "fo/bar").
        return relative_path
    return path[: last_slash + 1] + relative_path
# Unique sentinel used as the default for ``copy_with`` keyword arguments:
# an argument left at this value means "keep the component's current value".
UseExisting = object()

View File

@@ -0,0 +1,171 @@
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module with functions to normalize components."""
import re
from . import compat
from . import misc
def normalize_scheme(scheme):
    """Return the scheme component in lower case."""
    lowered = scheme.lower()
    return lowered
def normalize_authority(authority):
    """Build a normalized authority string from its three parts.

    :param authority: ``(userinfo, host, port)``; falsy parts are omitted.
    :returns:
        ``userinfo@host:port`` with the percent-escapes of the userinfo
        upper-cased and the host passed through ``normalize_host``.
    """
    userinfo, host, port = authority
    pieces = []
    if userinfo:
        pieces.append(normalize_percent_characters(userinfo) + "@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        pieces.append(":" + port)
    return "".join(pieces)
def normalize_username(username):
    """Quote a username with ``compat.urlquote`` for use in userinfo."""
    quoted = compat.urlquote(username)
    return quoted
def normalize_password(password):
    """Quote a password with ``compat.urlquote`` for use in userinfo."""
    quoted = compat.urlquote(password)
    return quoted
def normalize_host(host):
    """Normalize a host string.

    Hosts are lower-cased. For a bracketed IPv6 literal that carries a
    zone identifier (anything from the first ``%`` on), only the address
    part is lower-cased and a bare ``%`` delimiter is rewritten to ``%25``.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_start = host.find("%")
    if zone_start == -1:
        return host.lower()

    encoded_at = host.find("%25")
    # Replace RFC 4007 IPv6 Zone ID delimiter '%' with '%25'
    # from RFC 6874. If the host is '[<IPv6 addr>%25]' then we
    # assume RFC 4007 and normalize to '[<IPV6 addr>%2525]'
    delimiter_is_bare = (
        encoded_at == -1
        or zone_start < encoded_at
        or (zone_start == encoded_at and encoded_at == len(host) - 4)
    )
    if delimiter_is_bare:
        host = host.replace("%", "%25", 1)

    # Don't normalize the casing of the Zone ID
    address, zone = host[:zone_start], host[zone_start:]
    return address.lower() + zone
def normalize_path(path):
    """Normalize the path string.

    Falsy paths are returned unchanged; otherwise percent-escapes are
    upper-cased and dot segments removed.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path
def normalize_query(query):
    """Normalize the query string.

    Falsy queries are returned unchanged; otherwise percent-escapes are
    upper-cased.
    """
    return normalize_percent_characters(query) if query else query
def normalize_fragment(fragment):
    """Normalize the fragment string.

    Falsy fragments are returned unchanged; otherwise percent-escapes are
    upper-cased.
    """
    return normalize_percent_characters(fragment) if fragment else fragment
# Matches one percent-escape: '%' followed by exactly two hex digits.
PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}")


def normalize_percent_characters(s):
    """Upper-case the hex digits of every percent-escape in ``s``.

    For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.
    Text that is not part of a ``%XX`` escape is left untouched.
    """
    return PERCENT_MATCHER.sub(lambda escape: escape.group(0).upper(), s)
def remove_dot_segments(s):
    """Remove dot segments from the string.

    See also Section 5.2.4 of :rfc:`3986`.
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    kept = []
    for piece in s.split("/"):
        if piece == "..":
            # Step back one segment when there is one to drop.
            if kept:
                kept.pop()
        elif piece != ".":
            # '.' is the current directory and is simply skipped; every
            # other segment is kept.
            kept.append(piece)

    # Restore the leading '/' when the input had one but the kept
    # segments no longer start with the empty string that encodes it.
    if s.startswith("/") and (not kept or kept[0]):
        kept.insert(0, "")

    # A path that ends with '/.' or '/..' names a directory, so add an
    # empty segment to get the trailing '/'.
    if s.endswith(("/.", "/..")):
        kept.append("")

    return "/".join(kept)
def encode_component(uri_component, encoding):
    """Percent-encode a component after converting it with ``encoding``.

    ``None`` is returned unchanged. Bytes below 128 whose character is in
    ``misc.NON_PCT_ENCODED`` are copied as-is; every other byte becomes an
    upper-case ``%XX`` escape. A ``%`` is copied as-is only when every
    ``%`` in the component already starts a valid ``%XX`` escape.
    """
    if uri_component is None:
        return uri_component

    # Try to see if the component we're encoding is already percent-encoded
    # so we can skip all '%' characters but still encode all others.
    as_text = compat.to_str(uri_component, encoding)
    raw = compat.to_bytes(uri_component, encoding)
    already_encoded = len(PERCENT_MATCHER.findall(as_text)) == raw.count(b"%")

    result = bytearray()
    for code in bytearray(raw):
        keep_as_is = (already_encoded and code == 0x25) or (
            code < 128 and chr(code) in misc.NON_PCT_ENCODED
        )
        if keep_as_is:
            result.append(code)
        else:
            result.extend(f"%{code:02x}".encode().upper())
    return result.decode(encoding)

View File

@@ -0,0 +1,474 @@
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the urlparse compatibility logic."""
from collections import namedtuple
from . import compat
from . import exceptions
from . import misc
from . import normalizers
from . import uri
# Public names of this module.
__all__ = ("ParseResult", "ParseResultBytes")

# Field names, in order, of the ParseResult / ParseResultBytes named tuples.
# Unlike URI_COMPONENTS the authority is split into userinfo, host and port.
PARSED_COMPONENTS = (
    "scheme",
    "userinfo",
    "host",
    "port",
    "path",
    "query",
    "fragment",
)
class ParseResultMixin:
    """Helpers shared by :class:`ParseResult` and :class:`ParseResultBytes`.

    The host class is expected to provide ``userinfo``, ``host``, ``port``,
    ``query``, ``authority``, ``encoding`` and ``unsplit()``.
    """

    def _generate_authority(self, attributes):
        """Return the authority, as text, for a modified copy.

        :param dict attributes:
            Mapping holding at least ``userinfo``, ``host`` and ``port``
            for the copy being built.
        :returns:
            A freshly normalized authority when any of the three parts
            differs from this instance, otherwise the current authority
            (decoded when it is ``bytes``).
        """
        userinfo, host, port = (
            attributes[p] for p in ("userinfo", "host", "port")
        )
        if self.userinfo != userinfo or self.host != host or self.port != port:
            if port:
                # normalize_authority concatenates the port as text.
                port = f"{port}"
            return normalizers.normalize_authority(
                (
                    compat.to_str(userinfo, self.encoding),
                    compat.to_str(host, self.encoding),
                    port,
                )
            )
        if isinstance(self.authority, bytes):
            # Decode with the instance's own encoding -- the same one
            # ParseResultBytes.authority encodes with -- rather than a
            # hard-coded "utf-8".
            return self.authority.decode(self.encoding)
        return self.authority

    def geturl(self):
        """Shim to match the standard library method."""
        return self.unsplit()

    @property
    def hostname(self):
        """Shim to match the standard library."""
        return self.host

    @property
    def netloc(self):
        """Shim to match the standard library."""
        return self.authority

    @property
    def params(self):
        """Shim to match the standard library.

        Returns the query component.
        """
        return self.query
class ParseResult(
    namedtuple("ParseResult", PARSED_COMPONENTS), ParseResultMixin
):
    """Implementation of urlparse compatibility class.

    This uses the URIReference logic to handle compatibility with the
    urlparse.ParseResult class. Besides the tuple fields, every instance
    carries ``encoding`` and ``reference`` (the underlying URIReference).
    """

    # NOTE(review): an ordinary class attribute named ``slots`` (not
    # ``__slots__``); reproduced unchanged.
    slots = ()

    def __new__(
        cls,
        scheme,
        userinfo,
        host,
        port,
        path,
        query,
        fragment,
        uri_ref,
        encoding="utf-8",
    ):
        """Create a new ParseResult.

        Empty ``scheme``, ``userinfo``, ``port`` and ``path`` values are
        stored as ``None``; the other fields are stored unchanged.
        """
        fields = (
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query,
            fragment,
        )
        instance = super().__new__(cls, *fields)
        instance.encoding = encoding
        instance.reference = uri_ref
        return instance

    @classmethod
    def from_parts(
        cls,
        scheme=None,
        userinfo=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
        encoding="utf-8",
    ):
        """Create a ParseResult instance from its parts.

        The parts are assembled into a URIReference which is normalized;
        the stored fields are taken from that normalized reference.
        """
        pieces = []
        if userinfo is not None:
            pieces.append(userinfo + "@")
        if host is not None:
            pieces.append(host)
        if port is not None:
            pieces.append(f":{port}")
        authority = "".join(pieces)

        uri_ref = uri.URIReference(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
            encoding=encoding,
        ).normalize()
        userinfo, host, port = authority_from(uri_ref, strict=True)

        return cls(
            scheme=uri_ref.scheme,
            userinfo=userinfo,
            host=host,
            port=port,
            path=uri_ref.path,
            query=uri_ref.query,
            fragment=uri_ref.fragment,
            uri_ref=uri_ref,
            encoding=encoding,
        )

    @classmethod
    def from_string(
        cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True
    ):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize:
            When False the parsed reference is normalized before its
            components are extracted.
        :returns: :class:`ParseResult` or subclass thereof
        """
        parsed = uri.URIReference.from_string(uri_string, encoding)
        reference = parsed if lazy_normalize else parsed.normalize()
        userinfo, host, port = authority_from(reference, strict)

        return cls(
            scheme=reference.scheme,
            userinfo=userinfo,
            host=host,
            port=port,
            path=reference.path,
            query=reference.query,
            fragment=reference.fragment,
            uri_ref=reference,
            encoding=encoding,
        )

    @property
    def authority(self):
        """Return the authority of the underlying reference."""
        return self.reference.authority

    def copy_with(
        self,
        scheme=misc.UseExisting,
        userinfo=misc.UseExisting,
        host=misc.UseExisting,
        port=misc.UseExisting,
        path=misc.UseExisting,
        query=misc.UseExisting,
        fragment=misc.UseExisting,
    ):
        """Create a copy of this instance replacing with specified parts.

        Arguments left at ``misc.UseExisting`` keep their current value.
        """
        supplied = (scheme, userinfo, host, port, path, query, fragment)
        attrs_dict = {
            name: getattr(self, name) if value is misc.UseExisting else value
            for name, value in zip(PARSED_COMPONENTS, supplied)
        }
        new_authority = self._generate_authority(attrs_dict)
        new_reference = self.reference.copy_with(
            scheme=attrs_dict["scheme"],
            authority=new_authority,
            path=attrs_dict["path"],
            query=attrs_dict["query"],
            fragment=attrs_dict["fragment"],
        )
        return ParseResult(
            uri_ref=new_reference, encoding=self.encoding, **attrs_dict
        )

    def encode(self, encoding=None):
        """Convert to an instance of ParseResultBytes.

        Fields that expose ``encode`` are encoded with ``encoding`` (or
        this instance's encoding when omitted); others are passed through.
        """
        encoding = encoding or self.encoding
        attrs = {
            name: field.encode(encoding) if hasattr(field, "encode") else field
            for name, field in zip(PARSED_COMPONENTS, self)
        }
        return ParseResultBytes(
            uri_ref=self.reference, encoding=encoding, **attrs
        )

    def unsplit(self, use_idna=False):
        """Create a URI string from the components.

        :param bool use_idna:
            When True and a host is present, the host is first encoded
            with the ``idna`` codec.
        :returns: The parsed URI reconstituted as a string.
        :rtype: str
        """
        if not (use_idna and self.host):
            return self.reference.unsplit()

        idna_host = self.host.encode("idna").decode(self.encoding)
        return self.copy_with(host=idna_host).reference.unsplit()
class ParseResultBytes(
    namedtuple("ParseResultBytes", PARSED_COMPONENTS), ParseResultMixin
):
    """Compatibility shim for the urlparse.ParseResultBytes object.

    Besides the tuple fields, every instance carries ``encoding``,
    ``reference`` (the underlying URIReference) and ``lazy_normalize``.
    """

    def __new__(
        cls,
        scheme,
        userinfo,
        host,
        port,
        path,
        query,
        fragment,
        uri_ref,
        encoding="utf-8",
        lazy_normalize=True,
    ):
        """Create a new ParseResultBytes instance.

        Every field except ``host`` is stored as ``None`` when empty.
        """
        fields = (
            scheme or None,
            userinfo or None,
            host,
            port or None,
            path or None,
            query or None,
            fragment or None,
        )
        instance = super().__new__(cls, *fields)
        instance.encoding = encoding
        instance.reference = uri_ref
        instance.lazy_normalize = lazy_normalize
        return instance

    @classmethod
    def from_parts(
        cls,
        scheme=None,
        userinfo=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
        encoding="utf-8",
        lazy_normalize=True,
    ):
        """Create a ParseResultBytes instance from its parts.

        The underlying reference is normalized only when
        ``lazy_normalize`` is False.
        """
        pieces = []
        if userinfo is not None:
            pieces.append(userinfo + "@")
        if host is not None:
            pieces.append(host)
        if port is not None:
            pieces.append(f":{int(port)}")
        authority = "".join(pieces)

        uri_ref = uri.URIReference(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
            encoding=encoding,
        )
        if not lazy_normalize:
            uri_ref = uri_ref.normalize()
        userinfo, host, port = authority_from(uri_ref, strict=True)

        as_bytes = compat.to_bytes
        return cls(
            scheme=as_bytes(scheme, encoding),
            userinfo=as_bytes(userinfo, encoding),
            host=as_bytes(host, encoding),
            port=port,
            path=as_bytes(path, encoding),
            query=as_bytes(query, encoding),
            fragment=as_bytes(fragment, encoding),
            uri_ref=uri_ref,
            encoding=encoding,
            lazy_normalize=lazy_normalize,
        )

    @classmethod
    def from_string(
        cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True
    ):
        """Parse a URI from the given unicode URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :param bool strict: Parse strictly according to :rfc:`3986` if True.
            If False, parse similarly to the standard library's urlparse
            function.
        :param bool lazy_normalize:
            When False the parsed reference is normalized before its
            components are extracted.
        :returns: :class:`ParseResultBytes` or subclass thereof
        """
        parsed = uri.URIReference.from_string(uri_string, encoding)
        reference = parsed if lazy_normalize else parsed.normalize()
        userinfo, host, port = authority_from(reference, strict)

        as_bytes = compat.to_bytes
        return cls(
            scheme=as_bytes(reference.scheme, encoding),
            userinfo=as_bytes(userinfo, encoding),
            host=as_bytes(host, encoding),
            port=port,
            path=as_bytes(reference.path, encoding),
            query=as_bytes(reference.query, encoding),
            fragment=as_bytes(reference.fragment, encoding),
            uri_ref=reference,
            encoding=encoding,
            lazy_normalize=lazy_normalize,
        )

    @property
    def authority(self):
        """Return the underlying reference's authority, encoded to bytes."""
        return self.reference.authority.encode(self.encoding)

    def copy_with(
        self,
        scheme=misc.UseExisting,
        userinfo=misc.UseExisting,
        host=misc.UseExisting,
        port=misc.UseExisting,
        path=misc.UseExisting,
        query=misc.UseExisting,
        fragment=misc.UseExisting,
        lazy_normalize=True,
    ):
        """Create a copy of this instance replacing with specified parts.

        Arguments left at ``misc.UseExisting`` keep their current value;
        text values are encoded to bytes with this instance's encoding.
        The new reference is normalized when ``lazy_normalize`` is False.
        """

        def _resolve(name, value):
            # Fall back to the current field, then make sure text is bytes.
            chosen = getattr(self, name) if value is misc.UseExisting else value
            if hasattr(chosen, "encode") and not isinstance(chosen, bytes):
                return chosen.encode(self.encoding)
            return chosen

        supplied = (scheme, userinfo, host, port, path, query, fragment)
        attrs_dict = {
            name: _resolve(name, value)
            for name, value in zip(PARSED_COMPONENTS, supplied)
        }
        new_authority = self._generate_authority(attrs_dict)

        as_text = compat.to_str
        new_reference = self.reference.copy_with(
            scheme=as_text(attrs_dict["scheme"], self.encoding),
            authority=as_text(new_authority, self.encoding),
            path=as_text(attrs_dict["path"], self.encoding),
            query=as_text(attrs_dict["query"], self.encoding),
            fragment=as_text(attrs_dict["fragment"], self.encoding),
        )
        if not lazy_normalize:
            new_reference = new_reference.normalize()

        return ParseResultBytes(
            uri_ref=new_reference,
            encoding=self.encoding,
            lazy_normalize=lazy_normalize,
            **attrs_dict,
        )

    def unsplit(self, use_idna=False):
        """Create a URI bytes object from the components.

        :param bool use_idna:
            When True and a host is present, the host is first encoded
            with the ``idna`` codec.
        :returns: The parsed URI reconstituted as a string.
        :rtype: bytes
        """
        result = self
        if use_idna and self.host:
            # self.host is bytes, to encode to idna, we need to decode it
            # first
            decoded_host = self.host.decode(self.encoding)
            result = self.copy_with(host=decoded_host.encode("idna"))
        if self.lazy_normalize:
            result = result.copy_with(lazy_normalize=False)
        text = result.reference.unsplit()
        return text.encode(self.encoding)
def split_authority(authority):
    """Leniently split an authority into ``(userinfo, host, port)``.

    Parts that are absent are returned as ``None``. Everything before the
    last ``@`` is the userinfo; a host starting with ``[`` runs up to and
    including the first ``]``; the port is whatever follows the first
    ``:`` after the host.
    """
    userinfo = host = port = None
    before_colon = None

    remainder = authority
    if "@" in authority:
        userinfo, _, remainder = authority.rpartition("@")

    # Handle IPv6 host addresses
    if remainder.startswith("["):
        host, remainder = remainder.split("]", 1)
        host += "]"

    if ":" in remainder:
        before_colon, _, port = remainder.partition(":")
    elif not host and remainder:
        host = remainder

    # Text before the port separator is the host unless a bracketed host
    # was already found.
    if before_colon and not host:
        host = before_colon

    return userinfo, host, port
def authority_from(reference, strict):
    """Extract ``(userinfo, host, port)`` from a reference's authority.

    :param reference: object providing ``authority_info()`` and
        ``authority``.
    :param bool strict:
        When True an invalid authority re-raises
        ``exceptions.InvalidAuthority``; when False the authority is
        split leniently with ``split_authority`` instead.
    :raises exceptions.InvalidPort: if the port is not an integer.
    :returns: tuple of userinfo, host and port (``int`` when present).
    """
    try:
        subauthority = reference.authority_info()
    except exceptions.InvalidAuthority:
        if strict:
            raise
        userinfo, host, port = split_authority(reference.authority)
    else:
        # Thanks to Richard Barrell for this idea:
        # https://twitter.com/0x2ba22e11/status/617338811975139328
        userinfo = subauthority.get("userinfo")
        host = subauthority.get("host")
        port = subauthority.get("port")

    if not port:
        return userinfo, host, port

    try:
        port = int(port)
    except ValueError:
        raise exceptions.InvalidPort(port)
    return userinfo, host, port

View File

@@ -0,0 +1,160 @@
"""Module containing the implementation of the URIReference class."""
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import misc
from . import normalizers
from ._mixin import URIMixin
class URIReference(namedtuple("URIReference", misc.URI_COMPONENTS), URIMixin):
    """Immutable, parsed representation of a URI reference.

    .. note::

        Users are not expected to instantiate this class directly.

    Five components of a URI are stored as the fields of the tuple:

    .. attribute:: scheme

        Scheme parsed from the reference, e.g. ``http``, ``https``,
        ``smtp``, ``imap``.

    .. attribute:: authority

        The part holding the user information, host and port
        sub-components, e.g. ``google.com``, ``127.0.0.1:5000``,
        ``username@[::1]``, ``username:password@example.com:443``.

    .. attribute:: path

        Path parsed from the reference, e.g. ``/``, ``/index.php``.

    .. attribute:: query

        Query component, e.g. ``a=b``, ``a=b%20c``, ``a=b+c``,
        ``a=b,c=d,e=%20f``.

    .. attribute:: fragment

        Fragment component, e.g. ``section-3.1``.

    Convenience attributes give access to the pieces of the authority:

    .. attribute:: userinfo

        User information parsed from the authority.

    .. attribute:: host

        Hostname, IPv4 or IPv6 address parsed from the authority.

    .. attribute:: port

        Port parsed from the authority.
    """

    # NOTE(review): this is an ordinary class attribute named ``slots``,
    # not ``__slots__``.  Instances therefore keep a ``__dict__``, which
    # ``__new__`` below needs in order to store ``encoding``.
    slots = ()

    def __new__(
        cls, scheme, authority, path, query, fragment, encoding="utf-8"
    ):
        """Build the tuple and remember the encoding on the instance.

        Falsy ``scheme``, ``authority`` and ``path`` values (such as the
        empty string) are stored as ``None``; ``query`` and ``fragment``
        are stored exactly as given.
        """
        fields = (
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment,
        )
        instance = super().__new__(cls, *fields)
        instance.encoding = encoding
        return instance

    __hash__ = tuple.__hash__

    def __eq__(self, other):
        """Return whether ``other`` denotes the same reference as this one.

        Tuples (including other references) are re-wrapped as a
        ``URIReference``; anything else is parsed with
        :meth:`from_string`.

        :raises TypeError: when parsing ``other`` raises ``TypeError``.
        """
        if isinstance(other, tuple):
            # URIReference is itself a tuple, so references land here too.
            candidate = URIReference(*other)
        else:
            try:
                candidate = URIReference.from_string(other)
            except TypeError:
                raise TypeError(
                    "Unable to compare URIReference() to {}()".format(
                        type(other).__name__
                    )
                )

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        if tuple(self) == tuple(candidate):
            return True
        return self.normalized_equality(candidate)

    def normalize(self):
        """Return a normalized copy, following RFC 3986 Section 6.2.2.

        The current object is left untouched.

        :returns: A new reference object with normalized components.
        :rtype: URIReference
        """
        # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for the
        # reasoning behind each step.
        scheme = normalizers.normalize_scheme(self.scheme or "")
        authority = normalizers.normalize_authority(
            (self.userinfo, self.host, self.port)
        )
        path = normalizers.normalize_path(self.path or "")
        query = normalizers.normalize_query(self.query)
        fragment = normalizers.normalize_fragment(self.fragment)
        return URIReference(
            scheme, authority, path, query, fragment, self.encoding
        )

    @classmethod
    def from_string(cls, uri_string, encoding="utf-8"):
        """Create a reference by parsing the given URI string.

        :param str uri_string: Unicode URI to be parsed into a reference.
        :param str encoding: The encoding of the string provided.
        :returns: :class:`URIReference` or subclass thereof
        """
        text = compat.to_str(uri_string, encoding)
        components = misc.URI_MATCHER.match(text).groupdict()
        scheme = components["scheme"]
        authority = components["authority"]
        # Only path, query and fragment are passed through
        # encode_component; scheme and authority are used as matched.
        path = normalizers.encode_component(components["path"], encoding)
        query = normalizers.encode_component(components["query"], encoding)
        fragment = normalizers.encode_component(
            components["fragment"], encoding
        )
        return cls(scheme, authority, path, query, fragment, encoding)

View File

@@ -0,0 +1,440 @@
# Copyright (c) 2017 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the validation logic for rfc3986."""
from . import exceptions
from . import misc
from . import normalizers
class Validator:
    """Configurable set of checks to run against a parsed URI.

    Every configuration method returns the validator itself so calls can
    be chained.

    .. versionadded:: 1.0

    Example usage::

         >>> from rfc3986 import api, validators
         >>> uri = api.uri_reference('https://github.com/')
         >>> validator = validators.Validator().require_presence_of(
         ...     'scheme', 'host', 'path',
         ... ).allow_schemes(
         ...     'http', 'https',
         ... ).allow_hosts(
         ...     '127.0.0.1', 'github.com',
         ... )
         >>> validator.validate(uri)
         >>> invalid_uri = api.uri_reference('imap://mail.google.com')
         >>> validator.validate(invalid_uri)
         Traceback (most recent call last):
         ...
         rfc3986.exceptions.MissingComponentError: ('path was required but
         missing', URIReference(scheme='imap', authority='mail.google.com',
         path=None, query=None, fragment=None), ['path'])

    """

    COMPONENT_NAMES = frozenset(
        ["scheme", "userinfo", "host", "port", "path", "query", "fragment"]
    )

    def __init__(self):
        """Start unrestricted: nothing required, checked or limited."""
        self.allowed_schemes = set()
        self.allowed_hosts = set()
        self.allowed_ports = set()
        self.allow_password = True
        # One flag per component, all switched off initially.
        self.required_components = dict.fromkeys(
            (
                "scheme",
                "userinfo",
                "host",
                "port",
                "path",
                "query",
                "fragment",
            ),
            False,
        )
        self.validated_components = dict(self.required_components)

    def allow_schemes(self, *schemes):
        """Add schemes to the set a URI's scheme must belong to.

        .. versionadded:: 1.0

        :param schemes:
            Permitted schemes, given without ``://``.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        normalize = normalizers.normalize_scheme
        for raw_scheme in schemes:
            self.allowed_schemes.add(normalize(raw_scheme))
        return self

    def allow_hosts(self, *hosts):
        """Add hosts to the set a URI's host must belong to.

        .. versionadded:: 1.0

        :param hosts:
            Permitted hosts.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        normalize = normalizers.normalize_host
        for raw_host in hosts:
            self.allowed_hosts.add(normalize(raw_host))
        return self

    def allow_ports(self, *ports):
        """Add ports to the set a URI's port must belong to.

        Each port is parsed with ``int(port, base=10)``; values outside
        ``0..65535`` are skipped rather than stored.

        .. versionadded:: 1.0

        :param ports:
            Permitted ports.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        for candidate in ports:
            if 0 <= int(candidate, base=10) <= 65535:
                # The original value is stored, not the parsed integer.
                self.allowed_ports.add(candidate)
        return self

    def allow_use_of_password(self):
        """Permit a password inside the URI's userinfo.

        .. versionadded:: 1.0

        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allow_password = True
        return self

    def forbid_use_of_password(self):
        """Reject URIs whose userinfo carries a password.

        .. versionadded:: 1.0

        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allow_password = False
        return self

    def check_validity_of(self, *components):
        """Mark components whose contents must be validated.

        May be called several times; the selections accumulate.

        .. versionadded:: 1.1

        :param components:
            Names of components from :attr:`Validator.COMPONENT_NAMES`
            (matched case-insensitively).
        :returns:
            The validator instance.
        :rtype:
            Validator
        :raises ValueError:
            When a name is not a known component; nothing is recorded
            in that case.
        """
        lowered = [name.lower() for name in components]
        for component in lowered:
            if component not in self.COMPONENT_NAMES:
                raise ValueError(f'"{component}" is not a valid component')
        self.validated_components.update(dict.fromkeys(lowered, True))
        return self

    def require_presence_of(self, *components):
        """Mark components that must be present in the URI.

        May be called several times; the selections accumulate.

        .. versionadded:: 1.0

        :param components:
            Names of components from :attr:`Validator.COMPONENT_NAMES`
            (matched case-insensitively).
        :returns:
            The validator instance.
        :rtype:
            Validator
        :raises ValueError:
            When a name is not a known component; nothing is recorded
            in that case.
        """
        lowered = [name.lower() for name in components]
        for component in lowered:
            if component not in self.COMPONENT_NAMES:
                raise ValueError(f'"{component}" is not a valid component')
        self.required_components.update(dict.fromkeys(lowered, True))
        return self

    def validate(self, uri):
        """Run every configured check against ``uri``.

        Checks run in this order: password, required components,
        component validity, then the scheme/host/port allow-lists.

        .. versionadded:: 1.0

        :param uri:
            Parsed URI to validate.
        :type uri:
            rfc3986.uri.URIReference
        :raises MissingComponentError:
            When a required component is missing.
        :raises UnpermittedComponentError:
            When a component is not one of those allowed.
        :raises PasswordForbidden:
            When a password is present in the userinfo component but is
            not permitted by configuration.
        :raises InvalidComponentsError:
            When a component was found to be invalid.
        """
        if not self.allow_password:
            check_password(uri)

        must_exist = [
            name for name, flag in self.required_components.items() if flag
        ]
        must_be_valid = [
            name for name, flag in self.validated_components.items() if flag
        ]

        if must_exist:
            ensure_required_components_exist(uri, must_exist)
        if must_be_valid:
            ensure_components_are_valid(uri, must_be_valid)

        allow_lists = (
            (self.allowed_schemes, "scheme"),
            (self.allowed_hosts, "host"),
            (self.allowed_ports, "port"),
        )
        for permitted, attribute in allow_lists:
            ensure_one_of(permitted, uri, attribute)
def check_password(uri):
    """Raise ``PasswordForbidden`` when the userinfo holds a password.

    A password is considered present when the userinfo contains a ``:``
    separator; missing or empty userinfo passes silently.
    """
    userinfo = uri.userinfo
    if userinfo and ":" in userinfo:
        raise exceptions.PasswordForbidden(uri)
def ensure_one_of(allowed_values, uri, attribute):
    """Raise unless ``uri``'s attribute is among the allowed values.

    Nothing is checked when the attribute is ``None`` or when
    ``allowed_values`` is empty.

    :raises UnpermittedComponentError:
        When the value is present but not in ``allowed_values``.
    """
    value = getattr(uri, attribute)
    if value is None or not allowed_values:
        return
    if value in allowed_values:
        return
    raise exceptions.UnpermittedComponentError(
        attribute,
        value,
        allowed_values,
    )
def ensure_required_components_exist(uri, required_components):
    """Raise when any required component of ``uri`` is ``None``.

    :raises MissingComponentError:
        Carrying the missing component names in sorted order.
    """
    absent = [
        name for name in required_components if getattr(uri, name) is None
    ]
    if absent:
        absent.sort()
        raise exceptions.MissingComponentError(uri, *absent)
def is_valid(value, matcher, require):
    """Determine if a value is valid based on the provided matcher.

    :param str value:
        Value to validate; ``None`` means the value is absent.
    :param matcher:
        Compiled regular expression to use to validate the value.
    :param require:
        Whether or not the value is required.
    :returns:
        ``True`` when the value is acceptable, ``False`` otherwise. An
        absent value is acceptable only when it is not required; a
        present value is acceptable only when ``matcher`` matches it.
    :rtype:
        bool
    """
    if value is None:
        # Absence is only a problem for required values.
        return not require
    # Collapse the match object (or None) into a real boolean so that
    # callers documented as returning bool actually do.
    return matcher.match(value) is not None
def authority_is_valid(authority, host=None, require=False):
    """Check the authority string and, when supplied, its host.

    :param str authority:
        The authority to validate.
    :param str host:
        (optional) The host portion of the authority to validate.
    :param bool require:
        (optional) Specify if authority must not be None.
    :returns:
        A truthy value if valid, a falsy one otherwise. When the
        authority passes and ``host`` is given, the result of
        :func:`host_is_valid` is returned instead.
    """
    authority_ok = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require)
    if not authority_ok or host is None:
        return authority_ok
    return host_is_valid(host, require)
def host_is_valid(host, require=False):
    """Check the host string, with extra checks for IP literals.

    :param str host:
        The host to validate.
    :param bool require:
        (optional) Specify if host must not be None.
    :returns:
        A truthy value if valid, a falsy one otherwise.
    """
    host_ok = is_valid(host, misc.HOST_MATCHER, require)
    if not host_ok or host is None:
        return host_ok
    if misc.IPv4_MATCHER.match(host):
        # Looks like dotted-quad: each octet must also be in range.
        return valid_ipv4_host_address(host)
    if misc.IPv6_MATCHER.match(host):
        # IPv6 literal: it must also match the IPv6_NO_RFC4007 pattern.
        return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None
    return host_ok
def scheme_is_valid(scheme, require=False):
    """Check a scheme against ``misc.SCHEME_MATCHER``.

    :param str scheme:
        The scheme string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a scheme.
    :returns:
        The result of :func:`is_valid`: truthy if the scheme is valid,
        falsy otherwise.
    """
    return is_valid(
        value=scheme,
        matcher=misc.SCHEME_MATCHER,
        require=require,
    )
def path_is_valid(path, require=False):
    """Check a path against ``misc.PATH_MATCHER``.

    :param str path:
        The path string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a path.
    :returns:
        The result of :func:`is_valid`: truthy if the path is valid,
        falsy otherwise.
    """
    return is_valid(
        value=path,
        matcher=misc.PATH_MATCHER,
        require=require,
    )
def query_is_valid(query, require=False):
    """Check a query against ``misc.QUERY_MATCHER``.

    :param str query:
        The query string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a query.
    :returns:
        The result of :func:`is_valid`: truthy if the query is valid,
        falsy otherwise.
    """
    return is_valid(
        value=query,
        matcher=misc.QUERY_MATCHER,
        require=require,
    )
def fragment_is_valid(fragment, require=False):
    """Check a fragment against ``misc.FRAGMENT_MATCHER``.

    :param str fragment:
        The fragment string to validate.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a fragment.
    :returns:
        The result of :func:`is_valid`: truthy if the fragment is valid,
        falsy otherwise.
    """
    return is_valid(
        value=fragment,
        matcher=misc.FRAGMENT_MATCHER,
        require=require,
    )
def valid_ipv4_host_address(host):
    """Return whether every dot-separated part of ``host`` is in 0..255.

    Each part is parsed as a base-10 integer first, so a part that is
    not a number raises ``ValueError``.
    """
    # Parse all parts up front, then range-check them.
    octets = [int(part, base=10) for part in host.split(".")]
    return all(0 <= octet <= 255 for octet in octets)
# Maps the name of each component that is validated directly from the
# URI's attribute of the same name to the function that checks it.
_COMPONENT_VALIDATORS = {
    "scheme": scheme_is_valid,
    "path": path_is_valid,
    "query": query_is_valid,
    "fragment": fragment_is_valid,
}
# Names of the components that live inside the authority; these are
# checked by subauthority_component_is_valid() instead of the table above.
_SUBAUTHORITY_VALIDATORS = {"userinfo", "host", "port"}
def subauthority_component_is_valid(uri, component):
    """Check one authority sub-component (userinfo, host or port).

    An authority that cannot be parsed makes every sub-component
    invalid. Otherwise the host goes through :func:`host_is_valid`, the
    port must be an integer within ``0..65535`` (a missing port is
    fine), and any other component is accepted as-is.
    """
    try:
        parts = uri.authority_info()
    except exceptions.InvalidAuthority:
        return False

    if component == "host":
        return host_is_valid(parts["host"])
    if component != "port":
        # The authority parsed, which is all we require of userinfo.
        return True

    try:
        port_number = int(parts["port"])
    except TypeError:
        # An absent port is None, and int(None) raises TypeError.
        return True
    else:
        return 0 <= port_number <= 65535
def ensure_components_are_valid(uri, validated_components):
    """Raise when any of the named components of ``uri`` is invalid.

    Authority sub-components are checked through
    :func:`subauthority_component_is_valid`; the rest use the matching
    entry of ``_COMPONENT_VALIDATORS``.

    :raises InvalidComponentsError:
        Carrying the names of every component that failed.
    """
    failures = set()
    for name in validated_components:
        if name in _SUBAUTHORITY_VALIDATORS:
            passed = subauthority_component_is_valid(uri, name)
        else:
            passed = _COMPONENT_VALIDATORS[name](getattr(uri, name))
        if not passed:
            failures.add(name)

    if failures:
        raise exceptions.InvalidComponentsError(uri, *failures)