initial commit

This commit is contained in:
Davidson Gomes
2024-10-30 11:19:09 -03:00
commit 8654a31a4d
3744 changed files with 585542 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
# Copyright 2014 Donald Stufft
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@@ -0,0 +1,66 @@
import argparse
from readme_renderer.markdown import render as render_md
from readme_renderer.rst import render as render_rst
from readme_renderer.txt import render as render_txt
import pathlib
from importlib.metadata import metadata
import sys
from typing import Optional, List
def main(cli_args: Optional[List[str]] = None) -> None:
    """Render a README file, or a package's long description, to HTML.

    Args:
        cli_args: Command-line arguments to parse. ``None`` lets ``argparse``
            fall back to ``sys.argv[1:]``.

    Raises:
        ValueError: If the README format (given, inferred from the file
            suffix, or inferred from the package's
            ``Description-Content-Type``) is not ``md``, ``rst`` or ``txt``.
        SystemExit: With status 1 when the selected renderer returns ``None``.
    """
    parser = argparse.ArgumentParser(
        description="Renders a .md, .rst, or .txt README to HTML",
    )
    parser.add_argument("-p", "--package", help="Get README from package metadata",
                        action="store_true")
    parser.add_argument("-f", "--format", choices=["md", "rst", "txt"],
                        help="README format (inferred from input file name or package)")
    parser.add_argument('input', help="Input README file or package name")
    parser.add_argument('-o', '--output', help="Output file (default: stdout)",
                        default='-')
    args = parser.parse_args(cli_args)

    content_format = args.format
    if args.package:
        message = metadata(args.input)
        source = message.get_payload()  # type: ignore[attr-defined] # noqa: E501 https://peps.python.org/pep-0566/

        # Infer the format of the description from package metadata.
        if not content_format:
            content_type = message.get("Description-Content-Type", "text/x-rst")
            # The header may carry parameters after the media type, e.g.
            # "text/markdown; charset=UTF-8; variant=GFM". Only the media
            # type itself decides which renderer to use.
            media_type = content_type.split(";", 1)[0].strip().lower()
            if media_type == "text/x-rst":
                content_format = "rst"
            elif media_type == "text/markdown":
                content_format = "md"
            elif media_type == "text/plain":
                content_format = "txt"
            else:
                raise ValueError(f"invalid content type {content_type} for package "
                                 "`long_description`")
    else:
        filename = pathlib.Path(args.input)
        # An explicit --format wins; otherwise use the file suffix.
        content_format = content_format or filename.suffix.lstrip(".")
        with filename.open() as fp:
            source = fp.read()

    # Renderer warnings are sent to stderr so they never mix with the HTML.
    if content_format == "md":
        rendered = render_md(source, stream=sys.stderr)
    elif content_format == "rst":
        rendered = render_rst(source, stream=sys.stderr)
    elif content_format == "txt":
        rendered = render_txt(source, stream=sys.stderr)
    else:
        raise ValueError(f"invalid README format: {content_format} (expected `md`, "
                         "`rst`, or `txt`)")
    if rendered is None:
        sys.exit(1)

    if args.output == "-":
        print(rendered, file=sys.stdout)
    else:
        with open(args.output, "w") as fp:
            print(rendered, file=fp)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,89 @@
# Copyright 2014 Donald Stufft
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, Optional, Set
import nh3
# HTML tags used as the default tag allowlist by `clean`.
ALLOWED_TAGS = {
    # Bleach Defaults
    "a", "abbr", "acronym", "b", "blockquote", "code",
    "em", "i", "li", "ol", "strong", "ul",

    # Custom Additions, grouped by purpose.
    # Text blocks and separators
    "br", "hr", "p", "pre", "div", "span",
    # Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # Inline text
    "cite", "del", "sub", "sup", "tt", "kbd", "var",
    # Definition lists
    "dd", "dl", "dt",
    # Tables
    "caption", "col", "colgroup", "table", "tbody", "td", "th", "thead", "tr",
    # Disclosure widgets
    "details", "summary",
    # Images and figures
    "img", "figure", "figcaption", "picture",
    # Form controls
    "input",
    # Sectioning
    "section", "aside", "nav",
}
# Per-tag attribute allowlist used as the default by `clean`.
ALLOWED_ATTRIBUTES = {
    # Bleach Defaults
    "a": {"href", "title"},
    "abbr": {"title"},
    "acronym": {"title"},

    # Custom Additions
    "*": {"id"},
    "img": {"src", "width", "height", "alt", "align", "class"},
    "th": {"align", "class"},
    "td": {"align", "colspan", "rowspan"},
    "div": {"align", "class"},
    "p": {"align", "class"},
    "pre": {"lang"},
    "ol": {"start"},
    "input": {"type", "checked", "disabled"},
    # Every heading level accepts only `align`.
    **{f"h{level}": {"align"} for level in range(1, 7)},
    # Tags that accept only `class`; each one gets its own set object.
    **{
        tag: {"class"}
        for tag in (
            "hr", "span", "code", "aside", "dd", "dl", "dt", "ul", "nav",
            "figure",
        )
    },
}
def clean(
    html: str,
    tags: Optional[Set[str]] = None,
    attributes: Optional[Dict[str, Set[str]]] = None
) -> Optional[str]:
    """Sanitize an HTML fragment with ``nh3``.

    Args:
        html: The HTML to sanitize.
        tags: Tag names to keep. Defaults to ``ALLOWED_TAGS``.
        attributes: Mapping of tag name to the attribute names to keep.
            Defaults to ``ALLOWED_ATTRIBUTES``.

    Returns:
        The sanitized HTML, or ``None`` if ``nh3.clean`` raises
        ``ValueError``.
    """
    if tags is None:
        tags = ALLOWED_TAGS
    if attributes is None:
        attributes = ALLOWED_ATTRIBUTES

    try:
        # Pass the resolved allowlists, not the module defaults, so that
        # caller-supplied `tags` / `attributes` actually take effect.
        return nh3.clean(
            html,
            tags=tags,
            attributes=attributes,
            link_rel="nofollow",
            url_schemes={"http", "https", "mailto"},
        )
    except ValueError:
        return None

View File

@@ -0,0 +1,123 @@
# Copyright 2014 Donald Stufft
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import warnings
from typing import cast, Any, Dict, Callable, Match, Optional
from html import unescape
import pygments
import pygments.lexers
import pygments.formatters
from .clean import clean
# Warning text used when the optional Markdown dependency cannot be imported.
_EXTRA_WARNING = (
    "Markdown renderers are not available. Install 'readme_renderer[md]' "
    "to enable Markdown rendering."
)

# Maps a Markdown variant name to a callable turning raw Markdown into HTML.
# Stays empty when cmarkgfm is not importable.
variants: Dict[str, Callable[[str], str]] = {}

try:
    import cmarkgfm
    from cmarkgfm.cmark import Options as cmarkgfmOptions
except ImportError:
    warnings.warn(_EXTRA_WARNING)
else:
    def _render_gfm(raw: str) -> str:
        """Convert GitHub-flavored Markdown to HTML via cmarkgfm."""
        return cast(str, cmarkgfm.github_flavored_markdown_to_html(
            raw, options=cmarkgfmOptions.CMARK_OPT_UNSAFE
        ))

    def _render_commonmark(raw: str) -> str:
        """Convert CommonMark Markdown to HTML via cmarkgfm."""
        return cast(str, cmarkgfm.markdown_to_html(
            raw, options=cmarkgfmOptions.CMARK_OPT_UNSAFE
        ))

    variants["GFM"] = _render_gfm
    variants["CommonMark"] = _render_commonmark

# Make code fences with `python` as the language default to highlighting as
# Python 3.
_LANG_ALIASES = {'python': 'python3'}
def render(
    raw: str,
    variant: str = "GFM",
    **kwargs: Any
) -> Optional[str]:
    """Render Markdown to highlighted, sanitized HTML.

    Args:
        raw: The Markdown source.
        variant: Key into ``variants`` selecting the renderer.
        **kwargs: Accepted but not used by this function.

    Returns:
        The result of ``clean`` on the highlighted HTML, or ``None`` when no
        renderers are available, the variant is unknown, or the renderer
        produces empty output.
    """
    if not variants:
        warnings.warn(_EXTRA_WARNING)
        return None

    to_html = variants.get(variant)
    markup = to_html(raw) if to_html else None
    return clean(_highlight(markup)) if markup else None
def _highlight(html: str) -> str:
    """Syntax-highlights HTML-rendered Markdown.

    Plucks sections to highlight that conform to the GitHub fenced code info
    string as defined at https://github.github.com/gfm/#info-string.

    Args:
        html (str): The rendered HTML.

    Returns:
        str: The HTML with Pygments syntax highlighting applied to every code
            block that declares a language.
    """
    formatter = pygments.formatters.HtmlFormatter(nowrap=True)

    code_expr = re.compile(
        # cmarkgfm<0.6.0: <pre><code class="language-python">print('hello')</code></pre>
        # cmarkgfm>=0.6.0: <pre lang="python"><code>print('hello')</code></pre>
        r'(<pre>(?P<in_code><code) class="language-|<pre lang=")(?P<lang>[^"]+?)">'
        # The code group is `.*?` rather than `.+?` so that an empty fenced
        # block (`<code></code>`) matches on its own. With `.+?` the match
        # would run on to the *next* `</code></pre>` and swallow all the
        # markup in between.
        '(?(in_code)|<code>)(?P<code>.*?)'
        r'</code></pre>', re.DOTALL)

    def replacer(match: Match[Any]) -> str:
        lang = match.group('lang')
        lang = _LANG_ALIASES.get(lang, lang)
        try:
            lexer = pygments.lexers.get_lexer_by_name(lang)
        except ValueError:
            # No lexer for this language: fall back to plain text.
            lexer = pygments.lexers.TextLexer()

        code = match.group('code')

        # Decode html entities in the code. cmark tries to be helpful and
        # translate '"' to '&quot;', but it confuses pygments. Pygments will
        # escape any html entities when re-writing the code.
        code = unescape(code)

        highlighted = pygments.highlight(code, lexer, formatter)

        return f'<pre lang="{lang}">{highlighted}</pre>'

    return code_expr.sub(replacer, html)

View File

@@ -0,0 +1,135 @@
# Copyright 2014 Donald Stufft
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
from typing import Any, Dict, IO, Optional, Union
from docutils.core import publish_parts
from docutils.nodes import colspec, image
from docutils.writers.html5_polyglot import HTMLTranslator, Writer
from docutils.utils import SystemMessage
from .clean import clean
class ReadMeHTMLTranslator(HTMLTranslator):  # type: ignore[misc] # docutils is incomplete, returns `Any` python/typeshed#7256 # noqa E501
    """HTML translator with README-specific handling of images."""

    # Overrides base class not to output `<object>` tag for SVG images.
    object_image_types: Dict[str, str] = {}

    def emptytag(
        self,
        node: Union[colspec, image],
        tagname: str,
        suffix: str = "\n",
        **attributes: Any
    ) -> Any:
        """Emit an empty tag, adding back width/height on ``<img>``.

        For ``img`` tags, any ``width`` / ``height`` set on the node is copied
        into the tag attributes before delegating to the base class.
        """
        if tagname == "img":
            for dimension in ("width", "height"):
                if dimension in node:
                    attributes[dimension] = node[dimension]

        return super().emptytag(node, tagname, suffix, **attributes)
# docutils settings overrides applied by `render`.
SETTINGS = {
    # --- Security -----------------------------------------------------------
    # Prevent local files from being included into the rendered output.
    # This is a security concern because people can insert files
    # that are part of the system, such as /etc/passwd.
    "file_insertion_enabled": False,

    # Disable raw html as enabling it is a security risk, we do not want
    # people to be able to include any old HTML in the final output.
    "raw_enabled": False,

    # Cloaking email addresses provides a small amount of additional
    # privacy protection for email addresses inside of a chunk of ReST.
    "cloak_email_addresses": True,

    # --- Error reporting ----------------------------------------------------
    # Halt rendering and throw an exception if there were any errors or
    # warnings from docutils.
    "halt_level": 2,

    # Disable all system messages from being reported.
    "report_level": 5,

    # --- Document structure -------------------------------------------------
    # Title-promotion transforms for a lone top-level heading
    # (doctitle_xform) and a lone subsection heading (sectsubtitle_xform).
    # NOTE(review): docutils documents True as *enabling* these promotions;
    # confirm True is intended if the goal is to prevent headings from being
    # promoted.
    "doctitle_xform": True,
    "sectsubtitle_xform": True,

    # Set our initial header level
    "initial_header_level": 2,

    # Maximum width (in characters) for one-column field names.
    # 0 means "no limit"
    "field_name_limit": 0,

    # --- Output formatting --------------------------------------------------
    # Output math blocks as LaTeX that can be interpreted by MathJax for
    # a prettier display of Math formulas.
    # Pass a dummy path to suppress docutils warning and emit HTML.
    "math_output": "MathJax /dummy.js",

    # Use typographic quotes, and transform --, ---, and ... into their
    # typographic counterparts.
    "smart_quotes": True,

    # Strip all comments from the rendered output.
    "strip_comments": True,

    # Use the short form of syntax highlighting so that the generated
    # Pygments CSS can be used to style the output.
    "syntax_highlight": "short",
}
def render(
    raw: str,
    stream: Optional[IO[str]] = None,
    **kwargs: Any
) -> Optional[str]:
    """Render reStructuredText to sanitized HTML.

    Args:
        raw: The reStructuredText source.
        stream: Text stream that receives docutils warnings. When omitted, a
            throwaway ``io.StringIO`` is used so nothing reaches
            ``sys.stderr``.
        **kwargs: Accepted but not used by this function.

    Returns:
        The result of ``clean`` on the rendered HTML, or ``None`` when
        docutils halts with a ``SystemMessage`` or produces no content.
    """
    if stream is None:
        # Use a io.StringIO as the warning stream to prevent warnings from
        # being printed to sys.stderr.
        stream = io.StringIO()

    # Copy so the module-level SETTINGS is never mutated.
    settings = SETTINGS.copy()
    settings["warning_stream"] = stream

    writer = Writer()
    writer.translator_class = ReadMeHTMLTranslator

    halted = False
    try:
        parts = publish_parts(raw, writer=writer, settings_overrides=settings)
    except SystemMessage:
        halted = True
        rendered = None
    else:
        rendered = parts.get("docinfo", "") + parts.get("fragment", "")

    if rendered:
        return clean(rendered)

    # If the warnings stream is empty, docutils had none, so add ours.
    try:
        stream_is_empty = not stream.tell()
    except OSError:
        # Non-seekable streams (e.g. sys.stderr attached to a pipe) raise
        # io.UnsupportedOperation, an OSError subclass, from tell(). Fall
        # back to whether docutils halted; presumably it already reported
        # the problem on the stream in that case.
        stream_is_empty = not halted
    if stream_is_empty:
        stream.write("No content rendered from RST source.")
    return None

View File

@@ -0,0 +1,24 @@
# Copyright 2015 Donald Stufft
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Optional
from .clean import clean
from html import escape as html_escape
def render(raw: str, **kwargs: Any) -> Optional[str]:
    """Render plain text as HTML.

    The text is HTML-escaped, each newline becomes ``<br>``, and the result
    is passed to ``clean`` with ``tags={"br"}``.

    Args:
        raw: The plain-text source.
        **kwargs: Accepted but not used by this function.

    Returns:
        Whatever ``clean`` returns for the escaped markup.
    """
    escaped = html_escape(raw)
    with_breaks = "<br>".join(escaped.split("\n"))
    return clean(with_breaks, tags={"br"})