initial commit

2026-02-04 05:46:23 -06:00 · 2024-10-30 11:19:09 -03:00
commit 8654a31a4d
3744 changed files with 585542 additions and 0 deletions
@@ -0,0 +1,13 @@
+# Copyright 2014 Donald Stufft
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -0,0 +1,66 @@
+import argparse
+from readme_renderer.markdown import render as render_md
+from readme_renderer.rst import render as render_rst
+from readme_renderer.txt import render as render_txt
+import pathlib
+from importlib.metadata import metadata
+import sys
+from typing import Optional, List
+
+
+def main(cli_args: Optional[List[str]] = None) -> None:
+    """Render a README file, or an installed package's description, to HTML.
+
+    The README format is taken from ``--format`` when given; otherwise it is
+    inferred from the input file's extension or, with ``--package``, from the
+    package's ``Description-Content-Type`` metadata (``text/x-rst`` when the
+    header is absent). Renderer warnings are sent to ``sys.stderr``.
+
+    Args:
+        cli_args: Argument list handed to ``ArgumentParser.parse_args``.
+
+    Raises:
+        ValueError: If the package content type or the resolved README
+            format is not one of the supported values.
+
+    Exits with status 1 when the selected renderer returns ``None``.
+    """
+    cli = argparse.ArgumentParser(
+        description="Renders a .md, .rst, or .txt README to HTML",
+    )
+    cli.add_argument(
+        "-p", "--package",
+        action="store_true",
+        help="Get README from package metadata",
+    )
+    cli.add_argument(
+        "-f", "--format",
+        choices=["md", "rst", "txt"],
+        help="README format (inferred from input file name or package)",
+    )
+    cli.add_argument("input", help="Input README file or package name")
+    cli.add_argument(
+        "-o", "--output",
+        default="-",
+        help="Output file (default: stdout)",
+    )
+    options = cli.parse_args(cli_args)
+
+    readme_format = options.format
+    if not options.package:
+        readme_path = pathlib.Path(options.input)
+        # Without an explicit --format, use the file extension minus its dot.
+        readme_format = readme_format or readme_path.suffix.lstrip(".")
+        with readme_path.open() as readme_file:
+            source = readme_file.read()
+    else:
+        package_info = metadata(options.input)
+        source = package_info.get_payload()  # type: ignore[attr-defined] # noqa: E501 https://peps.python.org/pep-0566/
+
+        # Infer the format of the description from package metadata.
+        if not readme_format:
+            declared_type = package_info.get(
+                "Description-Content-Type", "text/x-rst"
+            )
+            known_content_types = (
+                ("text/x-rst", "rst"),
+                ("text/markdown", "md"),
+                ("text/plain", "txt"),
+            )
+            for known_type, short_name in known_content_types:
+                if declared_type == known_type:
+                    readme_format = short_name
+                    break
+            else:
+                raise ValueError(f"invalid content type {declared_type} for package "
+                                 "`long_description`")
+
+    renderers = {"md": render_md, "rst": render_rst, "txt": render_txt}
+    if readme_format not in renderers:
+        raise ValueError(f"invalid README format: {readme_format} (expected `md`, "
+                         "`rst`, or `txt`)")
+    rendered = renderers[readme_format](source, stream=sys.stderr)
+
+    if rendered is None:
+        sys.exit(1)
+    if options.output != "-":
+        with open(options.output, "w") as out_file:
+            print(rendered, file=out_file)
+    else:
+        print(rendered, file=sys.stdout)
+
+
+# Run the command-line entry point when this module is executed directly.
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,89 @@
+# Copyright 2014 Donald Stufft
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Optional, Set
+
+import nh3
+
+
+# HTML element names that `clean()` lets through the sanitizer; this set is
+# handed to `nh3.clean` as its `tags` argument.
+ALLOWED_TAGS = {
+    # Bleach Defaults
+    "a", "abbr", "acronym", "b", "blockquote", "code", "em", "i", "li", "ol",
+    "strong", "ul",
+
+    # Custom Additions
+    "br", "caption", "cite", "col", "colgroup", "dd", "del", "details", "div",
+    "dl", "dt", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "img", "p", "pre",
+    "span", "sub", "summary", "sup", "table", "tbody", "td", "th", "thead",
+    "tr", "tt", "kbd", "var", "input", "section", "aside", "nav", "figure",
+    "figcaption", "picture",
+}
+
+# Attribute names permitted per element name; this mapping is handed to
+# `nh3.clean` as its `attributes` argument.
+# NOTE(review): per the nh3 documentation the "*" key applies to every
+# element -- confirm against the pinned nh3 version.
+ALLOWED_ATTRIBUTES = {
+    # Bleach Defaults
+    "a": {"href", "title"},
+    "abbr": {"title"},
+    "acronym": {"title"},
+
+    # Custom Additions
+    "*": {"id"},
+    "hr": {"class"},
+    "img": {"src", "width", "height", "alt", "align", "class"},
+    "span": {"class"},
+    "th": {"align", "class"},
+    "td": {"align", "colspan", "rowspan"},
+    "div": {"align", "class"},
+    "h1": {"align"},
+    "h2": {"align"},
+    "h3": {"align"},
+    "h4": {"align"},
+    "h5": {"align"},
+    "h6": {"align"},
+    "code": {"class"},
+    "p": {"align", "class"},
+    "pre": {"lang"},
+    "ol": {"start"},
+    "input": {"type", "checked", "disabled"},
+    "aside": {"class"},
+    "dd": {"class"},
+    "dl": {"class"},
+    "dt": {"class"},
+    "ul": {"class"},
+    "nav": {"class"},
+    "figure": {"class"},
+}
+
+
+def clean(
+    html: str,
+    tags: Optional[Set[str]] = None,
+    attributes: Optional[Dict[str, Set[str]]] = None
+) -> Optional[str]:
+    """Sanitize an HTML fragment with ``nh3``.
+
+    Links get ``rel="nofollow"`` and only ``http``, ``https`` and ``mailto``
+    URL schemes are allowed.
+
+    Args:
+        html: The HTML to sanitize.
+        tags: Element names to keep. Defaults to ``ALLOWED_TAGS``.
+        attributes: Allowed attribute names keyed by element name. Defaults
+            to ``ALLOWED_ATTRIBUTES``.
+
+    Returns:
+        The sanitized HTML, or ``None`` if ``nh3.clean`` raises
+        ``ValueError``.
+    """
+    if tags is None:
+        tags = ALLOWED_TAGS
+    if attributes is None:
+        attributes = ALLOWED_ATTRIBUTES
+
+    try:
+        # Forward the resolved arguments so that a caller-supplied allow-list
+        # is actually honoured; the module-level defaults only apply when the
+        # caller passed nothing.
+        return nh3.clean(
+            html,
+            tags=tags,
+            attributes=attributes,
+            link_rel="nofollow",
+            url_schemes={"http", "https", "mailto"},
+        )
+    except ValueError:
+        return None
@@ -0,0 +1,123 @@
+# Copyright 2014 Donald Stufft
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import warnings
+from typing import cast, Any, Dict, Callable, Match, Optional
+
+from html import unescape
+
+import pygments
+import pygments.lexers
+import pygments.formatters
+
+from .clean import clean
+
+# Warning text emitted when the optional Markdown dependency is unavailable.
+_EXTRA_WARNING = (
+    "Markdown renderers are not available. "
+    "Install 'readme_renderer[md]' to enable Markdown rendering."
+)
+
+# Map each supported Markdown variant name to a callable that converts raw
+# Markdown to HTML. If `cmarkgfm` cannot be imported, warn once at import
+# time and leave the map empty; `render()` checks for the empty map.
+# NOTE(review): CMARK_OPT_UNSAFE presumably lets raw HTML pass through cmark;
+# `render()` runs the result through `clean()` afterwards -- confirm.
+try:
+    import cmarkgfm
+    from cmarkgfm.cmark import Options as cmarkgfmOptions
+    variants: Dict[str, Callable[[str], str]] = {
+        "GFM": lambda raw: cast(str, cmarkgfm.github_flavored_markdown_to_html(
+            raw, options=cmarkgfmOptions.CMARK_OPT_UNSAFE
+        )),
+        "CommonMark": lambda raw: cast(str, cmarkgfm.markdown_to_html(
+            raw, options=cmarkgfmOptions.CMARK_OPT_UNSAFE
+        )),
+    }
+except ImportError:
+    warnings.warn(_EXTRA_WARNING)
+    variants = {}
+
+# Make code fences with `python` as the language default to highlighting as
+# Python 3. `_highlight()` consults this map before looking up a lexer.
+_LANG_ALIASES = {
+    'python': 'python3',
+}
+
+
+def render(
+    raw: str,
+    variant: str = "GFM",
+    **kwargs: Any
+) -> Optional[str]:
+    """Render Markdown source to syntax-highlighted, sanitized HTML.
+
+    Args:
+        raw: The Markdown source text.
+        variant: Key into ``variants`` selecting the Markdown flavour.
+        **kwargs: Accepted and ignored.
+
+    Returns:
+        The cleaned HTML, or ``None`` when no Markdown renderer is
+        available (a warning is issued), the variant is unknown, or the
+        renderer produced empty output.
+    """
+    if not variants:
+        warnings.warn(_EXTRA_WARNING)
+        return None
+
+    to_html = variants.get(variant)
+    html = to_html(raw) if to_html else None
+    if not html:
+        return None
+
+    return clean(_highlight(html))
+
+
+def _highlight(html: str) -> str:
+    """Apply Pygments syntax highlighting to fenced code blocks in HTML.
+
+    Finds the sections that conform to the GitHub fenced code info string
+    as defined at https://github.github.com/gfm/#info-string and replaces
+    each one with a ``<pre lang="...">`` element holding the highlighted
+    code.
+
+    Args:
+        html (str): The rendered HTML.
+
+    Returns:
+        str: The HTML with Pygments syntax highlighting applied to all
+            matched code blocks.
+    """
+
+    html_formatter = pygments.formatters.HtmlFormatter(nowrap=True)
+
+    fenced_block = re.compile(
+        # cmarkgfm<0.6.0: <pre><code class="language-python">print('hello')</code></pre>
+        # cmarkgfm>=0.6.0: <pre lang="python"><code>print('hello')</code></pre>
+        r'(<pre>(?P<in_code><code) class="language-|<pre lang=")(?P<lang>[^"]+?)">'
+        '(?(in_code)|<code>)(?P<code>.+?)'
+        r'</code></pre>', re.DOTALL)
+
+    def replacer(match: Match[Any]) -> str:
+        requested = match.group('lang')
+        lang = _LANG_ALIASES.get(requested, requested)
+
+        # Unknown languages fall back to plain-text lexing.
+        try:
+            lexer = pygments.lexers.get_lexer_by_name(lang)
+        except ValueError:
+            lexer = pygments.lexers.TextLexer()
+
+        # Decode html entities in the code. cmark tries to be helpful and
+        # translate '"' to '&quot;', but it confuses pygments. Pygments will
+        # escape any html entities when re-writing the code, and the caller
+        # sanitizes the whole document afterwards.
+        source_code = unescape(match.group('code'))
+        highlighted = pygments.highlight(source_code, lexer, html_formatter)
+
+        return f'<pre lang="{lang}">{highlighted}</pre>'
+
+    return fenced_block.sub(replacer, html)
@@ -0,0 +1,135 @@
+# Copyright 2014 Donald Stufft
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+from typing import Any, Dict, IO, Optional, Union
+
+from docutils.core import publish_parts
+from docutils.nodes import colspec, image
+from docutils.writers.html5_polyglot import HTMLTranslator, Writer
+from docutils.utils import SystemMessage
+
+from .clean import clean
+
+
+class ReadMeHTMLTranslator(HTMLTranslator):  # type: ignore[misc] # docutils is incomplete, returns `Any` python/typeshed#7256 # noqa E501
+    """HTML translator used when rendering README files."""
+
+    # Overrides base class not to output `<object>` tag for SVG images.
+    object_image_types: Dict[str, str] = {}
+
+    def emptytag(
+        self,
+        node: Union[colspec, image],
+        tagname: str,
+        suffix: str = "\n",
+        **attributes: Any
+    ) -> Any:
+        """Emit an empty tag, carrying over an image's width and height.
+
+        For ``img`` tags, any ``width`` / ``height`` present on the node is
+        copied into the tag attributes before delegating to the base class.
+        """
+        if tagname == "img":
+            for dimension in ("width", "height"):
+                if dimension in node:
+                    attributes[dimension] = node[dimension]
+
+        return super().emptytag(node, tagname, suffix, **attributes)
+
+
+# Docutils settings overrides applied by `render()`; a copy of this mapping,
+# with the warning stream added, is passed as `settings_overrides`.
+SETTINGS = {
+    # Cloaking email addresses provides a small amount of additional
+    # privacy protection for email addresses inside of a chunk of ReST.
+    "cloak_email_addresses": True,
+
+    # Prevent a lone top level heading from being promoted to document
+    # title, and thus second level headings from being promoted to top
+    # level.
+    # NOTE(review): the docutils configuration docs describe `True` as
+    # *enabling* this promotion, so the wording above looks inverted --
+    # confirm the intent before changing either the comment or the value.
+    "doctitle_xform": True,
+
+    # Prevent a lone subsection heading from being promoted to section
+    # title, and thus second level headings from being promoted to top
+    # level.
+    # NOTE(review): as above, `True` appears to enable the promotion
+    # according to the docutils configuration docs -- confirm.
+    "sectsubtitle_xform": True,
+
+    # Set our initial header level
+    "initial_header_level": 2,
+
+    # Prevent local files from being included into the rendered output.
+    # This is a security concern because people can insert files
+    # that are part of the system, such as /etc/passwd.
+    "file_insertion_enabled": False,
+
+    # Halt rendering and throw an exception if there were any errors or
+    # warnings from docutils.
+    "halt_level": 2,
+
+    # Output math blocks as LaTeX that can be interpreted by MathJax for
+    # a prettier display of Math formulas.
+    # Pass a dummy path to suppress docutils warning and emit HTML.
+    "math_output": "MathJax /dummy.js",
+
+    # Disable raw html as enabling it is a security risk, we do not want
+    # people to be able to include any old HTML in the final output.
+    "raw_enabled": False,
+
+    # Disable all system messages from being reported.
+    "report_level": 5,
+
+    # Use typographic quotes, and transform --, ---, and ... into their
+    # typographic counterparts.
+    "smart_quotes": True,
+
+    # Strip all comments from the rendered output.
+    "strip_comments": True,
+
+    # Use the short form of syntax highlighting so that the generated
+    # Pygments CSS can be used to style the output.
+    "syntax_highlight": "short",
+
+    # Maximum width (in characters) for one-column field names.
+    # 0 means "no limit"
+    "field_name_limit": 0,
+}
+
+
+def render(
+    raw: str,
+    stream: Optional[IO[str]] = None,
+    **kwargs: Any
+) -> Optional[str]:
+    """Render reStructuredText source to sanitized HTML.
+
+    Args:
+        raw: The reStructuredText source.
+        stream: Text stream that receives docutils warnings. When omitted,
+            an in-memory buffer is used so nothing reaches ``sys.stderr``.
+        **kwargs: Accepted and ignored.
+
+    Returns:
+        The cleaned HTML (docinfo followed by the body fragment), or
+        ``None`` when docutils halts with ``SystemMessage`` or produces no
+        content. In the latter cases a fallback message is written to
+        ``stream`` if nothing appears to have been reported there.
+    """
+    if stream is None:
+        # Use a io.StringIO as the warning stream to prevent warnings from
+        # being printed to sys.stderr.
+        stream = io.StringIO()
+
+    settings = SETTINGS.copy()
+    settings["warning_stream"] = stream
+
+    writer = Writer()
+    writer.translator_class = ReadMeHTMLTranslator
+
+    halted = False
+    try:
+        parts = publish_parts(raw, writer=writer, settings_overrides=settings)
+    except SystemMessage:
+        halted = True
+        rendered = None
+    else:
+        rendered = parts.get("docinfo", "") + parts.get("fragment", "")
+
+    if rendered:
+        return clean(rendered)
+
+    # If the warnings stream is empty, docutils had none, so add ours.
+    try:
+        nothing_reported = not stream.tell()
+    except (OSError, ValueError):
+        # Unseekable streams (e.g. a terminal or pipe passed as
+        # sys.stderr) cannot report a position. Fall back to whether
+        # docutils halted, in which case it is presumed to have written
+        # its own message already.
+        nothing_reported = not halted
+    if nothing_reported:
+        stream.write("No content rendered from RST source.")
+    return None
@@ -0,0 +1,24 @@
+# Copyright 2015 Donald Stufft
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Optional
+
+from .clean import clean
+
+from html import escape as html_escape
+
+
+def render(raw: str, **kwargs: Any) -> Optional[str]:
+    """Render plain text as HTML.
+
+    The text is HTML-escaped, newlines become ``<br>`` elements, and the
+    result is passed through ``clean`` with ``br`` as the requested tag set.
+    Extra keyword arguments are accepted and ignored.
+    """
+    escaped = html_escape(raw)
+    with_line_breaks = escaped.replace("\n", "<br>")
+    return clean(with_line_breaks, tags={"br"})