import argparse
import copy
import html
import logging
import os
import re
import sys
from collections.abc import MutableMapping, MutableSequence
from io import StringIO, TextIOWrapper
from typing import IO, Any, Literal, Optional, Union, cast
from urllib.parse import urldefrag
from mistune import create_markdown
from mistune.markdown import Markdown
from mistune.renderers.html import HTMLRenderer
from .exceptions import SchemaSaladException, ValidationException
from .schema import avro_field_name, extend_and_specialize, get_metaschema
from .utils import add_dictlist, aslist
from .validate import avro_type_name
# Closed set of plugin identifiers accepted wherever a list of Markdown plugins
# is passed around (NOTE(review): these look like mistune's built-in plugin
# names -- confirm against the callers that build the Markdown renderer).
PluginName = Literal[
    "url", "strikethrough", "footnotes", "table", "task_lists", "def_list", "abbr"
]

# Module-wide logger, registered under the fixed name "salad".
_logger = logging.getLogger("salad")
def escape_html(s: str) -> str:
    """Escape HTML but otherwise preserve single quotes.

    The input is unescaped first so that entities which are already escaped
    (e.g. ``&amp;``) are not escaped a second time.

    :param s: text that may contain raw HTML and/or HTML entities.
    :returns: the text with ``&``, ``<``, ``>`` and ``"`` escaped, and single
        quotes left as literal ``'`` characters.
    """
    # html.escape() defaults to quote=True, which rewrites "'" as "&#x27;".
    # Undo only that substitution so single quotes survive untouched.
    return html.escape(html.unescape(s)).replace("&#x27;", "'")
def vocab_type_name(url: str) -> str:
    """Return the part of the Avro type name for ``url`` after its last dot.

    When the Avro type name contains no ``.`` it is returned unchanged.
    """
    qualified_name = avro_type_name(url)
    # rpartition yields the whole string as the last element when no "." exists.
    _, _, short_name = qualified_name.rpartition(".")
    return short_name
def has_types(items: Any) -> list[str]:
    """Retrieve all the types of a record.

    Walks a type description recursively and collects every type name found.

    :param items: a type description; may be a mapping, a sequence of
        descriptions, a plain type name string, or anything else.
    :returns: the collected type names, in traversal order. A record mapping
        contributes only its own ``name``; a string contributes itself;
        unsupported values contribute nothing.
    :raises KeyError: if a record mapping has no ``name`` entry.
    """
    if isinstance(items, MutableMapping):
        # Use .get(): a mapping without "type" is tolerated by the loop below,
        # so it must not blow up on this record check either.
        if items.get("type") == "https://w3id.org/cwl/salad#record":
            return [items["name"]]
        found: list[str] = []
        for key in ("type", "items", "values"):
            if key in items:
                found.extend(has_types(items[key]))
        return found
    if isinstance(items, MutableSequence):
        return [name for entry in items for name in has_types(entry)]
    if isinstance(items, str):
        return [items]
    return []
def linkto(item: str) -> str:
    """Build a Markdown link pointing at the in-page anchor for ``item``.

    The link text is the URL fragment of ``item``; the target is that fragment
    converted to an anchor id by ``to_id``.
    """
    fragment = urldefrag(item).fragment
    anchor = to_id(fragment)
    return f"[{fragment}](#{anchor})"
class MyRenderer(HTMLRenderer):
"""Custom renderer with different representations of selected HTML tags."""
def heading(self, text: str, level: int, **attrs: Any) -> str:
"""Override HTML heading creation with text IDs."""
return """
" + html.escape(code, quote=self._escape) + "
\n"
def patch_fenced_code(original_markdown_text: str, modified_markdown_text: str) -> str:
    """Restore each fenced code block of the modified text to its original form.

    Fenced code blocks are located in both texts and paired up in order. The
    result is the modified text with every fenced block swapped for the
    corresponding block taken verbatim from the original text.

    :param original_markdown_text: Markdown before any modification.
    :param modified_markdown_text: Markdown after modification.
    :returns: the modified text with its fenced code blocks reverted.
    :raises ValueError: if the two texts contain a different number of
        fenced code blocks.
    """
    # Pattern inspired from 'mistune.block_parser.BlockParser.FENCED_CODE'.
    # Rather than the leading ' {0,3}' of that pattern, any amount of leading
    # spaces is accepted, provided the closing fence repeats it (via '\1').
    # Fenced code nested inside lists can be indented deeper than "normal".
    fence_re = re.compile(
        r"( *)(`{3,}|~{3,})([^`\n]*)\n(?:|([\s\S]*?)\n)(?:\1\2[~`]* *\n+|$)"
    )
    original_spans = [found.span() for found in fence_re.finditer(original_markdown_text)]
    modified_spans = [found.span() for found in fence_re.finditer(modified_markdown_text)]
    if len(original_spans) != len(modified_spans):
        raise ValueError("Cannot patch fenced code definitions with inconsistent matches.")

    pieces: list[str] = []
    cursor = 0  # end of the last fenced block consumed from the modified text
    for (ori_start, ori_end), (mod_start, mod_end) in zip(original_spans, modified_spans):
        # Text between the previous fenced block and this one is kept as modified.
        pieces.append(modified_markdown_text[cursor:mod_start])
        # The fenced block itself is taken from the original text.
        pieces.append(original_markdown_text[ori_start:ori_end])
        cursor = mod_end
    # Whatever follows the last fenced block is kept as modified.
    pieces.append(modified_markdown_text[cursor:])
    return "".join(pieces)
def to_id(text: str) -> str:
    """Convert heading text into an anchor id.

    If the text starts with an ASCII digit, everything up to and including the
    first space is dropped (when there is a space at all). Any remaining
    spaces are replaced with underscores.

    :param text: the heading text; may be empty.
    :returns: the derived id. Empty text yields an empty id instead of
        raising ``IndexError``.
    """
    textid = text
    # str.startswith with a tuple is safe on the empty string, unlike text[0].
    if text.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9")):
        _, space, remainder = text.partition(" ")
        if space:  # no space found: keep the text as it is
            textid = remainder
    return textid.replace(" ", "_")
class ToC:
def __init__(self) -> None:
self.first_toc_entry = True
self.numbering = [0]
self.toc = ""
self.start_numbering = True
def add_entry(self, thisdepth: int, title: str) -> str:
"""Add an entry to the table of contents."""
depth = len(self.numbering)
if thisdepth < depth:
self.toc += ""
for _ in range(0, depth - thisdepth):
self.numbering.pop()
self.toc += ""
self.numbering[-1] += 1
elif thisdepth == depth:
if not self.first_toc_entry:
self.toc += ""
else:
self.first_toc_entry = False
self.numbering[-1] += 1
elif thisdepth > depth:
self.numbering.append(1)
num = (
"{}.{}".format(self.numbering[0], ".".join([str(n) for n in self.numbering[1:]]))
if self.start_numbering
else ""
)
self.toc += f"""{}
"
", {}
| {}>".format(
jsonldPredicate["mapSubject"],
jsonldPredicate["mapPredicate"],
self.typefmt(tp["items"], redirects),
)
)
else:
ar += " | "
if len(ar) > 40:
ar += "{}
, {}>".format(
jsonldPredicate["mapSubject"],
self.typefmt(tp["items"], redirects),
)
return ar
if tp["type"] in (
"https://w3id.org/cwl/salad#record",
"https://w3id.org/cwl/salad#enum",
):
frg = vocab_type_name(tp["name"])
if tp["name"] in redirects:
return """{}""".format(redirects[tp["name"]], frg)
if tp["name"] in self.typemap:
return f"""{frg}"""
if tp["type"] == "https://w3id.org/cwl/salad#enum" and len(tp["symbols"]) == 1:
return "constant value {}
".format(
avro_field_name(tp["symbols"][0])
)
return frg
if isinstance(tp["type"], MutableMapping):
return self.typefmt(tp["type"], redirects)
else:
if str(tp) in redirects:
return f"""{redirects[tp]}""" # noqa: B907
if str(tp) in basicTypes:
return """{}""".format(
self.primitiveType, vocab_type_name(str(tp))
)
frg2 = urldefrag(tp)[1]
if frg2 != "":
tp = frg2
return f"""{tp}"""
raise SchemaSaladException("We should not be here!")
def render_type(self, f: dict[str, Any], depth: int) -> None:
"""Render a type declaration."""
if f["name"] in self.rendered or f["name"] in self.redirects:
return
self.rendered.add(f["name"])
if f.get("abstract"):
return
if "doc" not in f:
f["doc"] = ""
f["type"] = copy.deepcopy(f)
f["doc"] = ""
f = f["type"]
if "doc" not in f:
f["doc"] = ""
def extendsfrom(item: dict[str, Any], ex: list[dict[str, Any]]) -> None:
if "extends" in item:
for e in aslist(item["extends"]):
ex.insert(0, self.typemap[e])
extendsfrom(self.typemap[e], ex)
ex = [f]
extendsfrom(f, ex)
enumDesc = {}
if f["type"] == "enum" and isinstance(f["doc"], MutableSequence):
for e in ex:
for i in e["doc"]:
idx = i.find(":")
if idx > -1:
enumDesc[i[:idx]] = i[idx + 1 :]
e["doc"] = [i for i in e["doc"] if i.find(":") == -1 or i.find(" ") < i.find(":")]
f["doc"] = fix_doc(f["doc"])
if f["type"] == "record":
for field in f.get("fields", []):
if "doc" not in field:
field["doc"] = ""
if f["type"] != "documentation":
lines = []
for line in f["doc"].splitlines():
if len(line) > 0 and line[0] == "#":
line = ("#" * depth) + line
lines.append(line)
f["doc"] = "\n".join(lines)
frg = urldefrag(f["name"])[1]
num = self.toc.add_entry(depth, frg)
doc = "{} {} {}\n".format(("#" * depth), num, frg)
else:
doc = ""
# Save the first line of the first type definition for the
# HTML {HTML}
' # we must pass it to both since 'MyRenderer' is predefined escape = False markdown2html: Markdown = create_markdown( renderer=MyRenderer(escape=escape), plugins=plugins, escape=escape, ) doc = cast(str, markdown2html(doc)) if f["type"] == "record": doc += "{}
symbol | description |
---|---|
{} | {} | ".format( efrg, enumDesc.get(efrg, "") ) doc += "