"""Utility module"""
import os
import json
from shutil import which
from subprocess import Popen, PIPE
import sys
import panflute as pf
from panflute.elements import from_json
from innoconv_mintmod.constants import (
REGEX_PATTERNS,
ENCODING,
INDEX_LABEL_PREFIX,
SITE_UXID_PREFIX,
PANZER_TIMEOUT,
)
from innoconv_mintmod.errors import ParseError
def log(msg_string, level="INFO"):
    """Emit a JSON-encoded log record on stderr (used when running as a
    panzer filter).

    The record is a single line of JSON with the keys ``level`` and
    ``message``.

    :param msg_string: Message that is logged
    :type msg_string: str
    :param level: Log level (``INFO``, ``WARNING``, ``ERROR`` or ``CRITICAL``)
    :type level: str
    """
    stream = sys.stderr
    record = json.dumps({"level": level, "message": msg_string}) + "\n"
    if not hasattr(stream, "buffer"):
        # Stream has no binary layer: write the text as-is.
        stream.write(record)
    else:
        # Write explicitly encoded bytes to the underlying binary buffer.
        stream.buffer.write(record.encode(ENCODING))
    stream.flush()
def get_panzer_bin():
    """Locate the ``panzer`` executable on the search path.

    :rtype: str
    :returns: path of the panzer binary
    :raises OSError: if no panzer executable could be found
    """
    candidate = which("panzer")
    if candidate is not None and os.path.exists(candidate):
        return candidate
    raise OSError("panzer executable not found!")
def parse_fragment(parse_string, lang, as_doc=False, from_format="latex+raw_tex"):
    """Parse a source fragment using panzer.

    The fragment is piped to a panzer child process that emits a JSON
    document, which is then turned into panflute elements.

    :param parse_string: Source fragment
    :type parse_string: str
    :param lang: Language code
    :type lang: str
    :param as_doc: Return elements as :class:`panflute.elements.Doc`
    :type as_doc: bool
    :param from_format: Source format
    :type from_format: str

    :rtype: list of :class:`panflute.base.Element` or
        :class:`panflute.elements.Doc`
    :returns: parsed elements

    :raises OSError: if panzer executable is not found
    :raises RuntimeError: if panzer recursion depth is exceeded
    :raises RuntimeError: if panzer exits with a non-zero return code
    :raises RuntimeError: if panzer output could not be parsed
    :raises subprocess.TimeoutExpired: if panzer does not finish within
        ``PANZER_TIMEOUT`` (the child process is killed before re-raising)
    """
    root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
    panzer_cmd = [
        get_panzer_bin(),
        "---panzer-support",
        os.path.join(root_dir, ".panzer"),
        "--from={}".format(from_format),
        "--to=json",
        "--metadata=style:innoconv",
        "--metadata=lang:{}".format(lang),
    ]

    # pass nesting depth as ENV var
    recursion_depth = int(os.getenv("INNOCONV_RECURSION_DEPTH", "0"))
    env = os.environ.copy()
    env["INNOCONV_RECURSION_DEPTH"] = str(recursion_depth + 1)
    if recursion_depth > 10:
        raise RuntimeError("Panzer recursion depth exceeded!")

    proc = Popen(panzer_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
    try:
        out, err = proc.communicate(
            input=parse_string.encode(ENCODING), timeout=PANZER_TIMEOUT
        )
    finally:
        # communicate() does not kill the child when the timeout expires (or
        # when anything else goes wrong), so make sure no panzer process or
        # open pipe is left behind before the exception propagates.
        if proc.poll() is None:
            proc.kill()
            proc.communicate()
    out = out.decode(ENCODING)
    err = err.decode(ENCODING)

    if proc.returncode != 0:
        log(err, level="ERROR")
        raise RuntimeError("panzer process exited with non-zero return code.")

    # only print filter messages for better output log
    match = REGEX_PATTERNS["PANZER_OUTPUT"].search(err)
    if match:
        for line in match.group("messages").strip().splitlines():
            log("↳ %s" % line.strip(), level="INFO")
    else:
        raise RuntimeError("Unable to parse panzer output: {}".format(err))

    doc = json.loads(out, object_hook=from_json)

    if as_doc:
        return doc
    # Hand back a plain list rather than panflute's container type.
    if isinstance(doc.content, pf.ListContainer):
        return list(doc.content)
    return doc.content
# pylint: disable=dangerous-default-value
def to_inline(elem, classes=[], attributes={}):
    """Turn an arbitrary pandoc element (or list of elements) into inline
    element(s). The conversion can be lossy.

    If ``classes``/``attributes`` are empty, the ones found on ``elem`` (if
    any) are used instead.
    """
    classes = classes or getattr(elem, "classes", [])
    attributes = attributes or getattr(elem, "attributes", {})

    # Already inline: nothing to convert.
    if isinstance(elem, pf.Inline):
        return elem

    # Blocks that have a direct inline counterpart.
    if isinstance(elem, pf.CodeBlock):
        return pf.Code(elem.text, classes=classes, attributes=attributes)
    if isinstance(elem, pf.RawBlock):
        return pf.RawInline(elem.text, format=elem.format)

    if isinstance(elem, pf.Block):
        children = elem.content
    elif isinstance(elem, list):
        children = elem
    else:
        children = []

    def _convert(child):
        return to_inline(child, classes=classes, attributes=attributes)

    # dont nest too many spans
    if len(children) == 1:
        return _convert(children[0])

    converted = [_convert(child) for child in children]
    return pf.Span(*converted, classes=classes, attributes=attributes)
def destringify(string):
    r"""Takes a string and transforms it into list of Str and Space objects.

    This function breaks down strings with whitespace. It could be done by
    calling :func:`parse_fragment` but doesn't have the overhead involved.

    Any run of whitespace becomes exactly one ``Space``; leading and trailing
    whitespace is dropped. An empty or whitespace-only string gives ``[]``.

    :Example:

        >>> destringify('foo bar\tbaz')
        [Str(foo), Space, Str(bar), Space, Str(baz)]

    :param string: String to transform
    :type string: str

    :rtype: list
    :returns: list of :class:`panflute.Str` and :class:`panflute.Space`
    """
    ret = []
    # Decide on the separator by position, not by ``list.index(word)``: the
    # latter finds the *first* occurrence, so a repeated last word (e.g.
    # "a b a") would get a spurious trailing Space.
    for position, word in enumerate(string.split()):
        if position:
            ret.append(pf.Space())
        ret.append(pf.Str(word))
    return ret
def parse_cmd(text):
    r"""
    Split a LaTeX command into its name and arguments using the ``CMD``
    regular expression.

    Handles commands of the form ``\foo{bar}{baz}``.

    :param text: String to parse
    :type text: str

    :rtype: (str, list)
    :returns: command name and list of command arguments
    :raises ParseError: if ``text`` does not match the command pattern
    """
    parsed = REGEX_PATTERNS["CMD"].match(text)
    if parsed is None:
        raise ParseError("Could not parse LaTeX command: '%s'" % text)
    captured = parsed.groups()
    # First group: command name; second group: raw argument string. Whatever
    # follows the arguments is not needed here.
    arguments = parse_nested_args(captured[1])[0]
    return captured[0], arguments
def parse_nested_args(to_parse):
    r"""
    Read the leading brace-delimited arguments of a LaTeX command. Arguments
    may themselves contain nested braces.

    A string like ``{bar}{baz{}}rest`` is split into
    ``(['bar', 'baz{}'], 'rest')``.

    :param to_parse: String to parse
    :type to_parse: str

    :rtype: (list, str)
    :returns: parsed arguments and rest string (``None`` if nothing is left)
    """
    args = []
    consumed = 0  # number of leading characters covered by complete arguments
    if to_parse.startswith("{"):
        depth = 0
        arg_start = 0
        for pos, char in enumerate(to_parse):
            if char == "{":
                if depth == 0:
                    # opening brace of a new top-level argument
                    arg_start = pos + 1
                depth += 1
            elif depth == 0:
                # first character outside of any argument: stop scanning
                break
            elif char == "}":
                depth -= 1
                if depth == 0:
                    args.append(to_parse[arg_start:pos])
                    consumed = pos + 1
    rest = to_parse[consumed:]
    return (args, rest if rest else None)
def remove_annotations(doc):
    """Strip left-over annotation elements from the document.

    Every ``Div`` whose classes contain ``INDEX_LABEL_PREFIX`` or
    ``SITE_UXID_PREFIX`` is deleted.

    :param doc: Document
    :type doc: :py:class:`panflute.elements.Doc`
    """

    def _drop_annotation(elem, _):
        try:
            is_annotation = isinstance(elem, pf.Div) and any(
                marker in elem.classes
                for marker in (INDEX_LABEL_PREFIX, SITE_UXID_PREFIX)
            )
        except AttributeError:
            is_annotation = False
        # an empty list deletes the element, None keeps it
        return [] if is_annotation else None

    doc.walk(_drop_annotation)
def remove_empty_paragraphs(doc):
    """Delete every paragraph without content from the document.

    :param doc: Document
    :type doc: :py:class:`panflute.elements.Doc`
    """

    def _drop_if_empty(elem, _):
        is_empty_para = isinstance(elem, pf.Para) and not elem.content
        # an empty list deletes the element, None keeps it
        return [] if is_empty_para else None

    doc.walk(_drop_if_empty)
def remember(doc, key, elem):
    """Remember an element in the document for later.

    Elements are kept in a dict stored as ``doc.remembered_element``; the
    dict is created on first use. To retrieve remembered elements use
    :py:func:`get_remembered`.

    :param doc: Document where to store the memory
    :type doc: :py:class:`panflute.elements.Doc`
    :param key: Key under which element is stored
    :type key: str
    :param elem: Element to remember
    :type elem: :py:class:`panflute.base.Element`
    """
    try:
        memory = doc.remembered_element
    except AttributeError:
        # nothing remembered on this document yet
        doc.remembered_element = {key: elem}
    else:
        memory[key] = elem
def get_remembered(doc, key, keep=False):
    """Retrieve a remembered element from the document and forget it.

    To remember elements use :py:func:`remember`.

    :param doc: Document where the element is stored
    :type doc: :py:class:`panflute.elements.Doc`
    :param key: Key under which element is stored
    :type key: str
    :param keep: If value should be kept after retrieving (default=False)
    :type keep: bool

    :rtype: :py:class:`panflute.base.Element`
    :returns: The remembered element or `None`
    """
    try:
        memory = doc.remembered_element
        if keep:
            return memory[key]
        # look up and forget in one step
        return memory.pop(key)
    except (AttributeError, KeyError):
        # nothing was ever remembered, or not under this key
        return None
def block_wrap(elem, orig_elem):
    """Wrap an element in a block if the element it replaces was a block.

    If the original element was block panflute expects the return value to be
    also block. In many places we need to detect this and wrap an inline.

    :param elem: Element to be wrapped
    :type elem: :py:class:`panflute.base.Element`
    :param orig_elem: Original element
    :type orig_elem: :py:class:`panflute.base.Element`

    :rtype: :py:class:`panflute.base.Element`
    :returns: ``elem`` or ``elem`` wrapped in
        :py:class:`panflute.elements.Plain`
    """
    replaces_block = isinstance(orig_elem, pf.Block)
    return pf.Plain(elem) if replaces_block else elem
def convert_simplification_code(code):
    """Translate a numeric simplification code into a comma-separated string
    of flag names.

    The lowest four bits select at most one notation flag (values 1 to 3);
    each of the bits 16 to 2048 switches on one additional flag.

    :param code: Binary flags
    :type code: int

    :rtype: str
    :returns: flag names joined by ``,`` (empty string if no flag is set)
    """
    notations = {
        1: "no-brackets",
        2: "factor-notation",
        # actually never used in tub_mathe
        3: "sum-notation",
    }
    bit_names = (
        (16, "only-one-slash"),
        (32, "antiderivative"),
        (64, "no-sqrt"),
        (128, "no-abs"),
        (256, "no-fractions-no-powers"),
        (512, "special-support-points"),
        (1024, "only-natural-number"),
        (2048, "one-power-no-mult-or-div"),
    )

    names = []
    notation = code & 15
    if notation in notations:
        names.append(notations[notation])
    names.extend(name for bit, name in bit_names if code & bit)
    return ",".join(names)