# Source code for innoconv_mintmod.utils

"""Utility module"""

import os
import json
from shutil import which
from subprocess import Popen, PIPE, TimeoutExpired
import sys

import panflute as pf
from panflute.elements import from_json

from innoconv_mintmod.constants import (
    REGEX_PATTERNS,
    ENCODING,
    INDEX_LABEL_PREFIX,
    SITE_UXID_PREFIX,
    PANZER_TIMEOUT,
)
from innoconv_mintmod.errors import ParseError


def log(msg_string, level="INFO"):
    """Log messages when running as a panzer filter.

    The message is serialized as a single-line JSON object with the keys
    ``level`` and ``message`` and written to ``stderr``.

    :param msg_string: Message that is logged
    :type msg_string: str
    :param level: Log level (``INFO``, ``WARNING``, ``ERROR`` or ``CRITICAL``)
    :type level: str
    """
    payload = json.dumps({"level": level, "message": msg_string}) + "\n"
    # Prefer the underlying binary stream so the bytes are written in
    # ENCODING; fall back to the text stream when stderr has no ``buffer``.
    if hasattr(sys.stderr, "buffer"):
        sys.stderr.buffer.write(payload.encode(ENCODING))
    else:
        sys.stderr.write(payload)
    sys.stderr.flush()


def get_panzer_bin():
    """Get path of panzer binary.

    :rtype: str
    :returns: path of the ``panzer`` executable as found on ``PATH``

    :raises OSError: if no ``panzer`` executable can be located
    """
    panzer_bin = which("panzer")
    if panzer_bin is None or not os.path.exists(panzer_bin):
        raise OSError("panzer executable not found!")
    return panzer_bin


def parse_fragment(parse_string, lang, as_doc=False, from_format="latex+raw_tex"):
    """Parse a source fragment using panzer.

    The fragment is piped to a panzer child process that converts it to
    pandoc JSON, which is then loaded into panflute elements.

    :param parse_string: Source fragment
    :type parse_string: str
    :param lang: Language code
    :type lang: str
    :param as_doc: Return elements as :class:`panflute.elements.Doc`
    :type as_doc: bool
    :param from_format: Source format
    :type from_format: str

    :rtype: list of :class:`panflute.base.Element` or
        :class:`panflute.elements.Doc`
    :returns: parsed elements

    :raises OSError: if panzer executable is not found
    :raises RuntimeError: if panzer recursion depth is exceeded
    :raises RuntimeError: if panzer exits with a non-zero return code
    :raises RuntimeError: if panzer output could not be parsed
    :raises subprocess.TimeoutExpired: if panzer does not finish within
        ``PANZER_TIMEOUT`` (the child process is killed before re-raising)
    """

    root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
    panzer_cmd = [
        get_panzer_bin(),
        "---panzer-support",
        os.path.join(root_dir, ".panzer"),
        "--from={}".format(from_format),
        "--to=json",
        "--metadata=style:innoconv",
        "--metadata=lang:{}".format(lang),
    ]

    # The nesting depth is handed down to child processes via an ENV var so
    # that runaway recursion can be detected.
    recursion_depth = int(os.getenv("INNOCONV_RECURSION_DEPTH", "0"))
    if recursion_depth > 10:
        raise RuntimeError("Panzer recursion depth exceeded!")

    env = os.environ.copy()
    env["INNOCONV_RECURSION_DEPTH"] = str(recursion_depth + 1)

    proc = Popen(panzer_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
    try:
        out, err = proc.communicate(
            input=parse_string.encode(ENCODING), timeout=PANZER_TIMEOUT
        )
    except TimeoutExpired:
        # communicate() does not kill the child on timeout; do it explicitly
        # and reap it so no stray process or open pipe is left behind.
        proc.kill()
        proc.communicate()
        raise
    out = out.decode(ENCODING)
    err = err.decode(ENCODING)

    if proc.returncode != 0:
        log(err, level="ERROR")
        raise RuntimeError("panzer process exited with non-zero return code.")

    # only print filter messages for better output log
    match = REGEX_PATTERNS["PANZER_OUTPUT"].search(err)
    if not match:
        raise RuntimeError("Unable to parse panzer output: {}".format(err))
    for line in match.group("messages").strip().splitlines():
        log("↳ %s" % line.strip(), level="INFO")

    doc = json.loads(out, object_hook=from_json)

    if as_doc:
        return doc

    # Hand back a plain list instead of panflute's container type.
    if isinstance(doc.content, pf.ListContainer):
        return list(doc.content)

    return doc.content


def to_inline(elem, classes=None, attributes=None):
    """Convert any given pandoc element to inline element(s). Some information
    may be lost.

    :param elem: Element (or list of elements) to convert
    :type elem: :class:`panflute.base.Element` or list
    :param classes: Classes for the resulting element; when empty or omitted
        the classes of ``elem`` are used (if it has any)
    :type classes: list
    :param attributes: Attributes for the resulting element; when empty or
        omitted the attributes of ``elem`` are used (if it has any)
    :type attributes: dict

    :rtype: :class:`panflute.base.Inline`
    :returns: ``elem`` itself if it already is inline, otherwise an inline
        replacement (children of blocks and lists end up in a
        :class:`panflute.elements.Span`)
    """

    # Fall back to whatever the element itself carries.
    if not classes:
        classes = getattr(elem, "classes", [])
    if not attributes:
        attributes = getattr(elem, "attributes", {})

    if isinstance(elem, pf.Inline):
        return elem
    if isinstance(elem, pf.CodeBlock):
        return pf.Code(elem.text, classes=classes, attributes=attributes)
    if isinstance(elem, pf.RawBlock):
        return pf.RawInline(elem.text, format=elem.format)

    # Anything that is neither a block nor a list yields an empty span.
    elems = []
    if isinstance(elem, pf.Block):
        elems = elem.content
    elif isinstance(elem, list):
        elems = elem

    # dont nest too many spans
    if len(elems) == 1:
        return to_inline(elems[0], classes=classes, attributes=attributes)

    ret = [to_inline(x, classes=classes, attributes=attributes) for x in elems]

    return pf.Span(*ret, classes=classes, attributes=attributes)


def destringify(string):
    """Takes a string and transforms it into list of Str and Space objects.

    This function breaks down strings with whitespace. It could be done by
    calling :func:`parse_fragment` but doesn't have the overhead involved.

    Consecutive whitespace is collapsed into a single
    :class:`panflute.Space`; leading and trailing whitespace is dropped.

    :Example:

        >>> destringify('foo  bar\tbaz')
        [Str(foo), Space, Str(bar), Space, Str(baz)]

    :param string: String to transform
    :type string: str

    :rtype: list
    :returns: list of :class:`panflute.Str` and :class:`panflute.Space`
    """
    ret = []
    # Separate by position, not by value: looking a word up with
    # ``list.index`` finds its *first* occurrence and therefore appends a
    # trailing Space whenever the last word also appears earlier.
    for position, word in enumerate(string.split()):
        if position:
            ret.append(pf.Space())
        ret.append(pf.Str(word))
    return ret


def parse_cmd(text):
    r"""
    Parse a LaTeX command using regular expressions.

    Parses a command like: ``\foo{bar}{baz}``

    :param text: String to parse
    :type text: str

    :rtype: (str, list)
    :returns: command name and list of command arguments

    :raises ParseError: if ``text`` does not match the command pattern
    """
    match = REGEX_PATTERNS["CMD"].match(text)
    if not match:
        raise ParseError("Could not parse LaTeX command: '%s'" % text)
    groups = match.groups()
    cmd_name = groups[0]
    # Anything following the arguments is deliberately discarded here.
    cmd_args, _ = parse_nested_args(groups[1])
    return cmd_name, cmd_args


def parse_nested_args(to_parse):
    r"""
    Parse LaTeX command arguments that can have nested commands. Returns
    arguments and rest string.

    Parses strings like: ``{bar}{baz{}}rest`` into
    ``(['bar', 'baz{}'], 'rest')``.

    Only complete, directly consecutive brace groups at the start of the
    string are treated as arguments. An unbalanced group is left untouched
    in the rest string.

    :param to_parse: String to parse
    :type to_parse: str

    :rtype: (list, str)
    :returns: parsed arguments and rest string (``None`` if nothing is left)
    """
    pargs = []
    consumed = 0  # index just past the last complete top-level argument
    depth = 0  # current brace nesting level
    arg_start = 0  # index of first character inside the current argument
    for pos, char in enumerate(to_parse):
        # Outside of any braces only another opening brace continues the
        # argument list; anything else belongs to the rest string.
        if depth == 0 and char != "{":
            break
        if char == "{":
            if depth == 0:
                arg_start = pos + 1
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                pargs.append(to_parse[arg_start:pos])
                consumed = pos + 1
    rest = to_parse[consumed:]
    return (pargs, rest or None)


def extract_identifier(content):
    r"""Extract identifier from annotation elements in content.

    ``\MLabel``/``MDeclareSiteUXID`` commands that occur within environments
    are parsed in a child process (e.g.
    :py:func:`innoconv_mintmod.mintmod_filter.commands.handle_mlabel`).
    The id attribute can't be set directly as they can't access the whole doc
    tree. As a workaround they create a fake element and add the identifier.

    Only the first three elements of ``content`` are inspected. The content
    itself is not modified; left-over annotation elements can be removed with
    :py:func:`remove_annotations`.

    :param content: List of elements
    :type content: list

    :rtype: str
    :returns: identifier (might be ``None``)
    """
    identifier = None

    def _extract_id(prefix, child):
        # Return the ID carried by an annotation element, raise otherwise.
        if prefix in child.classes:
            match = REGEX_PATTERNS["EXTRACT_ID"](prefix).match(child.identifier)
            if match:
                return match.groups()[0]
        raise ValueError()

    # extract ID (label takes precedence!)
    # Later hits overwrite earlier ones, so the prefix that is processed last
    # wins.
    for prefix in (SITE_UXID_PREFIX, INDEX_LABEL_PREFIX):
        try:
            # check first 3 elements
            for idx in range(3):
                child = content[idx]
                try:
                    identifier = _extract_id(prefix, child)
                except (AttributeError, ValueError):
                    # not an annotation element for this prefix
                    pass
        except IndexError:
            # content has fewer than 3 elements
            pass

    return identifier


def remove_annotations(doc):
    """Remove left-over annotation elements from document.

    Annotation elements are :class:`panflute.elements.Div` elements that
    carry the index label or site UXID prefix as a class.

    :param doc: Document
    :type doc: :py:class:`panflute.elements.Doc`
    """

    def _rem_annotation(elem, _):
        if isinstance(elem, pf.Div) and (
            INDEX_LABEL_PREFIX in elem.classes or SITE_UXID_PREFIX in elem.classes
        ):
            return []  # delete element
        return None  # keep element unchanged

    doc.walk(_rem_annotation)


def remove_empty_paragraphs(doc):
    """Remove empty paragraphs from document.

    A paragraph counts as empty when its content is empty.

    :param doc: Document
    :type doc: :py:class:`panflute.elements.Doc`
    """

    def _rem_para(elem, _):
        if isinstance(elem, pf.Para) and not elem.content:
            return []  # delete element
        return None  # keep element unchanged

    doc.walk(_rem_para)


def remember(doc, key, elem):
    """Remember an element in the document for later.

    To retrieve remembered elements use :py:func:`get_remembered`.

    The memory is kept in the ``remembered_element`` attribute of ``doc``,
    which is created on first use. An existing entry for ``key`` is
    overwritten.

    :param doc: Document where to store the memory
    :type doc: :py:class:`panflute.elements.Doc`
    :param key: Key under which element is stored
    :type key: str
    :param elem: Element to remember
    :type elem: :py:class:`panflute.base.Element`
    """
    try:
        doc.remembered_element[key] = elem
    except AttributeError:
        # first element remembered on this document
        doc.remembered_element = {key: elem}


def get_remembered(doc, key, keep=False):
    """Retrieve remembered element from the document and forget it.

    To remember elements use :py:func:`remember`.

    :param doc: Document where the element is stored
    :type doc: :py:class:`panflute.elements.Doc`
    :param key: Key under which element is stored
    :type key: str
    :param keep: If value should be kept after retrieving (default=False)
    :type keep: bool

    :rtype: :py:class:`panflute.base.Element`
    :returns: The remembered element or `None`
    """
    try:
        memory = doc.remembered_element
    except AttributeError:
        # nothing has been remembered on this document yet
        return None
    if keep:
        return memory.get(key)
    return memory.pop(key, None)


def block_wrap(elem, orig_elem):
    """Wraps an element in a block if necessary.

    If the original element was block panflute expects the return value to be
    also block. In many places we need to detect this and wrap an inline.

    Note that ``elem`` is wrapped whenever ``orig_elem`` is a block; ``elem``
    itself is not inspected.

    :param elem: Element to be wrapped
    :type elem: :py:class:`panflute.base.Element`
    :param orig_elem: Original element
    :type orig_elem: :py:class:`panflute.base.Element`

    :rtype: :py:class:`panflute.base.Element`
    :returns: ``elem`` or ``elem`` wrapped in
        :py:class:`panflute.elements.Plain`
    """
    if isinstance(orig_elem, pf.Block):
        return pf.Plain(elem)
    return elem


def convert_simplification_code(code):
    """Convert binary flags to string flags.

    The lowest four bits of ``code`` are read as a single value that selects
    at most one notation flag. Each of the higher bits (16 to 2048) maps to
    its own flag.

    :param code: Numeric simplification code
    :type code: int

    :rtype: str
    :returns: comma-separated flag names (empty string if no flag is set)
    """
    notation_flags = {
        1: "no-brackets",
        2: "factor-notation",
        # actually never used in tub_mathe
        3: "sum-notation",
    }
    bit_flags = (
        (16, "only-one-slash"),
        (32, "antiderivative"),
        (64, "no-sqrt"),
        (128, "no-abs"),
        (256, "no-fractions-no-powers"),
        (512, "special-support-points"),
        (1024, "only-natural-number"),
        (2048, "one-power-no-mult-or-div"),
    )

    flags = []
    # Values of the low nibble other than 1-3 do not map to any flag.
    notation = notation_flags.get(code & 15)
    if notation is not None:
        flags.append(notation)
    flags.extend(name for bit, name in bit_flags if code & bit)

    return ",".join(flags)