# tools.py 6.38 KB

# builtin
from os import path
import subprocess
import logging
import re

# packages
import yaml
import mistune
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter

# setup logger for this module
logger = logging.getLogger(__name__)


# -------------------------------------------------------------------------
# Markdown to HTML renderer with support for LaTeX equations
# Inline math: $x$
# Block math: $$x$$ or \begin{equation}x\end{equation}
# -------------------------------------------------------------------------
class MathBlockGrammar(mistune.BlockGrammar):
    """Block-level grammar extended with two patterns for LaTeX math.

    Both patterns are anchored at the start of the text being scanned and
    are compiled with DOTALL, so the math body may span several lines.
    """

    # \begin{name} ... \end{name}
    # Group 1 is the environment name (lowercase letters, optionally followed
    # by '*'); group 2 is the body. The backreference forces the closing name
    # to be the same as the opening one.
    latex_environment = re.compile(
        r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}",
        flags=re.DOTALL,
    )

    # $$ ... $$
    # Group 1 is the (possibly empty) text between the delimiters.
    block_math = re.compile(r"^\$\$(.*?)\$\$", flags=re.DOTALL)


class MathBlockLexer(mistune.BlockLexer):
    """Block lexer that also produces tokens for math blocks and LaTeX environments."""

    # The two math rules are listed ahead of mistune's own block rules.
    default_rules = [
        'block_math',
        'latex_environment',
        *mistune.BlockLexer.default_rules,
    ]

    def __init__(self, rules=None, **kwargs):
        """Build the lexer; a fresh MathBlockGrammar is used when `rules` is None."""
        grammar = MathBlockGrammar() if rules is None else rules
        super().__init__(grammar, **kwargs)

    def parse_block_math(self, m):
        """Append a 'block_math' token with the text captured by group 1 of `m`."""
        token = {'type': 'block_math', 'text': m.group(1)}
        self.tokens.append(token)

    def parse_latex_environment(self, m):
        """Append a 'latex_environment' token.

        Group 1 of `m` is stored as the environment name and group 2 as
        its text.
        """
        env_name, env_body = m.group(1), m.group(2)
        token = {
            'type': 'latex_environment',
            'name': env_name,
            'text': env_body,
        }
        self.tokens.append(token)


class MathInlineGrammar(mistune.InlineGrammar):
    """Inline grammar extended with patterns for $...$ and $$...$$ math."""

    # $$ ... $$ appearing inside inline text; group 1 is the non-empty body.
    block_math = re.compile(r"^\$\$(.+?)\$\$", flags=re.DOTALL)

    # $ ... $ inline math; group 1 is the non-empty body.
    math = re.compile(r"^\$(.+?)\$", flags=re.DOTALL)

    # Plain text: consumes as little as possible up to the next character
    # that may start another inline construct. '$' is part of that set, so
    # plain text stops right before a math delimiter.
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)')


class MathInlineLexer(mistune.InlineLexer):
    """Inline lexer that hands math spans over to the renderer."""

    # 'block_math' is listed before 'math' so that the $$...$$ pattern is
    # tried before the single-dollar one.
    default_rules = ['block_math', 'math', *mistune.InlineLexer.default_rules]

    def __init__(self, renderer, rules=None, **kwargs):
        """Build the lexer; a fresh MathInlineGrammar is used when `rules` is None."""
        grammar = MathInlineGrammar() if rules is None else rules
        super().__init__(renderer, grammar, **kwargs)

    def output_math(self, m):
        """Render an inline $...$ match through the renderer's inline_math()."""
        body = m.group(1)
        return self.renderer.inline_math(body)

    def output_block_math(self, m):
        """Render an inline $$...$$ match through the renderer's block_math()."""
        body = m.group(1)
        return self.renderer.block_math(body)


class MarkdownWithMath(mistune.Markdown):
    """Markdown converter that uses the math-aware lexers by default."""

    def __init__(self, renderer, **kwargs):
        """Create the converter.

        The 'inline' and 'block' keyword arguments default to
        MathInlineLexer and MathBlockLexer; values given by the caller are
        left untouched.
        """
        kwargs.setdefault('inline', MathInlineLexer)
        kwargs.setdefault('block', MathBlockLexer)
        super().__init__(renderer, **kwargs)

    def output_block_math(self):
        """Render the current 'block_math' token."""
        token = self.token
        return self.renderer.block_math(token['text'])

    def output_latex_environment(self):
        """Render the current 'latex_environment' token."""
        token = self.token
        return self.renderer.latex_environment(token['name'], token['text'])



class HighlightRenderer(mistune.Renderer):
    """HTML renderer with highlighted code, styled tables/images and math.

    Code blocks are highlighted with Pygments, tables and images get fixed
    CSS classes (they look like Bootstrap class names - confirm against the
    page templates), and math markup is emitted as text for the browser
    to render.
    """

    def block_code(self, code, lang='text'):
        """Return `code` as HTML highlighted by Pygments.

        `lang` is a Pygments lexer alias. When it is empty/None or not
        known to Pygments, the plain 'text' lexer is used instead.
        """
        # Imported here because the module's top-level imports do not bring
        # ClassNotFound into scope.
        from pygments.util import ClassNotFound

        try:
            lexer = get_lexer_by_name(lang or 'text', stripall=False)
        except ClassNotFound:
            # Only "unknown language" triggers the fallback; any other
            # error is a real problem and is allowed to propagate.
            lexer = get_lexer_by_name('text', stripall=False)

        return highlight(code, lexer, HtmlFormatter())

    def table(self, header, body):
        """Wrap the rendered `header` and `body` rows in a styled <table>."""
        return (
            '<table class="table table-sm"><thead class="thead-light">'
            + header + '</thead><tbody>' + body + '</tbody></table>'
        )

    def image(self, src, title, alt):
        """Return an <img> tag whose source is `src` prefixed with '/file/'.

        `src`, `alt` and `title` are all HTML-escaped (quotes included)
        because each one ends up inside a double-quoted attribute. A
        missing title is rendered as an empty title attribute.
        """
        src = mistune.escape(src, quote=True)
        alt = mistune.escape(alt, quote=True)
        title = mistune.escape(title or '', quote=True)
        return f'<img src="/file/{src}" class="img-fluid mx-auto d-block" alt="{alt}" title="{title}">'

    # Pass math through unaltered - mathjax does the rendering in the browser
    def block_math(self, text):
        """Return `text` wrapped in '$$' delimiters."""
        return fr'$$ {text} $$'

    def latex_environment(self, name, text):
        """Return `text` wrapped in a begin/end pair for environment `name`."""
        return fr'\begin{{{name}}} {text} \end{{{name}}}'

    def inline_math(self, text):
        """Return `text` wrapped in '$$$' delimiters.

        NOTE(review): presumably the page configures MathJax to use '$$$'
        as its inline delimiter - confirm against the templates.
        """
        return fr'$$$ {text} $$$'


# Module-level converter instance; the renderer is created with escape=True.
# hard_wrap=True to insert <br> on newline
markdown = MarkdownWithMath(renderer=HighlightRenderer(escape=True))

def md_to_html(text, q=None):
    """Convert markdown `text` to HTML with the module-level `markdown` converter.

    `q` is accepted but not used by this function.
    """
    html = markdown(text)
    return html

# ---------------------------------------------------------------------------
# load data from yaml file
# ---------------------------------------------------------------------------
def load_yaml(filename, default=None):
    """Load a YAML file and return its parsed content.

    A leading '~' in `filename` is expanded to the user's home directory
    and the file is read as UTF-8.

    Returns the parsed data. When the file cannot be opened, is not valid
    UTF-8, or is not valid YAML, the problem is logged and `default` is
    returned instead. An empty file parses to None, which is returned as
    is (it is not replaced by `default`).
    """
    filename = path.expanduser(filename)
    try:
        f = open(filename, 'r', encoding='utf-8')
    except FileNotFoundError:
        logger.error(f'Cannot open "{filename}": not found')
        return default
    except PermissionError:
        logger.error(f'Cannot open "{filename}": no permission')
        return default
    except OSError:
        logger.error(f'Cannot open file "{filename}"')
        return default

    with f:
        try:
            # safe_load only builds plain YAML types. yaml.load() without a
            # Loader may construct arbitrary Python objects on old PyYAML
            # releases and is a TypeError on PyYAML >= 6.
            return yaml.safe_load(f)
        except yaml.YAMLError as e:
            # Not every YAMLError carries a position.
            mark = getattr(e, 'problem_mark', None)
            if mark is None:
                logger.error(f'In YAML file "{filename}": {e}')
            else:
                # Marks are 0-based; report 1-based line and column.
                logger.error(f'In YAML file "{filename}" near line {mark.line+1}, column {mark.column+1}')
        except UnicodeDecodeError:
            logger.error(f'Cannot decode file "{filename}": not valid UTF-8')

    return default

# ---------------------------------------------------------------------------
# Runs a script and returns its stdout parsed as yaml, or None on error.
# The script is run in another process but this function blocks waiting
# for its termination.
# ---------------------------------------------------------------------------
def run_script(script, stdin='', timeout=5):
    """Run `script` and return its output parsed as YAML, or None on error.

    A leading '~' in `script` is expanded to the user's home directory.
    The script is executed directly (no shell, no arguments) with `stdin`
    as its standard input. This call blocks until the script terminates
    or `timeout` seconds have elapsed. stderr is merged into stdout, so
    anything the script writes to stderr becomes part of the parsed text.

    Returns None, after logging the reason, when the script cannot be
    executed, times out, exits with a non-zero code, or prints something
    that is not valid YAML.
    """
    script = path.expanduser(script)
    try:
        p = subprocess.run(
            [script],
            input=stdin,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            timeout=timeout,
        )
    except FileNotFoundError:
        logger.error(f'Could not execute script "{script}": not found.')
        return None
    except PermissionError:
        logger.error(f'Could not execute script "{script}": wrong permissions.')
        return None
    except OSError:
        logger.error(f'Could not execute script "{script}": unknown reason.')
        return None
    except subprocess.TimeoutExpired:
        logger.error(f'Timeout exceeded ({timeout}s) while running "{script}".')
        return None

    if p.returncode != 0:
        logger.error(f'Script "{script}" returned error code {p.returncode}.')
        return None

    try:
        # The output comes from an external program: safe_load only builds
        # plain YAML types, and yaml.load() without a Loader is a TypeError
        # on PyYAML >= 6.
        return yaml.safe_load(p.stdout)
    except yaml.YAMLError:
        logger.error(f'Error parsing yaml output of "{script}"')
        return None