1# -*- coding: utf-8; mode: python -*-
2# pylint: disable=C0103, R0903, R0912, R0915
3"""
4    scalable figure and image handling
5    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6
7    Sphinx extension which implements scalable image handling.
8
9    :copyright:  Copyright (C) 2016  Markus Heiser
10    :license:    GPL Version 2, June 1991 see Linux/COPYING for details.
11
    The build of an image depends on its source format and on the output's
    destination format. This extension implements methods to simplify image
14    handling from the author's POV. Directives like ``kernel-figure`` implement
15    methods *to* always get the best output-format even if some tools are not
16    installed. For more details take a look at ``convert_image(...)`` which is
17    the core of all conversions.
18
19    * ``.. kernel-image``: for image handling / a ``.. image::`` replacement
20
21    * ``.. kernel-figure``: for figure handling / a ``.. figure::`` replacement
22
23    * ``.. kernel-render``: for render markup / a concept to embed *render*
24      markups (or languages). Supported markups (see ``RENDER_MARKUP_EXT``)
25
      - ``DOT``: render embedded Graphviz's **DOT**
27      - ``SVG``: render embedded Scalable Vector Graphics (**SVG**)
28      - ... *developable*
29
30    Used tools:
31
32    * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not
33      available, the DOT language is inserted as literal-block.
34      For conversion to PDF, ``rsvg-convert(1)`` of librsvg
35      (https://gitlab.gnome.org/GNOME/librsvg) is used when available.
36
    * SVG to PDF: To generate PDF, you need at least one of these tools:
38
39      - ``convert(1)``: ImageMagick (https://www.imagemagick.org)
40      - ``inkscape(1)``: Inkscape (https://inkscape.org/)
41
42    List of customizations:
43
44    * generate PDF from SVG / used by PDF (LaTeX) builder
45
46    * generate SVG (html-builder) and PDF (latex-builder) from DOT files.
47      DOT: see https://www.graphviz.org/content/dot-language
48
49    """
50
51import os
52from os import path
53import subprocess
54from hashlib import sha1
55import re
56from docutils import nodes
57from docutils.statemachine import ViewList
58from docutils.parsers.rst import directives
59from docutils.parsers.rst.directives import images
60import sphinx
61from sphinx.util.nodes import clean_astext
62from sphinx.util import logging
63
64Figure = images.Figure
65
66__version__  = '1.0.0'
67
68logger = logging.getLogger('kfigure')
69
70# simple helper
71# -------------
72
def which(cmd):
    """Search the ``PATH`` environment for the executable ``cmd``.

    Every folder listed in ``PATH`` (or in ``os.defpath`` if ``PATH`` is unset
    or empty) is probed in order.  The first regular file named ``cmd`` that
    the current user is allowed to execute is returned as pathname.  If nothing
    is found, ``None`` is returned.
    """
    envpath = os.environ.get('PATH', None) or os.defpath
    for folder in envpath.split(os.pathsep):
        # path.join() keeps an empty PATH entry relative to the current
        # directory instead of turning it into a lookup in the root folder.
        fname = path.join(folder, cmd)
        # A plain (non-executable) file of that name cannot be run later on.
        if path.isfile(fname) and os.access(fname, os.X_OK):
            return fname
    return None
84
def mkdir(folder, mode=0o775):
    """Create ``folder`` including missing parent folders.

    * ``folder`` pathname of the folder to create
    * ``mode``   permission bits passed to ``os.makedirs``

    An already existing folder is not an error.  ``exist_ok`` is used instead
    of a separate *is-dir* test, so a folder created by someone else between
    test and creation does not raise.
    """
    os.makedirs(folder, mode, exist_ok=True)
88
def file2literal(fname):
    """Return the content of file ``fname`` as a ``literal_block`` node."""
    with open(fname, "r") as src:
        text = src.read()
    return nodes.literal_block(text, text)
94
def isNewer(path1, path2):
    """Tell whether ``path1`` is newer than ``path2``.

    Returns ``True`` if ``path1`` exists and its ``st_ctime`` is greater than
    the ``st_ctime`` of ``path2``, otherwise ``False``.  ``path2`` is only
    inspected when ``path1`` exists.
    """
    if not path.exists(path1):
        return False
    ctime1 = os.stat(path1).st_ctime
    ctime2 = os.stat(path2).st_ctime
    return ctime1 > ctime2
103
def pass_handle(self, node):           # pylint: disable=W0613
    """Do nothing; registered as the *depart* half of the node visitors."""
    return None
106
107# setup conversion tools and sphinx extension
108# -------------------------------------------
109
# Graphviz's dot(1) support: pathname of the command as found by
# setupTools(), None if it is not installed
dot_cmd = None
# truthy if 'dot -Tpdf' should be used for DOT -> PDF conversion (see
# setupTools() for when it gets enabled)
dot_Tpdf = False

# ImageMagick's convert(1) support: pathname of the command or None
convert_cmd = None

# librsvg's rsvg-convert(1) support: pathname of the command or None
rsvg_convert_cmd = None

# Inkscape's inkscape(1) support: pathname of the command or None
inkscape_cmd = None
# Inkscape prior to 1.0 uses different command options; truthy if the
# output of 'inkscape --version' contains 'Inkscape 1'
inkscape_ver_one = False
125
126
def setup(app):
    """Sphinx entry point: register directives, nodes and event handlers.

    Returns the extension metadata (version and parallel-safety flags).
    """
    # check toolchain first
    app.connect('builder-inited', setupTools)

    # (directive name, directive class, node class, visitor) for the image,
    # figure and render handling -- registered in exactly this order
    registrations = (
        ("kernel-image", KernelImage, kernel_image, visit_kernel_image),
        ("kernel-figure", KernelFigure, kernel_figure, visit_kernel_figure),
        ('kernel-render', KernelRender, kernel_render, visit_kernel_render),
    )
    builder_formats = ('html', 'latex', 'texinfo', 'text', 'man')

    for directive_name, directive_cls, node_cls, visitor in registrations:
        app.add_directive(directive_name, directive_cls)
        # every builder format uses the same visitor and a no-op departure
        handlers = {fmt: (visitor, pass_handle) for fmt in builder_formats}
        app.add_node(node_cls, **handlers)

    app.connect('doctree-read', add_kernel_figure_to_std_domain)

    return {
        'version': __version__,
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
165
166
def setupTools(app):
    """
    Check available build tools and log some *verbose* messages.

    This function is called once, when the builder is initiated.

    It looks up ``dot``, ``convert``, ``rsvg-convert`` and ``inkscape`` with
    ``which()`` and stores the result in the module globals:

    * If Inkscape is usable, it is preferred: ``convert_cmd`` and
      ``rsvg_convert_cmd`` are reset to ``None`` and ``dot_Tpdf`` is disabled.

    * If ``inkscape --version`` can not be executed, Inkscape is ignored (with
      a warning) and the convert(1) / rsvg-convert(1) setup is used instead of
      aborting the build.

    * Without Inkscape, ``dot_Tpdf`` stays enabled only if ``dot -Thelp``
      mentions ``pdf`` and rsvg-convert(1) is not available.
    """
    global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd   # pylint: disable=W0603
    global inkscape_cmd, inkscape_ver_one  # pylint: disable=W0603
    logger.verbose("kfigure: check installed tools ...")

    dot_cmd = which('dot')
    convert_cmd = which('convert')
    rsvg_convert_cmd = which('rsvg-convert')
    inkscape_cmd = which('inkscape')

    if dot_cmd:
        logger.verbose("use dot(1) from: " + dot_cmd)

        try:
            dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'],
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            # a non-zero exit is tolerated here, the captured output is what
            # gets searched for the supported formats
            dot_Thelp_list = err.output

        dot_Tpdf = bool(re.search(b'pdf', dot_Thelp_list))
    else:
        logger.warning(
            "dot(1) not found, for better output quality install graphviz from https://www.graphviz.org"
        )

    if inkscape_cmd:
        try:
            inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'],
                                                   stderr=subprocess.DEVNULL)
        except (subprocess.CalledProcessError, OSError) as err:
            # an Inkscape that can not even report its version is treated
            # like a missing one, the other tools are used instead
            logger.warning(
                "inkscape(1) found at %s, but 'inkscape --version' failed (%s)"
                " / ignoring Inkscape." % (inkscape_cmd, err))
            inkscape_cmd = None
            inkscape_ver_one = False

    if inkscape_cmd:
        logger.verbose("use inkscape(1) from: " + inkscape_cmd)
        inkscape_ver_one = bool(re.search(b'Inkscape 1', inkscape_ver))
        # Inkscape takes over all SVG -> PDF conversions
        convert_cmd = None
        rsvg_convert_cmd = None
        dot_Tpdf = False

    else:
        if convert_cmd:
            logger.verbose("use convert(1) from: " + convert_cmd)
        else:
            logger.verbose(
                "Neither inkscape(1) nor convert(1) found.\n"
                "For SVG to PDF conversion, install either Inkscape (https://inkscape.org/) (preferred) or\n"
                "ImageMagick (https://www.imagemagick.org)"
            )

        if rsvg_convert_cmd:
            logger.verbose("use rsvg-convert(1) from: " + rsvg_convert_cmd)
            logger.verbose("use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion")
            dot_Tpdf = False
        else:
            logger.verbose(
                "rsvg-convert(1) not found.\n"
                "  SVG rendering of convert(1) is done by ImageMagick-native renderer."
            )
            if dot_Tpdf:
                logger.verbose("use 'dot -Tpdf' for DOT -> PDF conversion")
            else:
                logger.verbose("use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion")
231
232
233# integrate conversion tools
234# --------------------------
235
# Markups supported by ``kernel-render``, mapping <name> to <.ext>.  The
# '.ext' must be handled by convert_image(..) function's *in_ext* input.
RENDER_MARKUP_EXT = dict(
    DOT='.dot',
    SVG='.svg',
)
242
def convert_image(img_node, translator, src_fname=None):
    """Convert a image node for the builder.

    Different builder prefer different image formats, e.g. *latex* builder
    prefer PDF while *html* builder prefer SVG format for images.

    This function handles output image formats in dependence of source the
    format (of the image) and the translator's output format.

    * ``img_node``   image node, its ``uri`` / ``candidates`` are rewritten to
      the converted file or the node is replaced by a literal block
    * ``translator`` the translator visiting the node, its builder provides
      ``format``, ``srcdir``, ``outdir`` (and for html ``imagedir`` /
      ``imgpath``)
    * ``src_fname``  pathname of the source file; if ``None`` it is derived
      from the node's ``uri``, first below ``srcdir`` and then below ``outdir``

    Handled source formats:

    * ``.dot``: without dot(1) or for builders other than *latex* / *html* the
      DOT source is included raw.  *latex* gets a PDF, *html* a SVG.
    * ``.svg``: only the *latex* builder is handled, it gets a PDF or -- if
      no converter is available -- the raw SVG source.

    If a conversion fails, the node is replaced by the raw source.
    """
    app = translator.builder.app

    fname, in_ext = path.splitext(path.basename(img_node['uri']))
    if src_fname is None:
        src_fname = path.join(translator.builder.srcdir, img_node['uri'])
        if not path.exists(src_fname):
            src_fname = path.join(translator.builder.outdir, img_node['uri'])

    # stays None when there is nothing to convert
    dst_fname = None

    # in kernel builds, use 'make SPHINXOPTS=-v' to see verbose messages

    logger.verbose('assert best format for: ' + img_node['uri'])

    if in_ext == '.dot':

        if not dot_cmd:
            logger.verbose("dot from graphviz not available / include DOT raw.")
            img_node.replace_self(file2literal(src_fname))

        elif translator.builder.format == 'latex':
            dst_fname = path.join(translator.builder.outdir, fname + '.pdf')
            img_node['uri'] = fname + '.pdf'
            img_node['candidates'] = {'*': fname + '.pdf'}


        elif translator.builder.format == 'html':
            dst_fname = path.join(
                translator.builder.outdir,
                translator.builder.imagedir,
                fname + '.svg')
            img_node['uri'] = path.join(
                translator.builder.imgpath, fname + '.svg')
            img_node['candidates'] = {
                '*': path.join(translator.builder.imgpath, fname + '.svg')}

        else:
            # all other builder formats will include DOT as raw
            img_node.replace_self(file2literal(src_fname))

    elif in_ext == '.svg':

        if translator.builder.format == 'latex':
            if not inkscape_cmd and convert_cmd is None:
                logger.warning(
                    "no SVG to PDF conversion available / include SVG raw.\n"
                    "Including large raw SVGs can cause xelatex error.\n"
                    "Install Inkscape (preferred) or ImageMagick."
                )
                img_node.replace_self(file2literal(src_fname))
            else:
                dst_fname = path.join(translator.builder.outdir, fname + '.pdf')
                img_node['uri'] = fname + '.pdf'
                img_node['candidates'] = {'*': fname + '.pdf'}

    if dst_fname:
        # the builder needs not to copy one more time, so pop it if exists.
        translator.builder.images.pop(img_node['uri'], None)
        # pathname relative to the output folder, only used for log messages
        _name = dst_fname[len(str(translator.builder.outdir)) + 1:]

        if isNewer(dst_fname, src_fname):
            logger.verbose("convert: {out}/%s already exists and is newer" % _name)

        else:
            ok = False
            mkdir(path.dirname(dst_fname))

            if in_ext == '.dot':
                logger.verbose('convert DOT to: {out}/' + _name)
                if translator.builder.format == 'latex' and not dot_Tpdf:
                    # two steps: DOT -> SVG -> PDF.  The second step is only
                    # started if the first one succeeded, there is no point
                    # in converting the left-over of a failed dot(1) run.
                    svg_fname = path.join(translator.builder.outdir, fname + '.svg')
                    ok = (dot2format(app, src_fname, svg_fname)
                          and svg2pdf_by_rsvg(app, svg_fname, dst_fname))

                else:
                    ok = dot2format(app, src_fname, dst_fname)

            elif in_ext == '.svg':
                logger.verbose('convert SVG to: {out}/' + _name)
                ok = svg2pdf(app, src_fname, dst_fname)

            if not ok:
                # conversion failed / fall back to the raw source
                img_node.replace_self(file2literal(src_fname))
336
337
def dot2format(app, dot_fname, out_fname):
    """Converts DOT file to ``out_fname`` using ``dot(1)``.

    * ``app``       unused, kept for the common signature of the converters
    * ``dot_fname`` pathname of the input DOT file, including extension ``.dot``
    * ``out_fname`` pathname of the output file, including format extension

    The *format extension* depends on the ``dot`` command (see ``man dot``
    option ``-Txxx``). Normally you will use one of the following extensions:

    - ``.ps`` for PostScript,
    - ``.svg`` or ``svgz`` for Scalable Vector Graphics,
    - ``.fig`` for XFIG graphics and
    - ``.png`` or ``gif`` for common bitmap graphics.

    Returns ``True`` on success and ``False`` if ``dot(1)`` exits with an
    error.  In the latter case a warning is logged and the (empty or partial)
    output file is removed, so that a later up-to-date check based on file
    timestamps does not take it for the result of a finished conversion.
    """
    out_format = path.splitext(out_fname)[1][1:]
    cmd = [dot_cmd, '-T%s' % out_format, dot_fname]

    # dot(1) writes the result to stdout, which is redirected into the file
    with open(out_fname, "w") as out:
        exit_code = subprocess.call(cmd, stdout = out)

    if exit_code != 0:
        logger.warning(
                      "Error #%d when calling: %s" % (exit_code, " ".join(cmd)))
        try:
            os.remove(out_fname)
        except OSError:
            # best effort cleanup, the failure itself is already reported
            pass
    return exit_code == 0
363
def svg2pdf(app, svg_fname, pdf_fname):
    """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command.

    Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)``
    from ImageMagick (https://www.imagemagick.org) for conversion.
    Returns ``True`` on success and ``False`` if an error occurred.  If none
    of the two tools is available, a warning is logged and ``False`` is
    returned.

    * ``app``       unused, kept for the common signature of the converters
    * ``svg_fname`` pathname of the input SVG file with extension (``.svg``)
    * ``pdf_fname`` pathname of the output PDF file with extension (``.pdf``)

    """
    if inkscape_cmd:
        cmd_name = 'inkscape(1)'
        if inkscape_ver_one:
            cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname]
        else:
            cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname]
    elif convert_cmd:
        cmd_name = 'convert(1)'
        cmd = [convert_cmd, svg_fname, pdf_fname]
    else:
        # without this guard the command list would start with None and
        # subprocess would raise a TypeError
        logger.warning(
            "no SVG to PDF conversion available / can't convert: %s" % svg_fname)
        return False

    try:
        warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        exit_code = 0
    except subprocess.CalledProcessError as err:
        warning_msg = err.output
        exit_code = err.returncode

    # the tool's output is only used for log messages, undecodable bytes must
    # not abort the build
    if warning_msg:
        warning_txt = str(warning_msg, 'utf-8', 'replace')
    else:
        warning_txt = ''

    if exit_code != 0:
        logger.warning("Error #%d when calling: %s" %
                            (exit_code, " ".join(cmd)))
        if warning_txt:
            logger.warning( "Warning msg from %s: %s" %
                                (cmd_name, warning_txt))
    elif warning_txt:
        logger.verbose("Warning msg from %s (likely harmless):\n%s" %
                            (cmd_name, warning_txt))

    return exit_code == 0
404
def svg2pdf_by_rsvg(app, svg_fname, pdf_fname):
    """Convert SVG to PDF with ``rsvg-convert(1)`` command.

    * ``svg_fname`` pathname of input SVG file, including extension ``.svg``
    * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf``

    Input SVG file should be the one generated by ``dot2format()``.
    Without ``rsvg-convert(1)`` the job is handed over to ``svg2pdf()``.

    Returns ``True`` on success, otherwise ``False``.
    """
    if rsvg_convert_cmd is None:
        return svg2pdf(app, svg_fname, pdf_fname)

    cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname]
    # use stdout and stderr from parent
    exit_code = subprocess.call(cmd)
    succeeded = (exit_code == 0)
    if not succeeded:
        logger.warning("Error #%d when calling: %s" %
                            (exit_code, " ".join(cmd)))
    return succeeded
430
431
432# image handling
433# ---------------------
434
def visit_kernel_image(self, node):    # pylint: disable=W0613
    """Visitor of the ``kernel_image`` Node.

    The first child of ``node`` is the wrapped ``image`` node, it is passed
    to ``convert_image(...)`` together with the visiting translator.
    """
    convert_image(node[0], self)
442
class kernel_image(nodes.image):
    """Node of the ``kernel-image`` directive.

    Wraps the ``image`` node built by the directive, see ``KernelImage`` and
    ``visit_kernel_image``.
    """
446
class KernelImage(images.Image):
    """KernelImage directive

    Inherits everything from the ``.. image::`` directive, except *remote URI*
    and *glob* pattern, which are rejected.  The image node built by the base
    directive is wrapped into a ``kernel_image`` node. See
    ``visit_kernel_image``.
    """

    def run(self):
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s": glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = images.Image.run(self)
        # only a lone, non-message node gets wrapped, anything else is
        # handed back untouched
        wrappable = (len(result) != 2
                     and not isinstance(result[0], nodes.system_message))
        if not wrappable:
            return result

        [image_node] = result
        # wrap image node into a kernel_image node / see visitors
        return [kernel_image('', image_node)]
468
469# figure handling
470# ---------------------
471
def visit_kernel_figure(self, node):   # pylint: disable=W0613
    """Visitor of the ``kernel_figure`` Node.

    The ``image`` node is the first child of the first child of ``node``, it
    is passed to ``convert_image(...)`` together with the visiting translator.
    """
    wrapped = node[0]
    convert_image(wrapped[0], self)
479
class kernel_figure(nodes.figure):
    """Node for ``kernel-figure`` directive.

    Wraps the ``figure`` node built by the directive, see ``KernelFigure`` and
    ``visit_kernel_figure``.
    """
482
class KernelFigure(Figure):
    """KernelFigure directive

    Inherits everything from the ``.. figure::`` directive, except *remote
    URI* and *glob* pattern, which are rejected.  The figure node built by the
    base directive is wrapped into a ``kernel_figure`` node. See
    ``visit_kernel_figure``.
    """

    def run(self):
        uri = self.arguments[0]
        is_glob = uri.endswith('.*')
        is_remote = '://' in uri
        if is_glob or is_remote:
            raise self.severe(
                'Error in "%s: %s":'
                ' glob pattern and remote images are not allowed'
                % (self.name, uri))

        result = Figure.run(self)
        # only a lone, non-message node gets wrapped, anything else is
        # handed back untouched
        wrappable = (len(result) != 2
                     and not isinstance(result[0], nodes.system_message))
        if not wrappable:
            return result

        [figure_node] = result
        # wrap figure node into a kernel_figure node / see visitors
        return [kernel_figure('', figure_node)]
505
506
507# render handling
508# ---------------------
509
def visit_kernel_render(self, node):
    """Visitor of the ``kernel_render`` Node.

    For a known markup with a usable rendering tool, the text of the
    ``literal_block`` child is saved into a file below the builder's image
    folder (named by markup and SHA1 of the text), the ``literal_block`` is
    replaced by a new ``image`` node pointing to that file and the image node
    is handled by ``convert_image(...)``.  Otherwise the node is left as it
    is, which includes the markup raw.
    """
    srclang = node.get('srclang')

    logger.verbose('visit kernel-render node lang: "%s"' % srclang)

    tmp_ext = RENDER_MARKUP_EXT.get(srclang, None)
    if tmp_ext is None:
        logger.warning( 'kernel-render: "%s" unknown / include raw.' % srclang)
        return

    if tmp_ext == '.dot' and not dot_cmd:
        logger.verbose("dot from graphviz not available / include raw.")
        return

    literal_block = node[0]
    code = literal_block.astext()

    # identical markup always ends up in the same file
    digest = sha1(code.encode('utf-8')).hexdigest()
    fname = '%s-%s' % (srclang, digest)
    tmp_fname = path.join(
        self.builder.outdir, self.builder.imagedir, fname + tmp_ext)

    if not path.isfile(tmp_fname):
        mkdir(path.dirname(tmp_fname))
        with open(tmp_fname, "w") as out:
            out.write(code)

    img_uri = path.join(self.builder.imgpath, fname + tmp_ext)
    img_node = nodes.image(node.rawsource, **node.attributes)
    img_node['uri'] = img_uri
    img_node['candidates'] = {'*': img_uri}

    literal_block.replace_self(img_node)
    convert_image(img_node, self, tmp_fname)
553
554
class kernel_render(nodes.General, nodes.Inline, nodes.Element):
    """Node of the ``kernel-render`` directive.

    Holds the markup as ``literal_block`` child, see ``KernelRender`` and
    ``visit_kernel_render``.
    """
558
class KernelRender(Figure):
    """KernelRender directive

    Render content by external tool.  Has all the options known from the
    *figure*  directive, plus option ``caption``.  If ``caption`` has a
    value, a figure node with the *caption* is inserted. If not, a image node is
    inserted.

    The KernelRender directive wraps the text of the directive into a
    literal_block node and wraps it into a kernel_render node. See
    ``visit_kernel_render``.
    """
    has_content = True
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = False

    # earn options from 'figure'
    option_spec = Figure.option_spec.copy()
    option_spec['caption'] = directives.unchanged

    def run(self):
        """Return the list of nodes built for this directive."""
        result = self.build_node()
        # On errors build_node() already returns a list (of system messages).
        # Wrapping it once more would hand a nested list to the parser.
        if isinstance(result, list):
            return result
        return [result]

    def build_node(self):
        """Build the ``kernel_render`` node (or a figure wrapped around it).

        Returns a single node on success.  For an unknown source language or
        a directive without content, a list with one warning message is
        returned instead.
        """

        srclang = self.arguments[0].strip()
        if srclang not in RENDER_MARKUP_EXT:
            return [self.state_machine.reporter.warning(
                'Unknown source language "%s", use one of: %s.' % (
                    srclang, ",".join(RENDER_MARKUP_EXT.keys())),
                line=self.lineno)]

        code = '\n'.join(self.content)
        if not code.strip():
            return [self.state_machine.reporter.warning(
                'Ignoring "%s" directive without content.' % (
                    self.name),
                line=self.lineno)]

        node = kernel_render()
        node['alt'] = self.options.get('alt','')
        node['srclang'] = srclang
        literal_node = nodes.literal_block(code, code)
        node += literal_node

        caption = self.options.get('caption')
        if caption:
            # parse caption's content
            parsed = nodes.Element()
            self.state.nested_parse(
                ViewList([caption], source=''), self.content_offset, parsed)
            caption_node = nodes.caption(
                parsed[0].rawsource, '', *parsed[0].children)
            caption_node.source = parsed[0].source
            caption_node.line = parsed[0].line

            # the figure takes over the directive's options and becomes the
            # node which is returned
            figure_node = nodes.figure('', node)
            for k,v in self.options.items():
                figure_node[k] = v
            figure_node += caption_node

            node = figure_node

        return node
624
def add_kernel_figure_to_std_domain(app, doctree):
    """Add kernel-figure anchors to 'std' domain.

    The ``StandardDomain.process_doc(..)`` method does not know how to resolve
    the caption (label) of ``kernel-figure`` directive (it only knows about
    standard nodes, e.g. table, figure etc.). Without any additional handling
    this will result in a 'undefined label' for kernel-figures.

    This handler walks over the explicit names of the ``doctree`` and, for
    every name bound to a ``kernel_figure`` node with a caption, stores
    ``(docname, labelid, caption text)`` in the labels of the 'std' domain.
    """

    std = app.env.domains["std"]
    docname = app.env.docname
    labels = std.data["labels"]

    for name, explicit in doctree.nametypes.items():
        labelid = doctree.nameids[name] if explicit else None
        if labelid is None:
            # implicit name or name without ID
            continue

        node = doctree.ids[labelid]
        if node.tagname != 'kernel_figure':
            continue

        # iterate over the children of the node returned by next_node()
        for child in node.next_node():
            if child.tagname != 'caption':
                continue
            # add label to std domain
            labels[name] = docname, labelid, clean_astext(child)
            break
655