1########################################################################
2##
3## Copyright (C) 2010-2021 The Octave Project Developers
4##
5## See the file COPYRIGHT.md in the top-level directory of this
6## distribution or <https://octave.org/copyright/>.
7##
8## This file is part of Octave.
9##
10## Octave is free software: you can redistribute it and/or modify it
11## under the terms of the GNU General Public License as published by
12## the Free Software Foundation, either version 3 of the License, or
13## (at your option) any later version.
14##
15## Octave is distributed in the hope that it will be useful, but
16## WITHOUT ANY WARRANTY; without even the implied warranty of
17## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18## GNU General Public License for more details.
19##
20## You should have received a copy of the GNU General Public License
21## along with Octave; see the file COPYING.  If not, see
22## <https://www.gnu.org/licenses/>.
23##
24########################################################################
25
26## -*- texinfo -*-
27## @deftypefn  {} {} publish (@var{file})
28## @deftypefnx {} {} publish (@var{file}, @var{output_format})
29## @deftypefnx {} {} publish (@var{file}, @var{option1}, @var{value1}, @dots{})
30## @deftypefnx {} {} publish (@var{file}, @var{options})
31## @deftypefnx {} {@var{output_file} =} publish (@var{file}, @dots{})
32##
33## Generate a report from the Octave script file @var{file} in one of several
34## output formats.
35##
36## The generated reports interpret any Publishing Markup in comments, which is
37## explained in detail in the GNU Octave manual.  Assume the following example,
38## using some Publishing Markup, to be the contents of the script file
39## @file{pub_example.m}:
40##
41## @example
42## @group
43## ## Headline title
44## #
45## # Some *bold*, _italic_, or |monospaced| Text with
46## # a <https://www.octave.org link to *GNU Octave*>.
47## ##
48##
49## # "Real" Octave commands to be evaluated
50## sombrero ()
51##
52## %% @sc{matlab} comment style ('%') is supported as well
53## %
54## % * Bulleted list item 1
55## % * Bulleted list item 2
56## %
57## % # Numbered list item 1
58## % # Numbered list item 2
59## @end group
60## @end example
61##
62## To publish this script file, type @code{publish ("pub_example.m")}.
63##
64## With only @var{file} given, a HTML report is generated in a subdirectory
65## @file{html} relative to the current working directory.  The Octave commands
66## are evaluated in a separate context and any figures created while executing
67## the script file are included in the report.  All formatting syntax of
68## @var{file} is treated according to the specified output format and included
69## in the report.
70##
71## Using @code{publish (@var{file}, @var{output_format})} is equivalent to the
72## function call using a structure
73##
74## @example
75## @group
76## @var{options}.format = @var{output_format};
77## publish (@var{file}, @var{options})
78## @end group
79## @end example
80##
81## @noindent
82## which is described below.  The same holds for using option/value pairs
83##
84## @example
85## @group
86## @var{options}.@var{option1} = @var{value1};
87## publish (@var{file}, @var{options})
88## @end group
89## @end example
90##
91## The structure @var{options} can have the following field names.  If a field
92## name is not specified, the default value is used:
93##
94## @itemize @bullet
95## @item
96## @samp{format} --- Output format of the published script file, one of
97##
98## @samp{html} (default), @samp{doc}, @samp{latex}, @samp{ppt},
99## @samp{pdf}, or @samp{xml}.
100##
101## The output formats @samp{doc}, @samp{ppt}, and @samp{xml} are not currently
102## supported.  To generate a @samp{doc} report, open a generated @samp{html}
103## report with your office suite.
104##
105## In Octave custom formats are supported by implementing all callback
106## subfunctions in a function file named
107## @samp{__publish_<custom format>_output__.m}.  To obtain a template for the
108## HTML format type:
109##
110## @example
111## @group
112## edit (fullfile (fileparts (which ("publish")), ...
113##       "private", "__publish_html_output__.m"))
114## @end group
115## @end example
116##
117## @item
118## @samp{outputDir} --- Full path of the directory where the generated report
119## will be located.  If no directory is given, the report is generated in a
120## subdirectory @file{html} relative to the current working directory.
121##
122## @item
123## @samp{stylesheet} --- Not supported, only for @sc{matlab} compatibility.
124##
125## @item
126## @samp{createThumbnail} --- Not supported, only for @sc{matlab}
127## compatibility.
128##
129## @item
130## @samp{figureSnapMethod} --- Not supported, only for @sc{matlab}
131## compatibility.
132##
133## @item
134## @samp{imageFormat} --- Desired format for any images produced while
135## evaluating the code.  The allowed image formats depend on the output format:
136##
137## @itemize @bullet
138## @item @samp{html}, @samp{xml} --- @samp{png} (default), any image format
139## supported by Octave
140##
141## @item @samp{latex} --- @samp{epsc2} (default), any image format supported by
142## Octave
143##
144## @item @samp{pdf} --- @samp{jpg} (default) or @samp{bmp}, note @sc{matlab}
145## uses  @samp{bmp} as default
146##
147## @item @samp{doc} or @samp{ppt} --- @samp{png} (default), @samp{jpg},
148## @samp{bmp}, or @samp{tiff}
149## @end itemize
150##
151## @item
152## @samp{maxWidth} and @samp{maxHeight} --- Maximum width (height) of the
153## produced images in pixels.  An empty value means no restriction.  Both
154## values must be set in order for the option to work properly.
155##
## @samp{[]} (default), integer value @geq{} 1
157##
158## @item
159## @samp{useNewFigure} --- Use a new figure window for figures created by the
160## evaluated code.  This avoids side effects with already opened figure
161## windows.
162##
163## @samp{true} (default) or @samp{false}
164##
165## @item
166## @samp{evalCode} --- Evaluate code of the Octave source file
167##
168## @samp{true} (default) or @samp{false}
169##
170## @item
171## @samp{catchError} --- Catch errors while evaluating code and continue
172##
173## @samp{true} (default) or @samp{false}
174##
175## @item
176## @samp{codeToEvaluate} --- Octave commands that should be evaluated prior to
177## publishing the script file.  These Octave commands do not appear in the
178## generated report.
179##
180## @item
181## @samp{maxOutputLines} --- Maximum number of output lines from code
182## evaluation which are included in output.
183##
## @samp{Inf} (default) or integer value @geq{} 0
185##
186## @item
187## @samp{showCode} --- Show the evaluated Octave commands in the generated
188## report
189##
190## @samp{true} (default) or @samp{false}
191## @end itemize
192##
## The optional output @var{output_file} is a string with the path and file
## name of the generated report.
195##
196## @seealso{grabcode}
197## @end deftypefn
198
function output_file = publish (file, varargin)
  ## PUBLISH validates FILE and the options, parses the script source into
  ## paragraphs and code sections, optionally evaluates the code, and writes
  ## the report.  Returns the path of the generated report.

  if (nargin < 1)
    print_usage ();
  endif

  if (exist (file, "file") != 2)
    error ("publish: FILE does not exist");
  endif

  ## Check file to be in Octave's load path
  [file_path, file_name, file_ext] = fileparts (file);
  if (isempty (file_path))
    file_path = pwd;
  endif
  if (exist ([file_name, file_ext]) != 2)
    ## FILE is passed as an argument (not spliced into the template) so
    ## that "%" or "\" characters in the name are reported literally.
    error ("publish: %s is not in the load path", file);
  endif

  ## Check file extension and that file is an Octave script
  file_info = __which__ (file_name);
  if (! strcmp (file_ext, ".m") || ! strcmp (file_info.type, "script"))
    error ("publish: only script files can be published");
  endif

  ## Check file to be parsable
  __parse_file__ (file);

  ## Get structure with necessary options
  options = struct ();
  if (numel (varargin) == 1)
    ## Call: publish (file, format)
    if (ischar (varargin{1}))
      options.format = varargin{1};
    ## Call: publish (file, options)
    elseif (isstruct (varargin{1}))
      options = varargin{1};
    else
      error ("publish: second argument must be OUTPUT_FORMAT or OPTIONS");
    endif
  ## Call: publish (file, Name1, Value1, Name2, Value2, ...)
  elseif (rem (numel (varargin), 2) == 0
          && all (cellfun (@ischar, varargin(1:2:end))))
    options = cell2struct (varargin(2:2:end), varargin(1:2:end), 2);
  else
    error ("publish: invalid arguments");
  endif

  ## Validate options struct

  ## Options for the output
  if (! isfield (options, "format"))
    options.format = "html";
  else
    ## FIXME: Implement remaining formats
    if (any (strcmpi (options.format, {"doc", "ppt", "xml"})))
      error ('publish: Output format "%s" is not yet supported',
             options.format);
    endif
    ## Supported or custom output format
    supported_formats = {"html", "doc", "latex", "ppt", "xml", "pdf"};
    if (! any (strcmpi (options.format, supported_formats)))
      ## Check existence of custom formatter
      custom_formatter = ["__publish_", options.format, "_output__"];
      if (! exist (custom_formatter, "file"))
        error (['publish: Custom output format "%s" requires the ', ...
                "formatter function:\n\n\t%s\n\n\t", ...
                'See "help publish" for more information.'],
                options.format, custom_formatter);
      endif
    else
      options.format = validatestring (options.format, supported_formats);
    endif
  endif

  if (! isfield (options, "outputDir"))
    ## Matlab R2016a doc says default is "", but specifies to create a
    ## subdirectory named "html" in the current working directory.
    options.outputDir = fullfile (file_path, "html");
  elseif (! ischar (options.outputDir))
    error ("publish: OUTPUTDIR must be a string");
  endif

  if (! isfield (options, "stylesheet"))
    options.stylesheet = "";
  elseif (! ischar (options.stylesheet))
    error ("publish: STYLESHEET must be a string");
  endif

  ## Options for the figures
  options = validate_logical_option (options, "createThumbnail", true);

  if (! isfield (options, "figureSnapMethod"))
    options.figureSnapMethod = "entireGUIWindow";
  else
    options.figureSnapMethod = validatestring (options.figureSnapMethod, ...
      {"entireGUIWindow", "print", "getframe", "entireFigureWindow"});
    ## FIXME: implement other SnapMethods
    warning ("publish: option FIGURESNAPMETHOD currently not supported");
  endif

  if (! isfield (options, "imageFormat"))
    switch (options.format)
      case "latex"
        options.imageFormat = "epsc2";
      case "pdf"
        ## Note: Matlab R2016a uses bmp as default
        options.imageFormat = "jpg";
      otherwise
        options.imageFormat = "png";
    endswitch
  elseif (! ischar (options.imageFormat))
    error ("publish: IMAGEFORMAT must be a string");
  else
    ## Check valid imageFormat for chosen format
    ##   html, latex, and xml accept any imageFormat
    switch (options.format)
      case {"doc", "ppt"}
        options.imageFormat = validatestring (options.imageFormat,
                                              {"png", "jpg", "bmp", "tiff"});
      case "pdf"
        options.imageFormat = validatestring (options.imageFormat,
                                              {"bmp", "jpg"});
    endswitch
  endif

  ## An explicitly given empty value is the documented "no restriction"
  ## setting and is therefore treated exactly like an omitted field.
  if (! isfield (options, "maxHeight") || isempty (options.maxHeight))
    options.maxHeight = [];
  elseif (! isscalar (options.maxHeight) || options.maxHeight < 1)
    error ("publish: MAXHEIGHT must be a positive integer");
  else
    options.maxHeight = uint64 (options.maxHeight);
  endif

  if (! isfield (options, "maxWidth") || isempty (options.maxWidth))
    options.maxWidth = [];
  elseif (! isscalar (options.maxWidth) || options.maxWidth < 1)
    error ("publish: MAXWIDTH must be a positive integer");
  else
    options.maxWidth = uint64 (options.maxWidth);
  endif

  options = validate_logical_option (options, "useNewFigure", true);

  ## Options for the code
  options = validate_logical_option (options, "evalCode", true);
  options = validate_logical_option (options, "catchError", true);

  if (! isfield (options, "codeToEvaluate"))
    options.codeToEvaluate = "";
  elseif (! ischar (options.codeToEvaluate))
    error ("publish: CODETOEVALUATE must be a string");
  endif

  if (! isfield (options, "maxOutputLines"))
    options.maxOutputLines = Inf;
  elseif (! isscalar (options.maxOutputLines) || options.maxOutputLines < 0)
    error ("publish: MAXOUTPUTLINES must be an integer >= 0");
  else
    options.maxOutputLines = uint64 (options.maxOutputLines);
  endif

  options = validate_logical_option (options, "showCode", true);

  doc.title = "";
  doc.intro = "";
  doc.body = cell ();
  doc.m_source = deblank (read_file_to_cellstr (file));
  doc.m_source_file_name = file;

  ## Split code and paragraphs, find formatting
  doc = parse_m_source (doc);

  ## Create output directory
  [status, msg] = mkdir (options.outputDir);
  if (status != 1)
    error ("publish: cannot create output directory: %s", msg);
  endif

  if (options.evalCode)
    doc = eval_code (doc, options);
    eval_context ("clear");
  endif

  output_file = create_output (doc, options);

endfunction


function options = validate_logical_option (options, name, default)
  ## VALIDATE_LOGICAL_OPTION sets options.(NAME) to DEFAULT when the field
  ## is missing; otherwise it requires the given value to be a real scalar
  ## and raises "publish: <NAME> must be TRUE or FALSE" if it is not.

  if (! isfield (options, name))
    options.(name) = default;
  elseif (! isscalar (options.(name)) || ! isreal (options.(name)))
    error ("publish: %s must be TRUE or FALSE", upper (name));
  endif

endfunction
408
409
function doc = parse_m_source (doc)
  ## PARSE_M_SOURCE First parsing level
  ##   Derives the overall structure (paragraphs and code sections) of the
  ##   script lines stored in doc.m_source.
  ##
  ##   The result is appended to doc.body, a cell vector of structs, each
  ##   being either
  ##
  ##     a) struct ("type", "code", "content", code_str, ...
  ##                "lines", [a, b], "output", {})
  ##     b) struct ("type", "section", "content", title_str)
  ##
  ##   or one of the elements produced by the second parsing level
  ##   (parse_paragraph_content), which is run on each paragraph's content.
  ##   A leading headline followed by another paragraph before any code is
  ##   stored in doc.title / doc.intro instead of doc.body.

  if (isempty (doc.m_source))
    return;  # Nothing to parse
  endif

  ## Nested helper: true if the line starts with the N-character comment
  ## marker ("%", "#", "%%", "##", or "%%%" depending on N) and that marker
  ## is either the whole line or is followed by a space.
  function r = is_publish_markup (cstr, N)
    str = char (cstr);
    r = false;
    if (any (strncmp (str, {"%%%", "##"}, N)))
      len = length (str);
      r = (len == N) || (len > N && str(N+1) == " ");
    endif
  endfunction

  ## Line classifiers built on the nested helper
  is_paragraph = @(cstr) is_publish_markup (cstr, 1);      # paragraph line
  is_head = @(cstr) is_publish_markup (cstr, 2);           # section headline
  ## Matlab cell-mode headline without section break; treated as a new head
  is_no_break_head = @(cstr) is_publish_markup (cstr, 3);

  src_lines = doc.m_source;
  n_src = length (src_lines);

  ## Lines that open a paragraph ("%%", "##", or "%%%")
  head_mask = cellfun (is_head, src_lines);
  nobreak_mask = cellfun (is_no_break_head, src_lines);
  par_start_idx = find (head_mask | nobreak_mask);

  ## Without any paragraph the whole document is a single code block
  if (isempty (par_start_idx))
    doc.body{end+1} = struct ("type", "code", ...
                              "content", strtrim (strjoin (src_lines, "\n")), ...
                              "lines", [1, n_src], ...
                              "output", {{}});
    return;
  endif

  ## A paragraph extends until the first line that is not a paragraph line
  ## (or until the line before the next paragraph / the end of the file).
  par_end_idx = [par_start_idx(2:end) - 1, n_src];
  for k = 1:numel (par_end_idx)
    candidates = src_lines(par_start_idx(k) + 1:par_end_idx(k));
    first_break = find (! cellfun (is_paragraph, candidates), 1);
    if (! isempty (first_break))
      par_end_idx(k) = par_start_idx(k) + first_break - 1;
    endif
  endfor

  ## Code sections lie between consecutive paragraphs ...
  code_start_idx = par_end_idx(1:end-1) + 1;
  code_end_idx = par_start_idx(2:end) - 1;
  ## ... possibly before the first paragraph ...
  if (par_start_idx(1) > 1)
    code_start_idx = [1, code_start_idx];
    code_end_idx = [par_start_idx(1) - 1, code_end_idx];
  endif
  ## ... and possibly after the last one.
  if (par_end_idx(end) < n_src)
    code_start_idx = [code_start_idx, par_end_idx(end) + 1];
    code_end_idx = [code_end_idx, n_src];
  endif

  ## Keep only ranges that do not overlap a paragraph
  valid = (code_start_idx <= code_end_idx);
  code_start_idx = code_start_idx(valid);
  code_end_idx = code_end_idx(valid);

  ## Drop code ranges consisting solely of empty lines
  blank = false (size (code_start_idx));
  for k = 1:numel (code_start_idx)
    blank(k) = all (cellfun (@(cstr) isempty (char (cstr)),
                             src_lines(code_start_idx(k):code_end_idx(k))));
  endfor
  code_start_idx(blank) = [];
  code_end_idx(blank) = [];

  n_par = numel (par_start_idx);
  n_code = numel (code_start_idx);

  ## Try to find a document title and introduction text
  ##   1. First paragraph must start in first line
  ##   2. Second paragraph must start before any code
  title_offset = 0;
  if (is_head (src_lines{1})
      && ! isempty (par_start_idx)
      && par_start_idx(1) == 1
      && (isempty (code_start_idx)
          || (length (par_start_idx) > 1
              && par_start_idx(2) < code_start_idx(1))))
    first_line = src_lines{1};
    doc.title = first_line(4:end);
    content = src_lines(2:par_end_idx(1));
    ## Strip leading "# "
    content = cellfun (@(c) cellstr (c(3:end)), content);
    doc.intro = parse_paragraph_content (content);
    title_offset = 1;
  endif

  ## Interleave the remaining paragraphs and code ranges in source order
  c_pos = 1;
  p_pos = 1 + title_offset;
  while (p_pos <= n_par || c_pos <= n_code)
    ## Emit code as long as it precedes the next paragraph, or no
    ## paragraph is left.
    while (c_pos <= n_code
           && (p_pos > n_par
               || par_start_idx(p_pos) > code_start_idx(c_pos)))
      first = code_start_idx(c_pos);
      last = code_end_idx(c_pos);
      doc.body{end+1} = struct ( ...
        "type", "code", ...
        "content", strtrim (strjoin (src_lines(first:last), "\n")), ...
        "lines", [first, last], ...
        "output", {{}});
      c_pos += 1;
    endwhile

    if (p_pos <= n_par)
      head_line = src_lines{par_start_idx(p_pos)};
      ## Drop the marker and its trailing space: "## " or "%%% "
      if (is_head (src_lines(par_start_idx(p_pos))))
        title_str = head_line(4:end);
      else
        title_str = head_line(5:end);
      endif
      ## Only paragraphs with a title produce a section entry
      if (! isempty (title_str))
        doc.body{end+1} = struct ("type", "section", "content", title_str);
      endif

      content = src_lines(par_start_idx(p_pos) + 1:par_end_idx(p_pos));
      ## Strip leading "# "
      content = cellfun (@(c) cellstr (c(3:end)), content);
      doc.body = [doc.body, parse_paragraph_content(content)];
      p_pos += 1;
    endif
  endwhile

endfunction
570
571
function p_content = parse_paragraph_content (content)
  ## PARSE_PARAGRAPH_CONTENT second parsing level
  ##
  ##   Takes the lines of one paragraph (cellstring, title already removed)
  ##   and returns a cell vector of structs suitable for appending to
  ##   doc.body.  Each struct has the fields "type" and "content", where
  ##   "type" is one of
  ##
  ##     "html", "latex"      content of a <html> / <latex> block
  ##     "preformatted_code"  block indented by two spaces, or the text of
  ##                          a file named in <include>fname</include>
  ##     "preformatted_text"  block indented by one space
  ##     "bulleted_list"      cellstring of items, block starts with "* "
  ##     "numbered_list"      cellstring of items, block starts with "# "
  ##     "graphic"            file name given as <<fname>>
  ##     "text"               anything else
  ##
  ##   "text" may still contain inline markup (italic "_", bold "*",
  ##   monospaced "|", "$"/"$$" LaTeX math, links, trademark symbols),
  ##   which is resolved later during output generation.

  p_content = cell ();

  if (isempty (content))
    return;
  endif

  ## <html> and <latex> blocks split the paragraph; the text before and
  ## after such a block is parsed by recursion.
  content_str = strjoin (content, "\n");
  tags = {"html", "latex"};
  for t = 1:length(tags)
    tag = tags{t};
    pattern = ['(.*?)(^|\n\n)(<', tag, '>)\n(.*?)\n(<\/', ...
               tag, '>)($|\n\n)(.*)'];
    parts = regexp (content_str, pattern, "tokens", "once");
    if (isempty (parts))
      continue;
    endif

    ## Leading text present: the first token is not the opening tag
    if (! strcmpi (parts{1}, ["<", tag, ">"]))
      p_content = parse_paragraph_content (strsplit (parts{1}, "\n"));
      parts(1:2) = [];
    endif
    ## The block itself
    p_content{end+1} = struct ("type", tag, "content", parts{2});
    ## Trailing text present
    if (length (parts) == 5)
      trailing = parse_paragraph_content (strsplit (parts{5}, "\n"));
      p_content = [p_content, trailing];
    endif
    return;
  endfor

  ## Empty lines separate the blocks; add sentinels at both ends
  gaps = [0, find(cellfun (@isempty, content)), length(content) + 1];

  for g = find (diff (gaps) > 1)
    block = content(gaps(g) + 1:gaps(g+1) - 1);

    ## Octave code: every line has two leading spaces
    if (all (cellfun (@(c) strncmp (char (c), "  ", 2), block)))
      stripped = cellfun (@(c) cellstr (c(3:end)), block);
      p_content{end+1}.type = "preformatted_code";
      p_content{end}.content = strjoin (stripped, "\n");
      continue;
    endif

    ## Preformatted text: every line has one leading space
    if (all (cellfun (@(c) strncmp (char (c), " ", 1), block)))
      stripped = cellfun (@(c) cellstr (c(2:end)), block);
      p_content{end+1}.type = "preformatted_text";
      p_content{end}.content = strjoin (stripped, "\n");
      continue;
    endif

    ## Bulleted list: first line starts with "* "
    if (strncmp (block{1}, "* ", 2))
      joined_items = strjoin (block, "\n");
      ## Skip the first marker, split at the remaining ones
      items = strsplit (joined_items(3:end), "\n* ");
      p_content{end+1}.type = "bulleted_list";
      p_content{end}.content = items;
      continue;
    endif

    ## Numbered list: first line starts with "# "
    if (strncmp (block{1}, "# ", 2))
      joined_items = strjoin (block, "\n");
      ## Skip the first marker, split at the remaining ones
      items = strsplit (joined_items(3:end), "\n# ");
      p_content{end+1}.type = "numbered_list";
      p_content{end}.content = items;
      continue;
    endif

    one_line = strjoin (block, "");

    ## Include <include>fname.m</include>; shown as preformatted code
    fname = regexpi (one_line, '^<include>(.*)</include>$', "tokens");
    if (! isempty (fname))
      include_code = read_file_to_cellstr (strtrim ((fname{1}){1}));
      p_content{end+1}.type = "preformatted_code";
      p_content{end}.content = strjoin (include_code, "\n");
      continue;
    endif

    ## Graphic <<myGraphic.png>>
    fname = regexpi (one_line, '^<<(.*)>>$', "tokens");
    if (! isempty (fname))
      p_content{end+1}.type = "graphic";
      p_content{end}.content = strtrim ((fname{1}){1});
      continue;
    endif

    ## Everything else is normal text, possibly carrying inline markup
    p_content{end+1}.type = "text";
    p_content{end}.content = strjoin (block, "\n");
  endfor
endfunction
713
714
function m_source = read_file_to_cellstr (file)
  ## READ_FILE_TO_CELLSTR reads a given file line by line into a cellstring
  ##
  ##   Returns a 1-by-N cellstring with one element per line of FILE, line
  ##   endings removed (1-by-0 for an empty file).  Raises an error if FILE
  ##   cannot be opened for reading.

  [fid, msg] = fopen (file, "r");
  if (fid < 0)
    error ("publish: cannot open file '%s': %s", file, msg);
  endif

  m_source = cell (1, 0);
  unwind_protect
    ## fgetl returns a non-character value (-1) at end of file
    while (ischar (txt = fgetl (fid)))
      m_source{end+1} = txt;
    endwhile
  unwind_protect_cleanup
    ## Release the handle even if reading fails
    fclose (fid);
  end_unwind_protect
endfunction
726
727
function ofile = create_output (doc, options)
  ## CREATE_OUTPUT creates the desired output file
  ##
  ##   Selects the formatter for options.format, renders header, body, and
  ##   footer of DOC, and writes the result into options.outputDir using
  ##   the script's base name plus the formatter's file extension.  For the
  ##   "pdf" format the written LaTeX file is compiled with pdflatex when
  ##   that program is available.  Returns the path of the written file.

  switch (options.format)
    case "html"
      formatter = @__publish_html_output__;
    case {"latex", "pdf"}
      formatter = @__publish_latex_output__;
    otherwise
      ## Custom formatter.  The handle is built from the function name
      ## rather than by evaluating a user-supplied string as code.
      formatter = str2func (["__publish_", options.format, "_output__"]);
  endswitch

  ## Use title, or if not given, the m-file name
  title_str = doc.title;
  if (isempty (title_str))
    [~, title_str] = fileparts (doc.m_source_file_name);
  endif

  content = formatter ("header",
                       formatter ("escape_special_chars", title_str),
                       format_output (doc.intro, formatter, options),
                       get_toc (doc.body, formatter));
  content = [content, format_output(doc.body, formatter, options)];
  content = [content, formatter("footer", strjoin (doc.m_source, "\n"))];

  ## Write file
  [~, ofile] = fileparts (doc.m_source_file_name);
  ofile_name = [ofile, formatter("output_file_extension")];
  ofile = fullfile (options.outputDir, ofile_name);
  [fid, msg] = fopen (ofile, "w");
  if (fid < 0)
    error ("publish: cannot open output file '%s' for writing: %s",
           ofile, msg);
  endif
  unwind_protect
    fputs (fid, content);
  unwind_protect_cleanup
    fclose (fid);
  end_unwind_protect

  ## Compile LaTeX, if compiler found
  if (strcmp (options.format, "pdf"))
    status = system ("pdflatex --version");
    if (status == 0)
      ## Quote both paths so that directories or file names containing
      ## spaces survive the shell.
      cmd = sprintf ('cd "%s" && pdflatex "%s"', ...
                     options.outputDir, ofile_name);
      ## Two passes, as in the original implementation
      for i = 1:2
        ## FIXME: This looks very likely to break when switching OS
        system (cmd);
      endfor
    endif
  endif
endfunction
774
775
function toc_cstr = get_toc (cstr, formatter)
  ## GET_TOC collects the table of contents from a cell vector of block
  ## structs (e.g., doc.body): the formatted content of every block of type
  ## "section" becomes one cell of the returned cellstring, in order.

  toc_cstr = {};
  for k = 1:numel (cstr)
    entry = cstr{k};
    if (! strcmp (entry.type, "section"))
      continue;
    endif
    toc_cstr{end+1} = format_text (entry.content, formatter);
  endfor
endfunction
787
788
function str = format_output (cstr, formatter, options)
  ## FORMAT_OUTPUT walks over all blocks in cstr (doc.intro or doc.body) and
  ## concatenates their rendering into a single string in the target format.
  ##
  ##   Only FORMATTER knows the target format: it is called with the block
  ##   type (or "code_output") and the block data and returns the string
  ##   for that block.  Code is emitted only if options.showCode is set;
  ##   its output only if options.evalCode is set and the output is
  ##   non-empty.  Text, section, and list contents are passed through
  ##   format_text first.

  str = "";
  for k = 1:numel (cstr)
    blk = cstr{k};
    kind = blk.type;
    switch (kind)
      case "code"
        if (options.showCode)
          str = [str, formatter("code", blk.content)];
        endif
        if (options.evalCode && ! isempty (blk.output))
          str = [str, formatter("code_output", blk.output)];
        endif

      case {"text", "section"}
        rendered = format_text (blk.content, formatter);
        str = [str, formatter(kind, rendered)];

      case {"bulleted_list", "numbered_list"}
        rendered = cellfun (@(item) format_text(item, formatter), ...
                            blk.content, "UniformOutput", false);
        str = [str, formatter(kind, rendered)];

      otherwise
        ## All remaining block types are handed over unchanged
        str = [str, formatter(kind, blk.content)];
    endswitch
  endfor

endfunction
820
821
822function str = format_text (str, formatter)
823  ## FORMAT_TEXT formats inline formats in strings.
824  ##   These are: links, block/inline math, bold, italic, monospaced, (TM), (R)
825
826  ## Helper to clarify the following regular expressions.  It is suitable for
827  ## inline formatting, that is delimited literally at start and end by
828  ## 'delim'.  'term' is an indicating character for the end delimiter.
829  ##
830  ## Best explained by example ('^' start and '$' end of input):
831  ##
832  ##  Positive matches:
833  ##
834  ##    ^*bold*$
835  ##    ^*bold*.$
836  ##    ^(*bold*)$
837  ##    ^ *bold* $
838  ##    ^Text *bold* text$
839  ##    ^*bold text*$
840  ##
841  ##  Negative matches:
842  ##
843  ##    ^Text*bold*text$
844  ##    ^*bold *$
845  ##    ^* bold* $
846  ##    ^*bold text *$
847  ##
848  regex_helper = @(delim, term) ['(^|(?<=\s)|(?=\W))', delim, ...
849    '(?!\s)[^', term, ']*(?<!\s)', delim, '($|(?=\s)|(?=\W))'];
850
851  ## Regular expressions for the formats:
852  ##
853  ## 1) Links "<http://www.someurl.com>"
854  ## 2) Links "<octave:Function SOME TEXT>"
855  ## 3) Links "<http://www.someurl.com SOME TEXT>"
856  ## 4) LaTeX block math "$$x^2$$"
857  ## 5) LaTeX inline math "$x^2$"
858  ## 6) Bold *text*
859  ## 7) Italic _text_
860  ## 8) Monospaced |text|
861  ## 9) (TM) or (R)
862  regexes = {'<\S{3,}[^\s<>]*>', ...
863             '<octave:[^\s<>]* *[^<>$]*>', ...
864             '<\S{3,}[^\s<>]* *[^<>$]*>', ...
865             regex_helper('\$\$', '$'), ...
866             regex_helper('\$', '$'), ...
867             regex_helper('\*', '*'), ...
868             regex_helper('_', '_'), ...
869             regex_helper('\|', '|'), ...
870             '\((TM|R)\)'};
871
872  ## Function to escape some special characters for the GNU Octave manual,
873  ## see https://www.gnu.org/software/texinfo/manual/texinfo/html_node/HTML-Xref-Node-Name-Expansion.html
874  texinfo_esc = @(str) strrep (strrep (str, "-", "_002d"), "_", "_005f");
875
876  ## Substitute all occurrences with placeholders
877  placeholder_cstr = {};
878  plh = 0;
879  for i = 1:numel (regexes)
880    cstr = regexp (str, regexes{i}, "match");
881    for j = 1:numel (cstr)
882      plh += 1;
883      str = regexprep (str, regexes{i}, ["PUBLISHPLACEHOLDER" num2str(plh)],
884                       "once");
885      switch (i)
886        case 1
887          ## Links "<http://www.someurl.com>"
888          url = cstr{j};
889          cstr{j} = formatter ("link", url(2:end-1), url(2:end-1));
890        case 2
891          ## Links "<octave:Function SOME TEXT>"
892          idx = strfind (cstr{j}, " ")(1);
893          url = cstr{j};
894          url = texinfo_esc (url(9:idx-1));
895          v = version ();
896          if (v(end) == '+')
897            v = "interpreter";
898          endif
899          url = sprintf ( ...
900            "https://www.gnu.org/software/octave/doc/%s/XREF%s.html", v, url);
901          txt = cstr{j};
902          txt = format_text (txt(idx+1:end-1), formatter);
903          cstr{j} = formatter ("link", url, txt);
904        case 3
905          ## Links "<http://www.someurl.com SOME TEXT>"
906          idx = strfind (cstr{j}, " ")(1);
907          url = cstr{j};
908          url = url(2:idx-1);
909          txt = cstr{j};
910          txt = format_text (txt(idx+1:end-1), formatter);
911          cstr{j} = formatter ("link", url, txt);
912        case 4
913          ## LaTeX block math "$$"
914          txt = cstr{j};
915          cstr{j} = formatter ("blockmath", txt(3:end-2));
916        case 5
917          ## LaTeX inline math "$"
918          txt = cstr{j};
919          cstr{j} = formatter ("inlinemath", txt(2:end-1));
920        case 6
921          ## Bold
922          txt = cstr{j};
923          cstr{j} = formatter ("bold", format_text (txt(2:end-1), formatter));
924        case 7
925          ## Italic
926          txt = cstr{j};
927          cstr{j} = formatter ("italic", format_text (txt(2:end-1), formatter));
928        case 8
929          ## Monospaced
930          txt = cstr{j};
931          cstr{j} = formatter ("monospaced", format_text (txt(2:end-1), ...
932                               formatter));
933        case 9
934          ## (TM) or (R)
935          txt = cstr{j};
936          cstr{j} = formatter (txt(2:end-1));
937      endswitch
938    endfor
939    placeholder_cstr = [placeholder_cstr, cstr];
940  endfor
941
942  ## Replace special symbols
943  str = formatter ("escape_special_chars", str);
944
945  ## Restore placeholders
946  for i = plh:-1:1
947    str = strrep (str, ["PUBLISHPLACEHOLDER" sprintf("%d", i)],
948                       placeholder_cstr{i});
949  endfor
950
951endfunction
952
953
function doc = eval_code (doc, options)
  ## EVAL_CODE Third level parsing
  ##
  ##   Evaluates every element of DOC.BODY whose type is "code", stores the
  ##   captured output (joined by newlines) in the "output" field of that
  ##   element, and prints each non-empty figure created by a code block to
  ##   an image file in OPTIONS.outputDir.  For every printed figure a
  ##   "graphic" element is inserted into DOC.BODY directly after the code
  ##   block that produced it.

  ## The code is not run interactively, so the pager must be off.
  page_screen_output (false, "local");

  ## Figures that were already open; they are excluded from all figure
  ## handling below.
  preexisting_figs = findall (0, "type", "figure");
  [~, img_basename] = fileparts (doc.m_source_file_name);
  img_counter = 1;
  ## block_graphics{k} holds the "graphic" elements produced by DOC.BODY{k}.
  block_graphics = cell (1, numel (doc.body));

  ## Run the code that must be evaluated but is not shown in the output.
  eval_code_helper (options.codeToEvaluate);

  ## With plots already open, start with a fresh figure if requested.
  if (! isempty (preexisting_figs) && options.useNewFigure)
    figure ();
  endif

  for blk = 1:numel (doc.body)
    if (! strcmp (doc.body{blk}.type, "code"))
      continue;
    endif

    span = doc.body{blk}.lines;
    src = strjoin (doc.m_source(span(1):span(2)), "\n");
    if (options.catchError)
      try
        doc.body{blk}.output = eval_code_helper (src);
      catch err
        ## Report the error message together with the offending code.
        doc.body{blk}.output = cellstr (["error: ", err.message, ...
                                         "\n\tin:\n\n", src]);
      end_try_catch
    else
      doc.body{blk}.output = eval_code_helper (src);
    endif

    ## Figures that appeared since publishing started are printed to files.
    created_figs = setdiff (findall (0, "type", "figure"), preexisting_figs);
    for f = 1:numel (created_figs)
      hfig = created_figs(f);
      drawnow ();
      ## Figures without children are neither printed nor deleted here.
      if (isempty (get (hfig, "children")))
        continue;
      endif

      ## All "eps*" image formats share the plain ".eps" file extension.
      if (strncmp (options.imageFormat, "eps", 3))
        img_ext = "eps";
      else
        img_ext = options.imageFormat;
      endif
      img_name = [img_basename, "-", sprintf("%d", img_counter), ".", img_ext];

      print_args = {hfig, ...
                    fullfile(options.outputDir, img_name), ...
                    ["-d" options.imageFormat], "-color"};
      width_given = ! isempty (options.maxWidth);
      height_given = ! isempty (options.maxHeight);
      if (width_given && height_given)
        print_args{end+1} = sprintf ("-S%d,%d", options.maxWidth,
                                                options.maxHeight);
      elseif (width_given || height_given)
        warning ("publish: specify both options.maxWidth and options.maxHeight");
      endif
      print (print_args{:});
      img_counter++;
      delete (hfig);

      ## Remember the graphic for later insertion after this code block.
      graphic = struct ("type", "graphic", "content", img_name);
      if (isempty (block_graphics{blk}))
        block_graphics{blk} = {graphic};
      else
        block_graphics{blk} = [block_graphics{blk}, {graphic}];
      endif

      ## If only the pre-existing plots are left, open a fresh figure again.
      if (isempty (setdiff (findall (0, "type", "figure"), preexisting_figs)) ...
          && ! isempty (preexisting_figs) && options.useNewFigure)
        figure ();
      endif
    endfor

    ## Keep at most OPTIONS.maxOutputLines elements of the output.
    out_lines = doc.body{blk}.output;
    if (options.maxOutputLines < length (out_lines))
      out_lines = out_lines(1:options.maxOutputLines);
    endif
    doc.body{blk}.output = strjoin (out_lines, "\n");
  endfor

  ## Close every figure that was not open before publishing.
  delete (setdiff (findall (0, "type", "figure"), preexisting_figs));

  ## Splice the graphics into the body.  Blocks are visited in ascending
  ## order, so earlier insertions shift later positions by N_INSERTED.
  n_inserted = 0;
  for blk = 1:numel (block_graphics)
    new_elems = block_graphics{blk};
    if (isempty (new_elems))
      continue;
    endif
    pos = blk + n_inserted;
    doc.body = [doc.body(1:pos), new_elems, doc.body(pos+1:end)];
    n_inserted += numel (new_elems);
  endfor

endfunction
1055
1056
function cstr = eval_code_helper (__code__)
  ## EVAL_CODE_HELPER evaluates a given string with Octave code in an extra
  ## temporary context and returns a cellstring with the eval output.
  ##
  ##   __code__  String with the Octave code to evaluate.
  ##   cstr      Cellstring with the captured output split at newlines;
  ##             an empty cellstring if __code__ is empty.

  if (isempty (__code__))
    ## Nothing to evaluate.  The return value must still be defined, or a
    ## caller that requests the output gets an "undefined" error.  It is
    ## assigned only in this branch so that the workspace seen by evaluated
    ## code is not altered.
    cstr = {};
    return;
  endif

  ## Bring the variables stored by the previous evaluation into this
  ## workspace before running the code.
  eval_context ("load");
  cstr = evalc (__code__);
  ## Split string by lines and preserve blank lines.
  cstr = strsplit (strrep (cstr, "\n\n", "\n \n"), "\n");
  ## Store the variables of this workspace for the next evaluation.
  eval_context ("save");
endfunction
1071
1072
function cstr = eval_context (op)
  ## EVAL_CONTEXT temporary evaluation context.
  ##
  ##   Keeps a snapshot of the caller's variables between calls.  OP selects
  ##   the action:
  ##
  ##     "save"   replace the snapshot by the caller's current variables
  ##     "load"   assign every variable of the snapshot in the caller
  ##     "clear"  discard the snapshot
  ##
  ##   Any other OP does nothing.  The output CSTR is never assigned.

  persistent snapshot

  ## Names that are never stored in the snapshot.  Variable cstr in
  ## "eval_code_helper" is newly created anyways.
  excluded_names = {"__code__"};

  if (strcmp (op, "save"))
    ## Start from an empty snapshot.
    snapshot = containers.Map;
    caller_vars = evalin ("caller", "whos");
    names = {caller_vars.name};
    for k = 1:numel (names)
      if (any (strcmp (names{k}, excluded_names)))
        continue;
      endif
      snapshot(names{k}) = evalin ("caller", names{k});
    endfor

  elseif (strcmp (op, "load"))
    if (! isempty (snapshot))
      names = keys (snapshot);
      for k = 1:numel (names)
        assignin ("caller", names{k}, snapshot(names{k}));
      endfor
    endif

  elseif (strcmp (op, "clear"))
    ## Forget any stored variables.
    snapshot = [];

  endif
endfunction
1111
1112
1113## Note: Functional BIST tests are located in the 'test/publish' directory.
1114
1115## Test input validation
1116%!error publish ()
1117%!error publish (1)
1118%!error <FILE does not exist> publish ("%%_non_existent_file_%%.m")
1119%!error <only script files can be published> publish ("publish.m")
1120%!error publish ("test_script.m", "format", "html", "showCode")
1121