1defmodule Earmark.Scanner do
2
3  import Earmark.Helpers.StringHelpers, only: [behead: 2]
4
5  @backtix_rgx    ~r/\A(`+)(.*)/
6  @blockquote_rgx ~r/\A>(?!\S)/
7  @code_fence_rgx ~r/\A(\s*)~~~/
8  @headline_rgx   ~r/\A(\#{1,6})(\s+)(.*)/
9#  @id_close_rgx   ~r/\[(.*?)\](?!:)/
10  @id_open_rgx    ~r/\A(\s{0,3})\[(.*?)\]:\s+(.*)\z/
11  @indent_rgx     ~r/\A\s{4,}/
12  @list_item_rgx  ~r/\A(\s{0,3})(\d+\.|\*|-)\s+/
13  @ruler_rgx      ~r/\A \s{0,3} (?:([-_*])\s?)(?:\1\s?){2,} \z/x
14  @under_l1_head_rgx ~r/\A=+\s*\z/
15  @under_l2_head_rgx ~r/\A-{1,2}\s*\z/
16
17  @text_rgx ~r/(?:[^`]|\\`)*/
18
19  defmodule Backtix,       do: defstruct count: 1
20  defmodule Blockquote,    do: defstruct []
21  defmodule CodeFence,     do: defstruct []
22  defmodule Headline,      do: defstruct level: 1..6
23  defmodule IdClose,       do: defstruct id: "content of [...]"
24  defmodule IdOpen,        do: defstruct id: "content of [...]", href: "word after ]:\\s+"
25  defmodule Indent,        do: defstruct count: 4
26  defmodule ListItem,      do: defstruct type: :ul_ol, bullet: "* or - or empty"
27  defmodule RulerFat,      do: defstruct []
28  defmodule RulerMedium,   do: defstruct []
29  defmodule RulerThin,     do: defstruct []
30  defmodule Text,          do: defstruct content: ""
31  defmodule UnderHeadline, do: defstruct level: 1..2
32
33  @type token :: %Backtix{} | %Blockquote{} | %CodeFence{} | %Headline{} | %IdClose{} | %IdOpen{} | %Indent{} | %ListItem{} | %RulerFat{} | %RulerMedium{} | %RulerThin{} | %Text{} | %UnderHeadline{}
34
35  @type tokens :: list(token)
36  @type t_continuation :: {token, String.t, boolean()}
37
38
39  @spec scan_line( String.t ) :: tokens
40  @doc """
41  Scans a line into a list of tokens
42  """
43  def scan_line line do
44    scan_line_into_tokens( line, [], true )
45    |> Enum.reverse
46  end
47
48  @spec scan_line_into_tokens( String.t, tokens, boolean() ) :: tokens
49  # Empty Line
50  defp scan_line_into_tokens "", [], _beg do
51    []
52  end
53  # Line consumed
54  defp scan_line_into_tokens( "", tokens, _beg), do: tokens
55  # Line not consumed
56  defp scan_line_into_tokens line, tokens, beg do
57    {token, rest, still_at_beg} = scan_next_token( line, beg )
58    scan_line_into_tokens( rest, [token|tokens], still_at_beg )
59  end
60
61  @spec scan_next_token( String.t, boolean ) :: false | t_continuation
62  defp scan_next_token line, beg_of_line
63  defp scan_next_token line, true do
64    cond do
65      Regex.run( @blockquote_rgx, line ) ->
66        {%Blockquote{}, behead(line, 1), false}
67      matches = Regex.run( @list_item_rgx, line) ->
68        [content, ws, bullet] = matches
69        prefixed_with_ws(line, ws) ||
70          {make_list_item(bullet), behead(line,content), false}
71
72      matches = Regex.run( @id_open_rgx, line ) ->
73        [_content, ws, id, rest ] = matches
74        prefixed_with_ws(line, ws) ||
75          {%IdOpen{id: id}, rest, false}
76      _matches = Regex.run( @under_l1_head_rgx, line ) ->
77        {%UnderHeadline{level: 1}, "", false}
78
79      _matches = Regex.run( @under_l2_head_rgx, line ) ->
80        {%UnderHeadline{level: 2}, "", false}
81
82      matches = Regex.run( @code_fence_rgx, line ) ->
83        [_line, ws] = matches
84        prefixed_with_ws(line, ws) ||
85          {%CodeFence{}, behead(line, 3), false}
86
87      matches = Regex.run( @indent_rgx, line ) ->
88        count = String.length(hd matches)
89        {%Indent{count: count}, behead(line, count), false}
90
91      matches = Regex.run( @headline_rgx, line ) ->
92        [_line, levelstr, _ws, rest] = matches
93        {%Headline{level: String.length(levelstr)}, rest, false}
94
95      matches =  Regex.run( @ruler_rgx, line ) ->
96        [_content, type] = matches
97        {make_ruler_from(type), "", false}
98
99      true ->
100        scan_next_token( line, false )
101    end
102  end
103  defp scan_next_token line, false do
104    scan_token_not_at_beg( line )
105    |> Tuple.append( false )
106  end
107
108  @spec scan_token_not_at_beg( String.t ) :: {} | t_continuation
109  defp scan_token_not_at_beg line do
110    cond do
111      matches = Regex.run( @backtix_rgx, line ) ->
112        [_line, backtix, rest] = matches
113        {%Backtix{count: String.length(backtix)}, rest}
114      # matches = Regex.run( @id_close_rgx, line ) ->
115      #   [text, id, does_open] = matches
116      #   {%IdDef{id: id, type:
117      #      (if does_open == "", do: "close", else: "open")
118      #     }, behead(line, text)}
119      matches = Regex.run( @text_rgx, line ) ->
120        text = hd matches
121        {%Text{content: text}, behead(line, text)}
122      true -> {}
123    end
124  end
125
126  @spec make_ruler_from( String.t ) :: token
127  defp make_ruler_from type do
128    case type do
129      "*" -> %RulerFat{}
130      "_" -> %RulerMedium{}
131      "-" -> %RulerThin{}
132    end
133  end
134
135  @spec make_list_item( String.t ) :: %ListItem{}
136  defp make_list_item bullet do
137    case bullet do
138      "*" -> %ListItem{type: :ul, bullet: "*"}
139      "-" -> %ListItem{type: :ul, bullet: "-"}
140      _   -> %ListItem{type: :ol, bullet: ""}
141    end
142  end
143
144  @spec prefixed_with_ws(String.t, String.t) :: false | { %Text{}, String.t, true}
145  defp prefixed_with_ws line, ws do
146    if ws == "" do
147      false
148    else
149      rest = behead( line, ws )
150      {%Text{content: ws}, rest, true}
151    end
152  end
153end
154
155# SPDX-License-Identifier: Apache-2.0
156