defmodule Earmark.Scanner do
  @moduledoc """
  Splits a single line of Markdown source into a flat list of tokens.

  Scanning is position sensitive: block-level markers (blockquote, list item,
  link-id definition, setext underline, code fence, indent, ATX headline,
  ruler) are only recognised while the scanner is still at the beginning of
  the line. Everything after that is broken into `Backtix` and `Text` tokens.
  """

  import Earmark.Helpers.StringHelpers, only: [behead: 2]

  @backtix_rgx ~r/\A(`+)(.*)/
  @blockquote_rgx ~r/\A>(?!\S)/
  @code_fence_rgx ~r/\A(\s*)~~~/
  @headline_rgx ~r/\A(\#{1,6})(\s+)(.*)/
  # Currently unused, kept for the disabled IdClose scanning below.
  # @id_close_rgx         ~r/\[(.*?)\](?!:)/
  @id_open_rgx ~r/\A(\s{0,3})\[(.*?)\]:\s+(.*)\z/
  @indent_rgx ~r/\A\s{4,}/
  @list_item_rgx ~r/\A(\s{0,3})(\d+\.|\*|-)\s+/
  @ruler_rgx ~r/\A \s{0,3} (?:([-_*])\s?)(?:\1\s?){2,} \z/x
  @under_l1_head_rgx ~r/\A=+\s*\z/
  @under_l2_head_rgx ~r/\A-{1,2}\s*\z/

  # The escaped-backtick alternative must come first: with `[^`]` in front, the
  # backslash is consumed on its own and the following backtick ends the match,
  # so an escaped backtick could never be part of a text token.
  @text_rgx ~r/\A(?:\\`|[^`])*/

  # Token structs. The default values double as documentation of the range of
  # values each field is expected to hold.
  defmodule Backtix, do: defstruct(count: 1)
  defmodule Blockquote, do: defstruct([])
  defmodule CodeFence, do: defstruct([])
  defmodule Headline, do: defstruct(level: 1..6)
  defmodule IdClose, do: defstruct(id: "content of [...]")
  defmodule IdOpen, do: defstruct(id: "content of [...]", href: "word after ]:\\s+")
  defmodule Indent, do: defstruct(count: 4)
  defmodule ListItem, do: defstruct(type: :ul_ol, bullet: "* or - or empty")
  defmodule RulerFat, do: defstruct([])
  defmodule RulerMedium, do: defstruct([])
  defmodule RulerThin, do: defstruct([])
  defmodule Text, do: defstruct(content: "")
  defmodule UnderHeadline, do: defstruct(level: 1..2)

  @type token ::
          %Backtix{}
          | %Blockquote{}
          | %CodeFence{}
          | %Headline{}
          | %IdClose{}
          | %IdOpen{}
          | %Indent{}
          | %ListItem{}
          | %RulerFat{}
          | %RulerMedium{}
          | %RulerThin{}
          | %Text{}
          | %UnderHeadline{}

  @type tokens :: list(token)
  @type t_continuation :: {token, String.t(), boolean()}

  @doc """
  Scans a line into a list of tokens.

  The tokens are returned in the order in which they occur in `line`. An empty
  line yields an empty list.
  """
  @spec scan_line(String.t()) :: tokens
  def scan_line(line) do
    line
    |> scan_line_into_tokens([], true)
    |> Enum.reverse()
  end

  # Accumulates tokens in reverse order until the line is fully consumed.
  # `beg` tells whether we are still at the beginning of the line, which is the
  # only place where block-level tokens are recognised.
  @spec scan_line_into_tokens(String.t(), tokens, boolean()) :: tokens
  defp scan_line_into_tokens("", tokens, _beg), do: tokens

  defp scan_line_into_tokens(line, tokens, beg) do
    {token, rest, still_at_beg} = scan_next_token(line, beg)
    scan_line_into_tokens(rest, [token | tokens], still_at_beg)
  end

  # Returns `{token, rest_of_line, still_at_beginning_of_line}`.
  #
  # At the beginning of a line the block-level patterns are tried in the order
  # of the `cond` clauses; the first one that matches wins.
  # NOTE(review): the list item pattern is tried before the ruler pattern, so a
  # spaced ruler such as `* * *` or `- - -` is scanned as a list item - confirm
  # that this precedence is intended.
  @spec scan_next_token(String.t(), boolean()) :: t_continuation
  defp scan_next_token(line, true) do
    cond do
      Regex.run(@blockquote_rgx, line) ->
        {%Blockquote{}, behead(line, 1), false}

      matches = Regex.run(@list_item_rgx, line) ->
        [content, ws, bullet] = matches
        # Leading whitespace is emitted as its own Text token first; the list
        # item itself is then recognised on the next round.
        prefixed_with_ws(line, ws) ||
          {make_list_item(bullet), behead(line, content), false}

      matches = Regex.run(@id_open_rgx, line) ->
        [_content, ws, id, rest] = matches

        prefixed_with_ws(line, ws) ||
          {%IdOpen{id: id}, rest, false}

      Regex.run(@under_l1_head_rgx, line) ->
        {%UnderHeadline{level: 1}, "", false}

      Regex.run(@under_l2_head_rgx, line) ->
        {%UnderHeadline{level: 2}, "", false}

      matches = Regex.run(@code_fence_rgx, line) ->
        [_line, ws] = matches
        # Once the whitespace prefix is gone, drop the three tildes.
        prefixed_with_ws(line, ws) ||
          {%CodeFence{}, behead(line, 3), false}

      matches = Regex.run(@indent_rgx, line) ->
        count = String.length(hd(matches))
        {%Indent{count: count}, behead(line, count), false}

      matches = Regex.run(@headline_rgx, line) ->
        [_line, levelstr, _ws, rest] = matches
        {%Headline{level: String.length(levelstr)}, rest, false}

      matches = Regex.run(@ruler_rgx, line) ->
        [_content, type] = matches
        {make_ruler_from(type), "", false}

      true ->
        # No block-level marker: fall back to inline scanning.
        scan_next_token(line, false)
    end
  end

  defp scan_next_token(line, false) do
    {token, rest} = scan_token_not_at_beg(line)
    {token, rest, false}
  end

  # Scans one inline token: either a run of backticks or a stretch of text up
  # to the next unescaped backtick. Returns `{token, rest_of_line}`.
  @spec scan_token_not_at_beg(String.t()) :: {token, String.t()}
  defp scan_token_not_at_beg(line) do
    case Regex.run(@backtix_rgx, line) do
      [_line, backtix, rest] ->
        {%Backtix{count: String.length(backtix)}, rest}

      # Disabled IdClose scanning, kept for reference:
      #   matches = Regex.run( @id_close_rgx, line ) ->
      #     [text, id, does_open] = matches
      #     {%IdDef{id: id, type:
      #       (if does_open == "", do: "close", else: "open")
      #     }, behead(line, text)}

      nil ->
        # The text pattern can match the empty string, so it always matches.
        [text | _] = Regex.run(@text_rgx, line)
        {%Text{content: text}, behead(line, text)}
    end
  end

  # Maps the character a ruler is made of to its token.
  @spec make_ruler_from(String.t()) :: token
  defp make_ruler_from("*"), do: %RulerFat{}
  defp make_ruler_from("_"), do: %RulerMedium{}
  defp make_ruler_from("-"), do: %RulerThin{}

  # `*` and `-` denote unordered items; anything else (that is `\d+.`) denotes
  # an ordered item, for which no bullet is recorded.
  @spec make_list_item(String.t()) :: %ListItem{}
  defp make_list_item(bullet) when bullet in ["*", "-"] do
    %ListItem{type: :ul, bullet: bullet}
  end

  defp make_list_item(_numbered), do: %ListItem{type: :ol, bullet: ""}

  # When `ws` is not empty, returns it as a Text token while staying at the
  # beginning of the line; returns `false` otherwise so that callers can chain
  # the actual token with `||`.
  @spec prefixed_with_ws(String.t(), String.t()) :: false | {%Text{}, String.t(), true}
  defp prefixed_with_ws(_line, ""), do: false

  defp prefixed_with_ws(line, ws) do
    {%Text{content: ws}, behead(line, ws), true}
  end
end

# SPDX-License-Identifier: Apache-2.0