1 // FIXME: we have some long lines that could be refactored, but it's not a big deal.
2 // ignore-tidy-linelength
3 
4 use regex::{Captures, Regex};
5 use std::collections::HashMap;
6 use std::io;
7 use std::io::{Read, Write};
8 
main()9 fn main() {
10     write_md(parse_links(parse_references(read_md())));
11 }
12 
/// Reads all of stdin into a `String`.
///
/// # Panics
///
/// Panics with the underlying I/O error's message if reading fails
/// (including when the input is not valid UTF-8).
fn read_md() -> String {
    let mut buffer = String::new();
    if let Err(error) = io::stdin().read_to_string(&mut buffer) {
        panic!("{}", error);
    }
    buffer
}
20 
/// Writes `output` to stdout exactly as given (no trailing newline is added).
///
/// # Panics
///
/// Panics if writing to stdout fails.
fn write_md(output: String) {
    let stdout = io::stdout();
    let mut handle = stdout.lock();
    write!(handle, "{}", output).unwrap();
}
24 
parse_references(buffer: String) -> (String, HashMap<String, String>)25 fn parse_references(buffer: String) -> (String, HashMap<String, String>) {
26     let mut ref_map = HashMap::new();
27     // FIXME: currently doesn't handle "title" in following line.
28     let re = Regex::new(r###"(?m)\n?^ {0,3}\[([^]]+)\]:[[:blank:]]*(.*)$"###)
29         .unwrap();
30     let output = re
31         .replace_all(&buffer, |caps: &Captures<'_>| {
32             let key = caps.get(1).unwrap().as_str().to_uppercase();
33             let val = caps.get(2).unwrap().as_str().to_string();
34             if ref_map.insert(key, val).is_some() {
35                 panic!(
36                     "Did not expect markdown page to have duplicate reference"
37                 );
38             }
39             "".to_string()
40         })
41         .to_string();
42     (output, ref_map)
43 }
44 
parse_links((buffer, ref_map): (String, HashMap<String, String>)) -> String45 fn parse_links((buffer, ref_map): (String, HashMap<String, String>)) -> String {
46     // FIXME: check which punctuation is allowed by spec.
47     let re = Regex::new(r###"(?:(?P<pre>(?:```(?:[^`]|`[^`])*`?\n```\n)|(?:[^\[]`[^`\n]+[\n]?[^`\n]*`))|(?:\[(?P<name>[^]]+)\](?:(?:\([[:blank:]]*(?P<val>[^")]*[^ ])(?:[[:blank:]]*"[^"]*")?\))|(?:\[(?P<key>[^]]*)\]))?))"###).expect("could not create regex");
48     let error_code =
49         Regex::new(r###"^E\d{4}$"###).expect("could not create regex");
50     let output = re.replace_all(&buffer, |caps: &Captures<'_>| {
51         match caps.name("pre") {
52             Some(pre_section) => format!("{}", pre_section.as_str()),
53             None => {
54                 let name = caps.name("name").expect("could not get name").as_str();
55                 // Really we should ignore text inside code blocks,
56                 // this is a hack to not try to treat `#[derive()]`,
57                 // `[profile]`, `[test]`, or `[E\d\d\d\d]` like a link.
58                 if name.starts_with("derive(") ||
59                    name.starts_with("profile") ||
60                    name.starts_with("test") ||
61                    name.starts_with("no_mangle") ||
62                    error_code.is_match(name) {
63                     return name.to_string()
64                 }
65 
66                 let val = match caps.name("val") {
67                     // `[name](link)`
68                     Some(value) => value.as_str().to_string(),
69                     None => {
70                         match caps.name("key") {
71                             Some(key) => {
72                                 match key.as_str() {
73                                     // `[name][]`
74                                     "" => format!("{}", ref_map.get(&name.to_uppercase()).expect(&format!("could not find url for the link text `{}`", name))),
75                                     // `[name][reference]`
76                                     _ => format!("{}", ref_map.get(&key.as_str().to_uppercase()).expect(&format!("could not find url for the link text `{}`", key.as_str()))),
77                                 }
78                             }
79                             // `[name]` as reference
80                             None => format!("{}", ref_map.get(&name.to_uppercase()).expect(&format!("could not find url for the link text `{}`", name))),
81                         }
82                     }
83                 };
84                 format!("{} at *{}*", name, val)
85             }
86         }
87     });
88     output.to_string()
89 }
90 
/// Unit tests for the link rewriting pipeline. Every test feeds Markdown
/// through `parse_references` followed by `parse_links` and compares the
/// resulting text with the expected output.
#[cfg(test)]
mod tests {
    /// Runs the same two-stage pipeline as `main`, minus the stdin/stdout I/O.
    fn parse(source: String) -> String {
        super::parse_links(super::parse_references(source))
    }

    /// An inline `[name](url)` link becomes `name at *url*`.
    #[test]
    fn parses_inline_link() {
        let source =
            r"This is a [link](http://google.com) that should be expanded"
                .to_string();
        let target =
            r"This is a link at *http://google.com* that should be expanded"
                .to_string();
        assert_eq!(parse(source), target);
    }

    /// Several inline links spread over more than one line are all rewritten.
    #[test]
    fn parses_multiline_links() {
        let source = r"This is a [link](http://google.com) that
should appear expanded. Another [location](/here/) and [another](http://gogogo)"
            .to_string();
        let target = r"This is a link at *http://google.com* that
should appear expanded. Another location at */here/* and another at *http://gogogo*"
            .to_string();
        assert_eq!(parse(source), target);
    }

    /// `[name][key]` is resolved through the reference definition, and the
    /// definition line is removed from the output.
    #[test]
    fn parses_reference() {
        let source = r"This is a [link][theref].
[theref]: http://example.com/foo
more text"
            .to_string();
        let target = r"This is a link at *http://example.com/foo*.
more text"
            .to_string();
        assert_eq!(parse(source), target);
    }

    /// `[name][]` uses the link text as the reference label; the destination
    /// may contain spaces.
    #[test]
    fn parses_implicit_link() {
        let source = r"This is an [implicit][] link.
[implicit]: /The Link/"
            .to_string();
        let target = r"This is an implicit at */The Link/* link.".to_string();
        assert_eq!(parse(source), target);
    }
    /// A reference definition indented by one space is still recognised.
    #[test]
    fn parses_refs_with_one_space_indentation() {
        let source = r"This is a [link][ref]
 [ref]: The link"
            .to_string();
        let target = r"This is a link at *The link*".to_string();
        assert_eq!(parse(source), target);
    }

    /// A reference definition indented by two spaces is still recognised.
    #[test]
    fn parses_refs_with_two_space_indentation() {
        let source = r"This is a [link][ref]
  [ref]: The link"
            .to_string();
        let target = r"This is a link at *The link*".to_string();
        assert_eq!(parse(source), target);
    }

    /// A reference definition indented by three spaces is still recognised.
    #[test]
    fn parses_refs_with_three_space_indentation() {
        let source = r"This is a [link][ref]
   [ref]: The link"
            .to_string();
        let target = r"This is a link at *The link*".to_string();
        assert_eq!(parse(source), target);
    }

    /// Four spaces of indentation exceed the ` {0,3}` allowed by the
    /// definition regex, so `ref` is never registered and resolving
    /// `[link][ref]` panics inside `parse_links`.
    #[test]
    #[should_panic]
    fn rejects_refs_with_four_space_indentation() {
        let source = r"This is a [link][ref]
    [ref]: The link"
            .to_string();
        let target = r"This is a link at *The link*".to_string();
        assert_eq!(parse(source), target);
    }

    /// The optional `"title"` of an inline link is dropped from the output.
    #[test]
    fn ignores_optional_inline_title() {
        let source =
            r###"This is a titled [link](http://example.com "My title")."###
                .to_string();
        let target =
            r"This is a titled link at *http://example.com*.".to_string();
        assert_eq!(parse(source), target);
    }

    /// A title containing an apostrophe is still recognised and dropped.
    #[test]
    fn parses_title_with_puctuation() {
        let source =
            r###"[link](http://example.com "It's Title")"###.to_string();
        let target = r"link at *http://example.com*".to_string();
        assert_eq!(parse(source), target);
    }

    /// Link text may contain an apostrophe.
    #[test]
    fn parses_name_with_punctuation() {
        let source = r###"[I'm here](there)"###.to_string();
        let target = r###"I'm here at *there*"###.to_string();
        assert_eq!(parse(source), target);
    }
    /// Non-ASCII characters are accepted in both the link text and the
    /// destination.
    #[test]
    fn parses_name_with_utf8() {
        let source = r###"[user’s forum](the user’s forum)"###.to_string();
        let target = r###"user’s forum at *the user’s forum*"###.to_string();
        assert_eq!(parse(source), target);
    }

    /// Reference labels may contain spaces and hyphens, and the definition
    /// needs no blank after the colon.
    #[test]
    fn parses_reference_with_punctuation() {
        let source = r###"[link][the ref-ref]
[the ref-ref]:http://example.com/ref-ref"###
            .to_string();
        let target = r###"link at *http://example.com/ref-ref*"###.to_string();
        assert_eq!(parse(source), target);
    }

    /// Reference labels are matched without regard to case.
    #[test]
    fn parses_reference_case_insensitively() {
        let source = r"[link][Ref]
[ref]: The reference"
            .to_string();
        let target = r"link at *The reference*".to_string();
        assert_eq!(parse(source), target);
    }
    /// With an empty `[]`, the link text (spaces included) is the label.
    #[test]
    fn parses_link_as_reference_when_reference_is_empty() {
        let source = r"[link as reference][]
[link as reference]: the actual reference"
            .to_string();
        let target = r"link as reference at *the actual reference*".to_string();
        assert_eq!(parse(source), target);
    }

    /// A bare `[name]` with no following `[]` or `()` is resolved as a
    /// reference whose label is the link text.
    #[test]
    fn parses_link_without_reference_as_reference() {
        let source = r"[link] is alone
[link]: The contents"
            .to_string();
        let target = r"link at *The contents* is alone".to_string();
        assert_eq!(parse(source), target);
    }

    /// Bare reference wrapped in emphasis markers. Marked `#[ignore]`; the
    /// reason is not recorded here.
    // NOTE(review): presumably the expected output is not produced yet — confirm.
    #[test]
    #[ignore]
    fn parses_link_without_reference_as_reference_with_asterisks() {
        let source = r"*[link]* is alone
[link]: The contents"
            .to_string();
        let target = r"*link* at *The contents* is alone".to_string();
        assert_eq!(parse(source), target);
    }
    /// Bracketed TOML section headers inside a fenced code block are left
    /// untouched.
    #[test]
    fn ignores_links_in_pre_sections() {
        let source = r###"```toml
[package]
name = "hello_cargo"
version = "0.1.0"

[dependencies]
```
"###
        .to_string();
        let target = source.clone();
        assert_eq!(parse(source), target);
    }

    /// Bracketed text inside an inline code span is left untouched.
    #[test]
    fn ignores_links_in_quoted_sections() {
        let source = r###"do not change `[package]`."###.to_string();
        let target = source.clone();
        assert_eq!(parse(source), target);
    }
    /// An inline code span that wraps onto a second line is left untouched,
    /// while the real link after it is still rewritten.
    #[test]
    fn ignores_links_in_quoted_sections_containing_newlines() {
        let source = r"do not change `this [package]
is still here` [link](ref)"
            .to_string();
        let target = r"do not change `this [package]
is still here` link at *ref*"
            .to_string();
        assert_eq!(parse(source), target);
    }

    /// A fenced code block is preserved, and a reference link following it is
    /// rewritten with its definition line removed.
    #[test]
    fn ignores_links_in_pre_sections_while_still_handling_links() {
        let source = r###"```toml
[package]
name = "hello_cargo"
version = "0.1.0"

[dependencies]
```
Another [link]
more text
[link]: http://gohere
"###
        .to_string();
        let target = r###"```toml
[package]
name = "hello_cargo"
version = "0.1.0"

[dependencies]
```
Another link at *http://gohere*
more text
"###
        .to_string();
        assert_eq!(parse(source), target);
    }
    /// Backtick-quoted text and a bracketed error code inside a fenced code
    /// block do not disturb the block; it is emitted unchanged.
    #[test]
    fn ignores_quotes_in_pre_sections() {
        let source = r###"```bash
$ cargo build
   Compiling guessing_game v0.1.0 (file:///projects/guessing_game)
src/main.rs:23:21: 23:35 error: mismatched types [E0308]
src/main.rs:23     match guess.cmp(&secret_number) {
                                   ^~~~~~~~~~~~~~
src/main.rs:23:21: 23:35 help: run `rustc --explain E0308` to see a detailed explanation
src/main.rs:23:21: 23:35 note: expected type `&std::string::String`
src/main.rs:23:21: 23:35 note:    found type `&_`
error: aborting due to previous error
Could not compile `guessing_game`.
```
"###
            .to_string();
        let target = source.clone();
        assert_eq!(parse(source), target);
    }
    /// Very short inline code spans, including one that is just `[0]`, are
    /// left unchanged.
    #[test]
    fn ignores_short_quotes() {
        let source = r"to `1` at index `[0]` i".to_string();
        let target = source.clone();
        assert_eq!(parse(source), target);
    }
    /// A fenced code block whose last line before the closing fence contains
    /// backticks is preserved, and the link on the line after the block is
    /// still rewritten.
    #[test]
    fn ignores_pre_sections_with_final_quote() {
        let source = r###"```bash
$ cargo run
   Compiling points v0.1.0 (file:///projects/points)
error: the trait bound `Point: std::fmt::Display` is not satisfied [--explain E0277]
 --> src/main.rs:8:29
8 |>     println!("Point 1: {}", p1);
  |>                             ^^
<std macros>:2:27: 2:58: note: in this expansion of format_args!
<std macros>:3:1: 3:54: note: in this expansion of print! (defined in <std macros>)
src/main.rs:8:5: 8:33: note: in this expansion of println! (defined in <std macros>)
note: `Point` cannot be formatted with the default formatter; try using `:?` instead if you are using a format string
note: required by `std::fmt::Display::fmt`
```
`here` is another [link](the ref)
"###.to_string();
        let target = r###"```bash
$ cargo run
   Compiling points v0.1.0 (file:///projects/points)
error: the trait bound `Point: std::fmt::Display` is not satisfied [--explain E0277]
 --> src/main.rs:8:29
8 |>     println!("Point 1: {}", p1);
  |>                             ^^
<std macros>:2:27: 2:58: note: in this expansion of format_args!
<std macros>:3:1: 3:54: note: in this expansion of print! (defined in <std macros>)
src/main.rs:8:5: 8:33: note: in this expansion of println! (defined in <std macros>)
note: `Point` cannot be formatted with the default formatter; try using `:?` instead if you are using a format string
note: required by `std::fmt::Display::fmt`
```
`here` is another link at *the ref*
"###.to_string();
        assert_eq!(parse(source), target);
    }
    /// End-to-end check on a longer document mixing inline, titled,
    /// reference, numeric-reference and implicit links, with the reference
    /// definitions placed at the end of the text.
    #[test]
    fn parses_adam_p_cheatsheet() {
        let source = r###"[I'm an inline-style link](https://www.google.com)

[I'm an inline-style link with title](https://www.google.com "Google's Homepage")

[I'm a reference-style link][Arbitrary case-insensitive reference text]

[I'm a relative reference to a repository file](../blob/master/LICENSE)

[You can use numbers for reference-style link definitions][1]

Or leave it empty and use the [link text itself][].

URLs and URLs in angle brackets will automatically get turned into links.
http://www.example.com or <http://www.example.com> and sometimes
example.com (but not on Github, for example).

Some text to show that the reference links can follow later.

[arbitrary case-insensitive reference text]: https://www.mozilla.org
[1]: http://slashdot.org
[link text itself]: http://www.reddit.com"###
            .to_string();

        let target = r###"I'm an inline-style link at *https://www.google.com*

I'm an inline-style link with title at *https://www.google.com*

I'm a reference-style link at *https://www.mozilla.org*

I'm a relative reference to a repository file at *../blob/master/LICENSE*

You can use numbers for reference-style link definitions at *http://slashdot.org*

Or leave it empty and use the link text itself at *http://www.reddit.com*.

URLs and URLs in angle brackets will automatically get turned into links.
http://www.example.com or <http://www.example.com> and sometimes
example.com (but not on Github, for example).

Some text to show that the reference links can follow later.
"###
            .to_string();
        assert_eq!(parse(source), target);
    }
}
416