1/*
2 * Copyright © 2018-2020 A Bunch Tell LLC.
3 *
4 * This file is part of WriteFreely.
5 *
6 * WriteFreely is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Affero General Public License, included
8 * in the LICENSE file in this source code package.
9 */
10
11// Package parse assists in the parsing of plain text posts
12package parse
13
14import (
15	"github.com/writeas/web-core/stringmanip"
16	"regexp"
17	"strings"
18)
19
var (
	// titleElementReg matches opening and closing <p> tags.
	titleElementReg = regexp.MustCompile("</?p>")
	// urlReg matches the "http://" or "https://" scheme prefix of a URL.
	urlReg = regexp.MustCompile("https?://")
	// imgReg matches a Markdown image of the form ![alt](target) and captures
	// the alt text as group 1. The alt text may be empty, so images written
	// without any alt text (e.g. "![](pic.png)") are matched as well instead
	// of being left behind as raw markup.
	imgReg = regexp.MustCompile(`!\[([^]]*)\]\([^)]+\)`)
)
25
26// PostLede attempts to extract the first thought of the given post, generally
27// contained within the first line or sentence of text.
28func PostLede(t string, includePunc bool) string {
29	// Adjust where we truncate if we want to include punctuation
30	iAdj := 0
31	if includePunc {
32		iAdj = 1
33	}
34
35	// Find lede within first line of text
36	nl := strings.IndexRune(t, '\n')
37	if nl > -1 {
38		t = t[:nl]
39	}
40
41	// Strip certain HTML tags
42	t = titleElementReg.ReplaceAllString(t, "")
43
44	// Strip URL protocols
45	t = urlReg.ReplaceAllString(t, "")
46
47	// Strip image URL, leaving only alt text
48	t = imgReg.ReplaceAllString(t, " $1 ")
49
50	// Find lede within first sentence
51	punc := strings.Index(t, ". ")
52	if punc > -1 {
53		t = t[:punc+iAdj]
54	}
55	punc = stringmanip.IndexRune(t, '。')
56	if punc > -1 {
57		c := []rune(t)
58		t = string(c[:punc+iAdj])
59	}
60	punc = stringmanip.IndexRune(t, '?')
61	if punc > -1 {
62		c := []rune(t)
63		t = string(c[:punc+iAdj])
64	}
65
66	return t
67}
68
// TruncToWord shortens s to at most l runes, backing up to a space when one
// is available.
//
// If s holds l runes or fewer it is returned unchanged together with false.
// Otherwise s is cut to its first l runes and, if that prefix contains an
// ASCII space, everything from the last space onward is dropped; a prefix
// without any space is returned as is. In both of those cases the second
// result is true, reporting that s was shortened.
//
// A negative l is treated as 0 instead of causing a slice-bounds panic.
func TruncToWord(s string, l int) (string, bool) {
	// Clamp the limit: slicing with a negative bound would panic.
	if l < 0 {
		l = 0
	}

	// Count and cut in runes so multi-byte characters are never split.
	c := []rune(s)
	if len(c) <= l {
		return s, false
	}

	s = string(c[:l])
	// Back up to the last space so the cut doesn't leave a word fragment.
	if spaceIdx := strings.LastIndexByte(s, ' '); spaceIdx > -1 {
		s = s[:spaceIdx]
	}
	return s, true
}
83