1/* 2 * Copyright © 2018-2020 A Bunch Tell LLC. 3 * 4 * This file is part of WriteFreely. 5 * 6 * WriteFreely is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU Affero General Public License, included 8 * in the LICENSE file in this source code package. 9 */ 10 11// Package parse assists in the parsing of plain text posts 12package parse 13 14import ( 15 "github.com/writeas/web-core/stringmanip" 16 "regexp" 17 "strings" 18) 19 20var ( 21 titleElementReg = regexp.MustCompile("</?p>") 22 urlReg = regexp.MustCompile("https?://") 23 imgReg = regexp.MustCompile(`!\[([^]]+)\]\([^)]+\)`) 24) 25 26// PostLede attempts to extract the first thought of the given post, generally 27// contained within the first line or sentence of text. 28func PostLede(t string, includePunc bool) string { 29 // Adjust where we truncate if we want to include punctuation 30 iAdj := 0 31 if includePunc { 32 iAdj = 1 33 } 34 35 // Find lede within first line of text 36 nl := strings.IndexRune(t, '\n') 37 if nl > -1 { 38 t = t[:nl] 39 } 40 41 // Strip certain HTML tags 42 t = titleElementReg.ReplaceAllString(t, "") 43 44 // Strip URL protocols 45 t = urlReg.ReplaceAllString(t, "") 46 47 // Strip image URL, leaving only alt text 48 t = imgReg.ReplaceAllString(t, " $1 ") 49 50 // Find lede within first sentence 51 punc := strings.Index(t, ". ") 52 if punc > -1 { 53 t = t[:punc+iAdj] 54 } 55 punc = stringmanip.IndexRune(t, '。') 56 if punc > -1 { 57 c := []rune(t) 58 t = string(c[:punc+iAdj]) 59 } 60 punc = stringmanip.IndexRune(t, '?') 61 if punc > -1 { 62 c := []rune(t) 63 t = string(c[:punc+iAdj]) 64 } 65 66 return t 67} 68 69// TruncToWord truncates the given text to the provided limit. 70func TruncToWord(s string, l int) (string, bool) { 71 truncated := false 72 c := []rune(s) 73 if len(c) > l { 74 truncated = true 75 s = string(c[:l]) 76 spaceIdx := strings.LastIndexByte(s, ' ') 77 if spaceIdx > -1 { 78 s = s[:spaceIdx] 79 } 80 } 81 return s, truncated 82} 83