1package search
2
3import (
4	"context"
5	"fmt"
6	"regexp"
7	"strings"
8
9	"github.com/araddon/dateparse"
10	mapset "github.com/deckarep/golang-set"
11	"github.com/keybase/client/go/chat/globals"
12	"github.com/keybase/client/go/chat/utils"
13	"github.com/keybase/client/go/protocol/chat1"
14	"github.com/keybase/client/go/protocol/gregor1"
15	porterstemmer "github.com/keybase/go-porterstemmer"
16)
17
var (
	// splitExpr splits message text on whitespace and basic sentence
	// punctuation: period, comma, question mark and exclamation mark.
	splitExpr = regexp.MustCompile(`[\s\.,\?!]`)

	// stripSeps lists the separator patterns that are stripped out of a token
	// to produce its raw aliases. Regex metacharacters are pre-escaped because
	// the entries are joined into a single alternation for stripExpr.
	stripSeps = []string{
		// grouping characters
		"<", ">",
		"\\(", "\\)",
		"\\[", "\\]",
		"\\{", "\\}",
		"\"",
		"'",
		// delimiter used inside phone numbers
		"-",
		// mention markers
		"@",
		"#",
		// markdown markers
		"\\*",
		"_",
		"~",
		"`",
	}

	// stripExpr matches any single separator listed in stripSeps.
	stripExpr = regexp.MustCompile(strings.Join(stripSeps, "|"))
)
42
43func prefixes(token string) (res []string) {
44	if len(token) < MinTokenLength {
45		return nil
46	}
47	for i := range token {
48		if i < MinTokenLength {
49			continue
50		}
51		// Skip any prefixes longer than `maxPrefixLength` to limit the index size.
52		if i > maxPrefixLength {
53			break
54		}
55		res = append(res, token[:i])
56	}
57	return res
58}
59
60type tokenMap map[string]map[string]chat1.EmptyStruct
61
62// getIndexTokens splits the content of the given message on whitespace and
63// special characters returning a map of tokens to aliases  normalized to lowercase.
64func tokenize(msgText string) tokenMap {
65	if msgText == "" {
66		return nil
67	}
68
69	// split the message text up on basic punctuation/spaces
70	tokens := splitExpr.Split(msgText, -1)
71	tokenMap := tokenMap{}
72	for _, token := range tokens {
73		if len(token) < MinTokenLength {
74			continue
75		}
76
77		token = strings.ToLower(token)
78		if _, ok := tokenMap[token]; !ok {
79			tokenMap[token] = map[string]chat1.EmptyStruct{}
80		}
81
82		// strip separators to raw tokens which we count as an alias to the
83		// original token
84		stripped := stripExpr.Split(token, -1)
85		for _, s := range stripped {
86			if s == "" {
87				continue
88			}
89			tokenMap[token][s] = chat1.EmptyStruct{}
90
91			// add the stem as an alias
92			stemmed := porterstemmer.StemWithoutLowerCasing([]rune(s))
93			tokenMap[token][string(stemmed)] = chat1.EmptyStruct{}
94
95			// calculate prefixes to alias to the token
96			for _, prefix := range prefixes(s) {
97				tokenMap[token][prefix] = chat1.EmptyStruct{}
98			}
99		}
100		// drop the original token from the set of aliases
101		delete(tokenMap[token], token)
102	}
103	return tokenMap
104}
105
106func tokensFromMsg(msg chat1.MessageUnboxed) tokenMap {
107	return tokenize(msg.SearchableText())
108}
109
110func msgIDsFromSet(set mapset.Set) []chat1.MessageID {
111	if set == nil {
112		return nil
113	}
114	msgIDSlice := []chat1.MessageID{}
115	for _, el := range set.ToSlice() {
116		msgID, ok := el.(chat1.MessageID)
117		if ok {
118			msgIDSlice = append(msgIDSlice, msgID)
119		}
120	}
121	return msgIDSlice
122}
123
124func searchMatches(msg chat1.MessageUnboxed, queryRe *regexp.Regexp) (validMatches []chat1.ChatSearchMatch) {
125	msgText := msg.SearchableText()
126	matches := queryRe.FindAllStringIndex(msgText, -1)
127	for _, m := range matches {
128		if len(m) != 2 {
129			// sanity check but regex package should always return a two
130			// element slice
131			continue
132		}
133		startIndex := m[0]
134		endIndex := m[1]
135		if startIndex != endIndex {
136			validMatches = append(validMatches, chat1.ChatSearchMatch{
137				StartIndex: startIndex,
138				EndIndex:   endIndex,
139				Match:      msgText[startIndex:endIndex],
140			})
141		}
142	}
143	return validMatches
144}
145
146// Order messages ascending by ID for presentation
147func getUIMsgs(ctx context.Context, g *globals.Context, convID chat1.ConversationID,
148	uid gregor1.UID, msgs []chat1.MessageUnboxed) (uiMsgs []chat1.UIMessage) {
149	for i := len(msgs) - 1; i >= 0; i-- {
150		msg := msgs[i]
151		uiMsg := utils.PresentMessageUnboxed(ctx, g, msg, uid, convID)
152		uiMsgs = append(uiMsgs, uiMsg)
153	}
154	return uiMsgs
155}
156
// Prefixes of the inline filters recognized inside a search query.
const (
	beforeFilter = "before:"
	afterFilter  = "after:"
	fromFilter   = "from:"
	toFilter     = "to:"
)

var (
	// senderRegex captures a from:/to: filter followed by a lowercase
	// username of at least two characters, optionally prefixed with "@".
	// Submatches: [fullMatch, filter, sender].
	senderRegex = regexp.MustCompile(fmt.Sprintf(
		"(%s|%s)(@?[a-z0-9][a-z0-9_]+)", fromFilter, toFilter))

	// dateRangeRegex captures a before:/after: filter followed by three
	// groups of digits separated by "-", "/" or ".".
	// Submatches: [fullMatch, filter, date].
	dateRangeRegex = regexp.MustCompile(fmt.Sprintf(
		`(%s|%s)(\d{1,4}[-/\.]+\d{1,2}[-/\.]+\d{1,4})`, beforeFilter, afterFilter))
)
166
167func UpgradeSearchOptsFromQuery(query string, opts chat1.SearchOpts, username string) (string, chat1.SearchOpts) {
168	query = strings.Trim(query, " ")
169	var hasQueryOpts bool
170
171	// To/From
172	matches := senderRegex.FindAllStringSubmatch(query, 2)
173	for _, match := range matches {
174		// [fullMatch, filter, sender]
175		if len(match) != 3 {
176			continue
177		}
178		hasQueryOpts = true
179		query = strings.TrimSpace(strings.Replace(query, match[0], "", 1))
180		sender := strings.TrimSpace(strings.Replace(match[2], "@", "", -1))
181		if sender == "me" {
182			sender = username
183		}
184		switch match[1] {
185		case fromFilter:
186			opts.SentBy = sender
187		case toFilter:
188			opts.SentTo = sender
189		}
190	}
191	if opts.SentTo == username {
192		opts.MatchMentions = true
193	}
194
195	matches = dateRangeRegex.FindAllStringSubmatch(query, 2)
196	for _, match := range matches {
197		// [fullMatch, filter, dateRange]
198		if len(match) != 3 {
199			continue
200		}
201		hasQueryOpts = true
202		query = strings.TrimSpace(strings.Replace(query, match[0], "", 1))
203		time, err := dateparse.ParseAny(strings.TrimSpace(match[2]))
204		if err != nil {
205			continue
206		}
207
208		gtime := gregor1.ToTime(time)
209		switch match[1] {
210		case beforeFilter:
211			opts.SentBefore = gtime
212		case afterFilter:
213			opts.SentAfter = gtime
214		}
215	}
216
217	if hasQueryOpts && len(query) == 0 {
218		query = "/.*/"
219	}
220	// IsRegex
221	if len(query) > 2 && query[0] == '/' && query[len(query)-1] == '/' {
222		query = query[1 : len(query)-1]
223		opts.IsRegex = true
224	}
225	return query, opts
226}
227
228func MinMaxIDs(conv chat1.Conversation) (min, max chat1.MessageID) {
229	// lowest msgID we care about
230	min = conv.GetMaxDeletedUpTo()
231	if min == 0 {
232		min = 1
233	}
234	// highest msgID we care about
235	max = conv.GetMaxMessageID()
236	return min, max
237}
238