1 #include "database-private.h"
2 
3 /* Parse a References header value, putting a (talloc'ed under 'ctx')
4  * copy of each referenced message-id into 'hash'.
5  *
6  * We explicitly avoid including any reference identical to
7  * 'message_id' in the result (to avoid mass confusion when a single
8  * message references itself cyclically---and yes, mail messages are
9  * not infrequent in the wild that do this---don't ask me why).
10  *
11  * Return the last reference parsed, if it is not equal to message_id.
12  */
13 static char *
parse_references(void * ctx,const char * message_id,GHashTable * hash,const char * refs)14 parse_references (void *ctx,
15 		  const char *message_id,
16 		  GHashTable *hash,
17 		  const char *refs)
18 {
19     char *ref, *last_ref = NULL;
20 
21     if (refs == NULL || *refs == '\0')
22 	return NULL;
23 
24     while (*refs) {
25 	ref = _notmuch_message_id_parse (ctx, refs, &refs);
26 
27 	if (ref && strcmp (ref, message_id)) {
28 	    g_hash_table_add (hash, ref);
29 	    last_ref = ref;
30 	}
31     }
32 
33     /* The return value of this function is used to add a parent
34      * reference to the database.  We should avoid making a message
35      * its own parent, thus the above check.
36      */
37     return talloc_strdup (ctx, last_ref);
38 }
39 
40 static const char *
_notmuch_database_generate_thread_id(notmuch_database_t * notmuch)41 _notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
42 {
43 
44     notmuch->last_thread_id++;
45 
46     sprintf (notmuch->thread_id_str, "%016" PRIx64, notmuch->last_thread_id);
47 
48     notmuch->writable_xapian_db->set_metadata ("last_thread_id", notmuch->thread_id_str);
49 
50     return notmuch->thread_id_str;
51 }
52 
53 static char *
_get_metadata_thread_id_key(void * ctx,const char * message_id)54 _get_metadata_thread_id_key (void *ctx, const char *message_id)
55 {
56     if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
57 	message_id = _notmuch_message_id_compressed (ctx, message_id);
58 
59     return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
60 			    message_id);
61 }
62 
63 
64 static notmuch_status_t
65 _resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
66 				      void *ctx,
67 				      const char *message_id,
68 				      const char **thread_id_ret);
69 
70 
71 /* Find the thread ID to which the message with 'message_id' belongs.
72  *
73  * Note: 'thread_id_ret' must not be NULL!
74  * On success '*thread_id_ret' is set to a newly talloced string belonging to
75  * 'ctx'.
76  *
77  * Note: If there is no message in the database with the given
78  * 'message_id' then a new thread_id will be allocated for this
79  * message ID and stored in the database metadata so that the
80  * thread ID can be looked up if the message is added to the database
81  * later.
82  */
83 static notmuch_status_t
_resolve_message_id_to_thread_id(notmuch_database_t * notmuch,void * ctx,const char * message_id,const char ** thread_id_ret)84 _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
85 				  void *ctx,
86 				  const char *message_id,
87 				  const char **thread_id_ret)
88 {
89     notmuch_private_status_t status;
90     notmuch_message_t *message;
91 
92     if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
93 	return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
94 						     thread_id_ret);
95 
96     /* Look for this message (regular or ghost) */
97     message = _notmuch_message_create_for_message_id (
98 	notmuch, message_id, &status);
99     if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
100 	/* Message exists */
101 	*thread_id_ret = talloc_steal (
102 	    ctx, notmuch_message_get_thread_id (message));
103     } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
104 	/* Message did not exist.  Give it a fresh thread ID and
105 	 * populate this message as a ghost message. */
106 	*thread_id_ret = talloc_strdup (
107 	    ctx, _notmuch_database_generate_thread_id (notmuch));
108 	if (! *thread_id_ret) {
109 	    status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
110 	} else {
111 	    status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
112 	    if (status == 0)
113 		/* Commit the new ghost message */
114 		_notmuch_message_sync (message);
115 	}
116     } else {
117 	/* Create failed. Fall through. */
118     }
119 
120     notmuch_message_destroy (message);
121 
122     return COERCE_STATUS (status, "Error creating ghost message");
123 }
124 
125 /* Pre-ghost messages _resolve_message_id_to_thread_id */
126 static notmuch_status_t
_resolve_message_id_to_thread_id_old(notmuch_database_t * notmuch,void * ctx,const char * message_id,const char ** thread_id_ret)127 _resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
128 				      void *ctx,
129 				      const char *message_id,
130 				      const char **thread_id_ret)
131 {
132     notmuch_status_t status;
133     notmuch_message_t *message;
134     std::string thread_id_string;
135     char *metadata_key;
136     Xapian::WritableDatabase *db;
137 
138     status = notmuch_database_find_message (notmuch, message_id, &message);
139 
140     if (status)
141 	return status;
142 
143     if (message) {
144 	*thread_id_ret = talloc_steal (ctx,
145 				       notmuch_message_get_thread_id (message));
146 
147 	notmuch_message_destroy (message);
148 
149 	return NOTMUCH_STATUS_SUCCESS;
150     }
151 
152     /* Message has not been seen yet.
153      *
154      * We may have seen a reference to it already, in which case, we
155      * can return the thread ID stored in the metadata. Otherwise, we
156      * generate a new thread ID and store it there.
157      */
158     db = notmuch->writable_xapian_db;
159     metadata_key = _get_metadata_thread_id_key (ctx, message_id);
160     thread_id_string = notmuch->xapian_db->get_metadata (metadata_key);
161 
162     if (thread_id_string.empty ()) {
163 	*thread_id_ret = talloc_strdup (ctx,
164 					_notmuch_database_generate_thread_id (notmuch));
165 	db->set_metadata (metadata_key, *thread_id_ret);
166     } else {
167 	*thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str ());
168     }
169 
170     talloc_free (metadata_key);
171 
172     return NOTMUCH_STATUS_SUCCESS;
173 }
174 
175 static notmuch_status_t
_merge_threads(notmuch_database_t * notmuch,const char * winner_thread_id,const char * loser_thread_id)176 _merge_threads (notmuch_database_t *notmuch,
177 		const char *winner_thread_id,
178 		const char *loser_thread_id)
179 {
180     Xapian::PostingIterator loser, loser_end;
181     notmuch_message_t *message = NULL;
182     notmuch_private_status_t private_status;
183     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
184 
185     _notmuch_database_find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end);
186 
187     for (; loser != loser_end; loser++) {
188 	message = _notmuch_message_create (notmuch, notmuch,
189 					   *loser, &private_status);
190 	if (message == NULL) {
191 	    ret = COERCE_STATUS (private_status,
192 				 "Cannot find document for doc_id from query");
193 	    goto DONE;
194 	}
195 
196 	_notmuch_message_remove_term (message, "thread", loser_thread_id);
197 	_notmuch_message_add_term (message, "thread", winner_thread_id);
198 	_notmuch_message_sync (message);
199 
200 	notmuch_message_destroy (message);
201 	message = NULL;
202     }
203 
204   DONE:
205     if (message)
206 	notmuch_message_destroy (message);
207 
208     return ret;
209 }
210 
211 static void
_my_talloc_free_for_g_hash(void * ptr)212 _my_talloc_free_for_g_hash (void *ptr)
213 {
214     talloc_free (ptr);
215 }
216 
217 notmuch_status_t
_notmuch_database_link_message_to_parents(notmuch_database_t * notmuch,notmuch_message_t * message,notmuch_message_file_t * message_file,const char ** thread_id)218 _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
219 					   notmuch_message_t *message,
220 					   notmuch_message_file_t *message_file,
221 					   const char **thread_id)
222 {
223     GHashTable *parents = NULL;
224     const char *refs, *in_reply_to, *in_reply_to_message_id, *strict_message_id = NULL;
225     const char *last_ref_message_id, *this_message_id;
226     GList *l, *keys = NULL;
227     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
228 
229     parents = g_hash_table_new_full (g_str_hash, g_str_equal,
230 				     _my_talloc_free_for_g_hash, NULL);
231     this_message_id = notmuch_message_get_message_id (message);
232 
233     refs = _notmuch_message_file_get_header (message_file, "references");
234     last_ref_message_id = parse_references (message,
235 					    this_message_id,
236 					    parents, refs);
237 
238     in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to");
239     if (in_reply_to)
240 	strict_message_id = _notmuch_message_id_parse_strict (message,
241 							      in_reply_to);
242 
243     in_reply_to_message_id = parse_references (message,
244 					       this_message_id,
245 					       parents, in_reply_to);
246 
247     /* For the parent of this message, use
248      * 1) the In-Reply-To header, if it looks sane, otherwise
249      * 2) the last message ID of the References header, if available.
250      * 3) Otherwise, fall back to the first message ID in
251      * the In-Reply-To header.
252      */
253 
254     if (strict_message_id) {
255 	_notmuch_message_add_term (message, "replyto", strict_message_id);
256     } else if (last_ref_message_id) {
257 	_notmuch_message_add_term (message, "replyto",
258 				   last_ref_message_id);
259     } else if (in_reply_to_message_id) {
260 	_notmuch_message_add_term (message, "replyto",
261 				   in_reply_to_message_id);
262     }
263 
264     keys = g_hash_table_get_keys (parents);
265     for (l = keys; l; l = l->next) {
266 	char *parent_message_id;
267 	const char *parent_thread_id = NULL;
268 
269 	parent_message_id = (char *) l->data;
270 
271 	_notmuch_message_add_term (message, "reference",
272 				   parent_message_id);
273 
274 	ret = _resolve_message_id_to_thread_id (notmuch,
275 						message,
276 						parent_message_id,
277 						&parent_thread_id);
278 	if (ret)
279 	    goto DONE;
280 
281 	if (*thread_id == NULL) {
282 	    *thread_id = talloc_strdup (message, parent_thread_id);
283 	    _notmuch_message_add_term (message, "thread", *thread_id);
284 	} else if (strcmp (*thread_id, parent_thread_id)) {
285 	    ret = _merge_threads (notmuch, *thread_id, parent_thread_id);
286 	    if (ret)
287 		goto DONE;
288 	}
289     }
290 
291   DONE:
292     if (keys)
293 	g_list_free (keys);
294     if (parents)
295 	g_hash_table_unref (parents);
296 
297     return ret;
298 }
299 
300 static notmuch_status_t
_notmuch_database_link_message_to_children(notmuch_database_t * notmuch,notmuch_message_t * message,const char ** thread_id)301 _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
302 					    notmuch_message_t *message,
303 					    const char **thread_id)
304 {
305     const char *message_id = notmuch_message_get_message_id (message);
306     Xapian::PostingIterator child, children_end;
307     notmuch_message_t *child_message = NULL;
308     const char *child_thread_id;
309     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
310     notmuch_private_status_t private_status;
311 
312     _notmuch_database_find_doc_ids (notmuch, "reference", message_id, &child, &children_end);
313 
314     for (; child != children_end; child++) {
315 
316 	child_message = _notmuch_message_create (message, notmuch,
317 						 *child, &private_status);
318 	if (child_message == NULL) {
319 	    ret = COERCE_STATUS (private_status,
320 				 "Cannot find document for doc_id from query");
321 	    goto DONE;
322 	}
323 
324 	child_thread_id = notmuch_message_get_thread_id (child_message);
325 	if (*thread_id == NULL) {
326 	    *thread_id = talloc_strdup (message, child_thread_id);
327 	    _notmuch_message_add_term (message, "thread", *thread_id);
328 	} else if (strcmp (*thread_id, child_thread_id)) {
329 	    _notmuch_message_remove_term (child_message, "reference",
330 					  message_id);
331 	    _notmuch_message_sync (child_message);
332 	    ret = _merge_threads (notmuch, *thread_id, child_thread_id);
333 	    if (ret)
334 		goto DONE;
335 	}
336 
337 	notmuch_message_destroy (child_message);
338 	child_message = NULL;
339     }
340 
341   DONE:
342     if (child_message)
343 	notmuch_message_destroy (child_message);
344 
345     return ret;
346 }
347 
348 /* Fetch and clear the stored thread_id for message, or NULL if none. */
349 static char *
_consume_metadata_thread_id(void * ctx,notmuch_database_t * notmuch,notmuch_message_t * message)350 _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
351 			     notmuch_message_t *message)
352 {
353     const char *message_id;
354     std::string stored_id;
355     char *metadata_key;
356 
357     message_id = notmuch_message_get_message_id (message);
358     metadata_key = _get_metadata_thread_id_key (ctx, message_id);
359 
360     /* Check if we have already seen related messages to this one.
361      * If we have then use the thread_id that we stored at that time.
362      */
363     stored_id = notmuch->xapian_db->get_metadata (metadata_key);
364     if (stored_id.empty ()) {
365 	return NULL;
366     } else {
367 	/* Clear the metadata for this message ID. We don't need it
368 	 * anymore. */
369 	notmuch->writable_xapian_db->set_metadata (metadata_key, "");
370 
371 	return talloc_strdup (ctx, stored_id.c_str ());
372     }
373 }
374 
375 /* Given a blank or ghost 'message' and its corresponding
376  * 'message_file' link it to existing threads in the database.
377  *
378  * First, if is_ghost, this retrieves the thread ID already stored in
379  * the message (which will be the case if a message was previously
380  * added that referenced this one).  If the message is blank
381  * (!is_ghost), it doesn't have a thread ID yet (we'll generate one
382  * later in this function).  If the database does not support ghost
383  * messages, this checks for a thread ID stored in database metadata
384  * for this message ID.
385  *
386  * Second, we look at 'message_file' and its link-relevant headers
387  * (References and In-Reply-To) for message IDs.
388  *
389  * Finally, we look in the database for existing message that
390  * reference 'message'.
391  *
392  * In all cases, we assign to the current message the first thread ID
393  * found. We will also merge any existing, distinct threads where this
394  * message belongs to both, (which is not uncommon when messages are
395  * processed out of order).
396  *
397  * Finally, if no thread ID has been found through referenced messages, we
398  * call _notmuch_message_generate_thread_id to generate a new thread
399  * ID. This should only happen for new, top-level messages, (no
400  * References or In-Reply-To header in this message, and no previously
401  * added message refers to this message).
402  */
403 static notmuch_status_t
_notmuch_database_link_message(notmuch_database_t * notmuch,notmuch_message_t * message,notmuch_message_file_t * message_file,bool is_ghost,bool is_new)404 _notmuch_database_link_message (notmuch_database_t *notmuch,
405 				notmuch_message_t *message,
406 				notmuch_message_file_t *message_file,
407 				bool is_ghost,
408 				bool is_new)
409 {
410     void *local = talloc_new (NULL);
411     notmuch_status_t status;
412     const char *thread_id = NULL;
413 
414     /* Check if the message already had a thread ID */
415     if (! is_new) {
416 	thread_id = notmuch_message_get_thread_id (message);
417     } else if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
418 	if (is_ghost)
419 	    thread_id = notmuch_message_get_thread_id (message);
420     } else {
421 	thread_id = _consume_metadata_thread_id (local, notmuch, message);
422 	if (thread_id)
423 	    _notmuch_message_add_term (message, "thread", thread_id);
424     }
425 
426     status = _notmuch_database_link_message_to_parents (notmuch, message,
427 							message_file,
428 							&thread_id);
429     if (status)
430 	goto DONE;
431 
432     if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) {
433 	/* In general, it shouldn't be necessary to link children,
434 	 * since the earlier indexing of those children will have
435 	 * stored a thread ID for the missing parent.  However, prior
436 	 * to ghost messages, these stored thread IDs were NOT
437 	 * rewritten during thread merging (and there was no
438 	 * performant way to do so), so if indexed children were
439 	 * pulled into a different thread ID by a merge, it was
440 	 * necessary to pull them *back* into the stored thread ID of
441 	 * the parent.  With ghost messages, we just rewrite the
442 	 * stored thread IDs during merging, so this workaround isn't
443 	 * necessary. */
444 	status = _notmuch_database_link_message_to_children (notmuch, message,
445 							     &thread_id);
446 	if (status)
447 	    goto DONE;
448     }
449 
450     /* If not part of any existing thread, generate a new thread ID. */
451     if (thread_id == NULL) {
452 	thread_id = _notmuch_database_generate_thread_id (notmuch);
453 
454 	_notmuch_message_add_term (message, "thread", thread_id);
455     }
456 
457   DONE:
458     talloc_free (local);
459 
460     return status;
461 }
462 
463 notmuch_status_t
notmuch_database_index_file(notmuch_database_t * notmuch,const char * filename,notmuch_indexopts_t * indexopts,notmuch_message_t ** message_ret)464 notmuch_database_index_file (notmuch_database_t *notmuch,
465 			     const char *filename,
466 			     notmuch_indexopts_t *indexopts,
467 			     notmuch_message_t **message_ret)
468 {
469     notmuch_message_file_t *message_file;
470     notmuch_message_t *message = NULL;
471     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
472     notmuch_private_status_t private_status;
473     notmuch_bool_t is_ghost = false, is_new = false;
474     notmuch_indexopts_t *def_indexopts = NULL;
475 
476     const char *date;
477     const char *from, *to, *subject;
478     char *message_id = NULL;
479 
480     if (message_ret)
481 	*message_ret = NULL;
482 
483     ret = _notmuch_database_ensure_writable (notmuch);
484     if (ret)
485 	return ret;
486 
487     message_file = _notmuch_message_file_open (notmuch, filename);
488     if (message_file == NULL)
489 	return NOTMUCH_STATUS_FILE_ERROR;
490 
491     /* Adding a message may change many documents.  Do this all
492      * atomically. */
493     ret = notmuch_database_begin_atomic (notmuch);
494     if (ret)
495 	goto DONE;
496 
497     ret = _notmuch_message_file_get_headers (message_file,
498 					     &from, &subject, &to, &date,
499 					     &message_id);
500     if (ret)
501 	goto DONE;
502 
503     try {
504 	/* Now that we have a message ID, we get a message object,
505 	 * (which may or may not reference an existing document in the
506 	 * database). */
507 
508 	message = _notmuch_message_create_for_message_id (notmuch,
509 							  message_id,
510 							  &private_status);
511 
512 	talloc_free (message_id);
513 
514 	/* We cannot call notmuch_message_get_flag for a new message */
515 	switch (private_status) {
516 	case NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
517 	    is_ghost = false;
518 	    is_new = true;
519 	    break;
520 	case NOTMUCH_PRIVATE_STATUS_SUCCESS:
521 	    ret = notmuch_message_get_flag_st (message, NOTMUCH_MESSAGE_FLAG_GHOST, &is_ghost);
522 	    if (ret)
523 		goto DONE;
524 	    is_new = false;
525 	    break;
526 	default:
527 	    ret = COERCE_STATUS (private_status,
528 				 "Unexpected status value from _notmuch_message_create_for_message_id");
529 	    goto DONE;
530 	}
531 
532 	ret = _notmuch_message_add_filename (message, filename);
533 	if (ret)
534 	    goto DONE;
535 
536 	if (is_new || is_ghost) {
537 	    _notmuch_message_add_term (message, "type", "mail");
538 	    if (is_ghost)
539 		/* Convert ghost message to a regular message */
540 		_notmuch_message_remove_term (message, "type", "ghost");
541 	}
542 
543 	ret = _notmuch_database_link_message (notmuch, message,
544 					      message_file, is_ghost, is_new);
545 	if (ret)
546 	    goto DONE;
547 
548 	if (is_new || is_ghost)
549 	    _notmuch_message_set_header_values (message, date, from, subject);
550 
551 	if (! indexopts) {
552 	    def_indexopts = notmuch_database_get_default_indexopts (notmuch);
553 	    indexopts = def_indexopts;
554 	}
555 
556 	ret = _notmuch_message_index_file (message, indexopts, message_file);
557 	if (ret)
558 	    goto DONE;
559 
560 	if (! is_new && ! is_ghost)
561 	    ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
562 
563 	_notmuch_message_sync (message);
564     } catch (const Xapian::Error &error) {
565 	_notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n",
566 			       error.get_msg ().c_str ());
567 	notmuch->exception_reported = true;
568 	ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
569 	goto DONE;
570     }
571 
572   DONE:
573     if (def_indexopts)
574 	notmuch_indexopts_destroy (def_indexopts);
575 
576     if (message) {
577 	if ((ret == NOTMUCH_STATUS_SUCCESS ||
578 	     ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret)
579 	    *message_ret = message;
580 	else
581 	    notmuch_message_destroy (message);
582     }
583 
584     if (message_file)
585 	_notmuch_message_file_close (message_file);
586 
587     ret2 = notmuch_database_end_atomic (notmuch);
588     if ((ret == NOTMUCH_STATUS_SUCCESS ||
589 	 ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) &&
590 	ret2 != NOTMUCH_STATUS_SUCCESS)
591 	ret = ret2;
592 
593     return ret;
594 }
595 
596 notmuch_status_t
notmuch_database_add_message(notmuch_database_t * notmuch,const char * filename,notmuch_message_t ** message_ret)597 notmuch_database_add_message (notmuch_database_t *notmuch,
598 			      const char *filename,
599 			      notmuch_message_t **message_ret)
600 {
601     return notmuch_database_index_file (notmuch, filename,
602 					NULL,
603 					message_ret);
604 
605 }
606