1 #include "database-private.h"
2
3 /* Parse a References header value, putting a (talloc'ed under 'ctx')
4 * copy of each referenced message-id into 'hash'.
5 *
6 * We explicitly avoid including any reference identical to
7 * 'message_id' in the result (to avoid mass confusion when a single
8 * message references itself cyclically---and yes, mail messages are
9 * not infrequent in the wild that do this---don't ask me why).
10 *
11 * Return the last reference parsed, if it is not equal to message_id.
12 */
13 static char *
parse_references(void * ctx,const char * message_id,GHashTable * hash,const char * refs)14 parse_references (void *ctx,
15 const char *message_id,
16 GHashTable *hash,
17 const char *refs)
18 {
19 char *ref, *last_ref = NULL;
20
21 if (refs == NULL || *refs == '\0')
22 return NULL;
23
24 while (*refs) {
25 ref = _notmuch_message_id_parse (ctx, refs, &refs);
26
27 if (ref && strcmp (ref, message_id)) {
28 g_hash_table_add (hash, ref);
29 last_ref = ref;
30 }
31 }
32
33 /* The return value of this function is used to add a parent
34 * reference to the database. We should avoid making a message
35 * its own parent, thus the above check.
36 */
37 return talloc_strdup (ctx, last_ref);
38 }
39
40 static const char *
_notmuch_database_generate_thread_id(notmuch_database_t * notmuch)41 _notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
42 {
43
44 notmuch->last_thread_id++;
45
46 sprintf (notmuch->thread_id_str, "%016" PRIx64, notmuch->last_thread_id);
47
48 notmuch->writable_xapian_db->set_metadata ("last_thread_id", notmuch->thread_id_str);
49
50 return notmuch->thread_id_str;
51 }
52
53 static char *
_get_metadata_thread_id_key(void * ctx,const char * message_id)54 _get_metadata_thread_id_key (void *ctx, const char *message_id)
55 {
56 if (strlen (message_id) > NOTMUCH_MESSAGE_ID_MAX)
57 message_id = _notmuch_message_id_compressed (ctx, message_id);
58
59 return talloc_asprintf (ctx, NOTMUCH_METADATA_THREAD_ID_PREFIX "%s",
60 message_id);
61 }
62
63
64 static notmuch_status_t
65 _resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
66 void *ctx,
67 const char *message_id,
68 const char **thread_id_ret);
69
70
71 /* Find the thread ID to which the message with 'message_id' belongs.
72 *
73 * Note: 'thread_id_ret' must not be NULL!
74 * On success '*thread_id_ret' is set to a newly talloced string belonging to
75 * 'ctx'.
76 *
77 * Note: If there is no message in the database with the given
78 * 'message_id' then a new thread_id will be allocated for this
79 * message ID and stored in the database metadata so that the
80 * thread ID can be looked up if the message is added to the database
81 * later.
82 */
83 static notmuch_status_t
_resolve_message_id_to_thread_id(notmuch_database_t * notmuch,void * ctx,const char * message_id,const char ** thread_id_ret)84 _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
85 void *ctx,
86 const char *message_id,
87 const char **thread_id_ret)
88 {
89 notmuch_private_status_t status;
90 notmuch_message_t *message;
91
92 if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
93 return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
94 thread_id_ret);
95
96 /* Look for this message (regular or ghost) */
97 message = _notmuch_message_create_for_message_id (
98 notmuch, message_id, &status);
99 if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
100 /* Message exists */
101 *thread_id_ret = talloc_steal (
102 ctx, notmuch_message_get_thread_id (message));
103 } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
104 /* Message did not exist. Give it a fresh thread ID and
105 * populate this message as a ghost message. */
106 *thread_id_ret = talloc_strdup (
107 ctx, _notmuch_database_generate_thread_id (notmuch));
108 if (! *thread_id_ret) {
109 status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
110 } else {
111 status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
112 if (status == 0)
113 /* Commit the new ghost message */
114 _notmuch_message_sync (message);
115 }
116 } else {
117 /* Create failed. Fall through. */
118 }
119
120 notmuch_message_destroy (message);
121
122 return COERCE_STATUS (status, "Error creating ghost message");
123 }
124
125 /* Pre-ghost messages _resolve_message_id_to_thread_id */
126 static notmuch_status_t
_resolve_message_id_to_thread_id_old(notmuch_database_t * notmuch,void * ctx,const char * message_id,const char ** thread_id_ret)127 _resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
128 void *ctx,
129 const char *message_id,
130 const char **thread_id_ret)
131 {
132 notmuch_status_t status;
133 notmuch_message_t *message;
134 std::string thread_id_string;
135 char *metadata_key;
136 Xapian::WritableDatabase *db;
137
138 status = notmuch_database_find_message (notmuch, message_id, &message);
139
140 if (status)
141 return status;
142
143 if (message) {
144 *thread_id_ret = talloc_steal (ctx,
145 notmuch_message_get_thread_id (message));
146
147 notmuch_message_destroy (message);
148
149 return NOTMUCH_STATUS_SUCCESS;
150 }
151
152 /* Message has not been seen yet.
153 *
154 * We may have seen a reference to it already, in which case, we
155 * can return the thread ID stored in the metadata. Otherwise, we
156 * generate a new thread ID and store it there.
157 */
158 db = notmuch->writable_xapian_db;
159 metadata_key = _get_metadata_thread_id_key (ctx, message_id);
160 thread_id_string = notmuch->xapian_db->get_metadata (metadata_key);
161
162 if (thread_id_string.empty ()) {
163 *thread_id_ret = talloc_strdup (ctx,
164 _notmuch_database_generate_thread_id (notmuch));
165 db->set_metadata (metadata_key, *thread_id_ret);
166 } else {
167 *thread_id_ret = talloc_strdup (ctx, thread_id_string.c_str ());
168 }
169
170 talloc_free (metadata_key);
171
172 return NOTMUCH_STATUS_SUCCESS;
173 }
174
175 static notmuch_status_t
_merge_threads(notmuch_database_t * notmuch,const char * winner_thread_id,const char * loser_thread_id)176 _merge_threads (notmuch_database_t *notmuch,
177 const char *winner_thread_id,
178 const char *loser_thread_id)
179 {
180 Xapian::PostingIterator loser, loser_end;
181 notmuch_message_t *message = NULL;
182 notmuch_private_status_t private_status;
183 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
184
185 _notmuch_database_find_doc_ids (notmuch, "thread", loser_thread_id, &loser, &loser_end);
186
187 for (; loser != loser_end; loser++) {
188 message = _notmuch_message_create (notmuch, notmuch,
189 *loser, &private_status);
190 if (message == NULL) {
191 ret = COERCE_STATUS (private_status,
192 "Cannot find document for doc_id from query");
193 goto DONE;
194 }
195
196 _notmuch_message_remove_term (message, "thread", loser_thread_id);
197 _notmuch_message_add_term (message, "thread", winner_thread_id);
198 _notmuch_message_sync (message);
199
200 notmuch_message_destroy (message);
201 message = NULL;
202 }
203
204 DONE:
205 if (message)
206 notmuch_message_destroy (message);
207
208 return ret;
209 }
210
211 static void
_my_talloc_free_for_g_hash(void * ptr)212 _my_talloc_free_for_g_hash (void *ptr)
213 {
214 talloc_free (ptr);
215 }
216
217 notmuch_status_t
_notmuch_database_link_message_to_parents(notmuch_database_t * notmuch,notmuch_message_t * message,notmuch_message_file_t * message_file,const char ** thread_id)218 _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
219 notmuch_message_t *message,
220 notmuch_message_file_t *message_file,
221 const char **thread_id)
222 {
223 GHashTable *parents = NULL;
224 const char *refs, *in_reply_to, *in_reply_to_message_id, *strict_message_id = NULL;
225 const char *last_ref_message_id, *this_message_id;
226 GList *l, *keys = NULL;
227 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
228
229 parents = g_hash_table_new_full (g_str_hash, g_str_equal,
230 _my_talloc_free_for_g_hash, NULL);
231 this_message_id = notmuch_message_get_message_id (message);
232
233 refs = _notmuch_message_file_get_header (message_file, "references");
234 last_ref_message_id = parse_references (message,
235 this_message_id,
236 parents, refs);
237
238 in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to");
239 if (in_reply_to)
240 strict_message_id = _notmuch_message_id_parse_strict (message,
241 in_reply_to);
242
243 in_reply_to_message_id = parse_references (message,
244 this_message_id,
245 parents, in_reply_to);
246
247 /* For the parent of this message, use
248 * 1) the In-Reply-To header, if it looks sane, otherwise
249 * 2) the last message ID of the References header, if available.
250 * 3) Otherwise, fall back to the first message ID in
251 * the In-Reply-To header.
252 */
253
254 if (strict_message_id) {
255 _notmuch_message_add_term (message, "replyto", strict_message_id);
256 } else if (last_ref_message_id) {
257 _notmuch_message_add_term (message, "replyto",
258 last_ref_message_id);
259 } else if (in_reply_to_message_id) {
260 _notmuch_message_add_term (message, "replyto",
261 in_reply_to_message_id);
262 }
263
264 keys = g_hash_table_get_keys (parents);
265 for (l = keys; l; l = l->next) {
266 char *parent_message_id;
267 const char *parent_thread_id = NULL;
268
269 parent_message_id = (char *) l->data;
270
271 _notmuch_message_add_term (message, "reference",
272 parent_message_id);
273
274 ret = _resolve_message_id_to_thread_id (notmuch,
275 message,
276 parent_message_id,
277 &parent_thread_id);
278 if (ret)
279 goto DONE;
280
281 if (*thread_id == NULL) {
282 *thread_id = talloc_strdup (message, parent_thread_id);
283 _notmuch_message_add_term (message, "thread", *thread_id);
284 } else if (strcmp (*thread_id, parent_thread_id)) {
285 ret = _merge_threads (notmuch, *thread_id, parent_thread_id);
286 if (ret)
287 goto DONE;
288 }
289 }
290
291 DONE:
292 if (keys)
293 g_list_free (keys);
294 if (parents)
295 g_hash_table_unref (parents);
296
297 return ret;
298 }
299
300 static notmuch_status_t
_notmuch_database_link_message_to_children(notmuch_database_t * notmuch,notmuch_message_t * message,const char ** thread_id)301 _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
302 notmuch_message_t *message,
303 const char **thread_id)
304 {
305 const char *message_id = notmuch_message_get_message_id (message);
306 Xapian::PostingIterator child, children_end;
307 notmuch_message_t *child_message = NULL;
308 const char *child_thread_id;
309 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
310 notmuch_private_status_t private_status;
311
312 _notmuch_database_find_doc_ids (notmuch, "reference", message_id, &child, &children_end);
313
314 for (; child != children_end; child++) {
315
316 child_message = _notmuch_message_create (message, notmuch,
317 *child, &private_status);
318 if (child_message == NULL) {
319 ret = COERCE_STATUS (private_status,
320 "Cannot find document for doc_id from query");
321 goto DONE;
322 }
323
324 child_thread_id = notmuch_message_get_thread_id (child_message);
325 if (*thread_id == NULL) {
326 *thread_id = talloc_strdup (message, child_thread_id);
327 _notmuch_message_add_term (message, "thread", *thread_id);
328 } else if (strcmp (*thread_id, child_thread_id)) {
329 _notmuch_message_remove_term (child_message, "reference",
330 message_id);
331 _notmuch_message_sync (child_message);
332 ret = _merge_threads (notmuch, *thread_id, child_thread_id);
333 if (ret)
334 goto DONE;
335 }
336
337 notmuch_message_destroy (child_message);
338 child_message = NULL;
339 }
340
341 DONE:
342 if (child_message)
343 notmuch_message_destroy (child_message);
344
345 return ret;
346 }
347
348 /* Fetch and clear the stored thread_id for message, or NULL if none. */
349 static char *
_consume_metadata_thread_id(void * ctx,notmuch_database_t * notmuch,notmuch_message_t * message)350 _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
351 notmuch_message_t *message)
352 {
353 const char *message_id;
354 std::string stored_id;
355 char *metadata_key;
356
357 message_id = notmuch_message_get_message_id (message);
358 metadata_key = _get_metadata_thread_id_key (ctx, message_id);
359
360 /* Check if we have already seen related messages to this one.
361 * If we have then use the thread_id that we stored at that time.
362 */
363 stored_id = notmuch->xapian_db->get_metadata (metadata_key);
364 if (stored_id.empty ()) {
365 return NULL;
366 } else {
367 /* Clear the metadata for this message ID. We don't need it
368 * anymore. */
369 notmuch->writable_xapian_db->set_metadata (metadata_key, "");
370
371 return talloc_strdup (ctx, stored_id.c_str ());
372 }
373 }
374
375 /* Given a blank or ghost 'message' and its corresponding
376 * 'message_file' link it to existing threads in the database.
377 *
378 * First, if is_ghost, this retrieves the thread ID already stored in
379 * the message (which will be the case if a message was previously
380 * added that referenced this one). If the message is blank
381 * (!is_ghost), it doesn't have a thread ID yet (we'll generate one
382 * later in this function). If the database does not support ghost
383 * messages, this checks for a thread ID stored in database metadata
384 * for this message ID.
385 *
386 * Second, we look at 'message_file' and its link-relevant headers
387 * (References and In-Reply-To) for message IDs.
388 *
389 * Finally, we look in the database for existing message that
390 * reference 'message'.
391 *
392 * In all cases, we assign to the current message the first thread ID
393 * found. We will also merge any existing, distinct threads where this
394 * message belongs to both, (which is not uncommon when messages are
395 * processed out of order).
396 *
397 * Finally, if no thread ID has been found through referenced messages, we
398 * call _notmuch_message_generate_thread_id to generate a new thread
399 * ID. This should only happen for new, top-level messages, (no
400 * References or In-Reply-To header in this message, and no previously
401 * added message refers to this message).
402 */
403 static notmuch_status_t
_notmuch_database_link_message(notmuch_database_t * notmuch,notmuch_message_t * message,notmuch_message_file_t * message_file,bool is_ghost,bool is_new)404 _notmuch_database_link_message (notmuch_database_t *notmuch,
405 notmuch_message_t *message,
406 notmuch_message_file_t *message_file,
407 bool is_ghost,
408 bool is_new)
409 {
410 void *local = talloc_new (NULL);
411 notmuch_status_t status;
412 const char *thread_id = NULL;
413
414 /* Check if the message already had a thread ID */
415 if (! is_new) {
416 thread_id = notmuch_message_get_thread_id (message);
417 } else if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
418 if (is_ghost)
419 thread_id = notmuch_message_get_thread_id (message);
420 } else {
421 thread_id = _consume_metadata_thread_id (local, notmuch, message);
422 if (thread_id)
423 _notmuch_message_add_term (message, "thread", thread_id);
424 }
425
426 status = _notmuch_database_link_message_to_parents (notmuch, message,
427 message_file,
428 &thread_id);
429 if (status)
430 goto DONE;
431
432 if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS)) {
433 /* In general, it shouldn't be necessary to link children,
434 * since the earlier indexing of those children will have
435 * stored a thread ID for the missing parent. However, prior
436 * to ghost messages, these stored thread IDs were NOT
437 * rewritten during thread merging (and there was no
438 * performant way to do so), so if indexed children were
439 * pulled into a different thread ID by a merge, it was
440 * necessary to pull them *back* into the stored thread ID of
441 * the parent. With ghost messages, we just rewrite the
442 * stored thread IDs during merging, so this workaround isn't
443 * necessary. */
444 status = _notmuch_database_link_message_to_children (notmuch, message,
445 &thread_id);
446 if (status)
447 goto DONE;
448 }
449
450 /* If not part of any existing thread, generate a new thread ID. */
451 if (thread_id == NULL) {
452 thread_id = _notmuch_database_generate_thread_id (notmuch);
453
454 _notmuch_message_add_term (message, "thread", thread_id);
455 }
456
457 DONE:
458 talloc_free (local);
459
460 return status;
461 }
462
463 notmuch_status_t
notmuch_database_index_file(notmuch_database_t * notmuch,const char * filename,notmuch_indexopts_t * indexopts,notmuch_message_t ** message_ret)464 notmuch_database_index_file (notmuch_database_t *notmuch,
465 const char *filename,
466 notmuch_indexopts_t *indexopts,
467 notmuch_message_t **message_ret)
468 {
469 notmuch_message_file_t *message_file;
470 notmuch_message_t *message = NULL;
471 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
472 notmuch_private_status_t private_status;
473 notmuch_bool_t is_ghost = false, is_new = false;
474 notmuch_indexopts_t *def_indexopts = NULL;
475
476 const char *date;
477 const char *from, *to, *subject;
478 char *message_id = NULL;
479
480 if (message_ret)
481 *message_ret = NULL;
482
483 ret = _notmuch_database_ensure_writable (notmuch);
484 if (ret)
485 return ret;
486
487 message_file = _notmuch_message_file_open (notmuch, filename);
488 if (message_file == NULL)
489 return NOTMUCH_STATUS_FILE_ERROR;
490
491 /* Adding a message may change many documents. Do this all
492 * atomically. */
493 ret = notmuch_database_begin_atomic (notmuch);
494 if (ret)
495 goto DONE;
496
497 ret = _notmuch_message_file_get_headers (message_file,
498 &from, &subject, &to, &date,
499 &message_id);
500 if (ret)
501 goto DONE;
502
503 try {
504 /* Now that we have a message ID, we get a message object,
505 * (which may or may not reference an existing document in the
506 * database). */
507
508 message = _notmuch_message_create_for_message_id (notmuch,
509 message_id,
510 &private_status);
511
512 talloc_free (message_id);
513
514 /* We cannot call notmuch_message_get_flag for a new message */
515 switch (private_status) {
516 case NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND:
517 is_ghost = false;
518 is_new = true;
519 break;
520 case NOTMUCH_PRIVATE_STATUS_SUCCESS:
521 ret = notmuch_message_get_flag_st (message, NOTMUCH_MESSAGE_FLAG_GHOST, &is_ghost);
522 if (ret)
523 goto DONE;
524 is_new = false;
525 break;
526 default:
527 ret = COERCE_STATUS (private_status,
528 "Unexpected status value from _notmuch_message_create_for_message_id");
529 goto DONE;
530 }
531
532 ret = _notmuch_message_add_filename (message, filename);
533 if (ret)
534 goto DONE;
535
536 if (is_new || is_ghost) {
537 _notmuch_message_add_term (message, "type", "mail");
538 if (is_ghost)
539 /* Convert ghost message to a regular message */
540 _notmuch_message_remove_term (message, "type", "ghost");
541 }
542
543 ret = _notmuch_database_link_message (notmuch, message,
544 message_file, is_ghost, is_new);
545 if (ret)
546 goto DONE;
547
548 if (is_new || is_ghost)
549 _notmuch_message_set_header_values (message, date, from, subject);
550
551 if (! indexopts) {
552 def_indexopts = notmuch_database_get_default_indexopts (notmuch);
553 indexopts = def_indexopts;
554 }
555
556 ret = _notmuch_message_index_file (message, indexopts, message_file);
557 if (ret)
558 goto DONE;
559
560 if (! is_new && ! is_ghost)
561 ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
562
563 _notmuch_message_sync (message);
564 } catch (const Xapian::Error &error) {
565 _notmuch_database_log (notmuch, "A Xapian exception occurred adding message: %s.\n",
566 error.get_msg ().c_str ());
567 notmuch->exception_reported = true;
568 ret = NOTMUCH_STATUS_XAPIAN_EXCEPTION;
569 goto DONE;
570 }
571
572 DONE:
573 if (def_indexopts)
574 notmuch_indexopts_destroy (def_indexopts);
575
576 if (message) {
577 if ((ret == NOTMUCH_STATUS_SUCCESS ||
578 ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) && message_ret)
579 *message_ret = message;
580 else
581 notmuch_message_destroy (message);
582 }
583
584 if (message_file)
585 _notmuch_message_file_close (message_file);
586
587 ret2 = notmuch_database_end_atomic (notmuch);
588 if ((ret == NOTMUCH_STATUS_SUCCESS ||
589 ret == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) &&
590 ret2 != NOTMUCH_STATUS_SUCCESS)
591 ret = ret2;
592
593 return ret;
594 }
595
596 notmuch_status_t
notmuch_database_add_message(notmuch_database_t * notmuch,const char * filename,notmuch_message_t ** message_ret)597 notmuch_database_add_message (notmuch_database_t *notmuch,
598 const char *filename,
599 notmuch_message_t **message_ret)
600 {
601 return notmuch_database_index_file (notmuch, filename,
602 NULL,
603 message_ret);
604
605 }
606