1 /* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
2 
3 /*
4    IMAP clients can work in many different ways. There are basically 2
5    types:
6 
7    1. Online clients that ask for the same information multiple times (e.g.
8       webmails, Pine)
9 
10    2. Offline clients that usually download first some of the interesting
11       message headers and only after that the message bodies (possibly
12       automatically, or possibly only when the user opens the mail). Most
13       non-webmail IMAP clients behave like this.
14 
15    Cache file is extremely helpful with the type 1 clients. The first time
16    that client requests message headers or some other metadata they're
17    stored into the cache file. The second time they ask for the same
18    information Dovecot can now get it quickly from the cache file instead
19    of opening the message and parsing the headers.
20 
21    For type 2 clients the cache file is also somewhat helpful if client
22    fetches any initial metadata. Some of the information is helpful in any
23    case, for example it's required to know the message's virtual size when
24    downloading the message with IMAP. Without the virtual size being in cache
   Dovecot has to read the whole message first to calculate it, which
26    increases CPU usage.
27 
28    Only the specified fields that client(s) have asked for earlier are
29    stored into cache file. This allows Dovecot to be adaptive to different
30    clients' needs and still not waste disk space (and cause extra disk
31    I/O!) for fields that client never needs.
32 
33    Dovecot can cache fields either permanently or temporarily. Temporarily
34    cached fields are dropped from the cache file after about a week.
35    Dovecot uses two rules to determine when data should be cached
36    permanently instead of temporarily:
37 
38    1. Client accessed messages in non-sequential order within this session.
39       This most likely means it doesn't have a local cache.
40 
41    2. Client accessed a message older than one week.
42 
43    These rules might not always work optimally, so Dovecot also re-evaluates
44    the caching decisions once in a while:
45 
46    - When caching decision is YES (permanently cache the field), the field's
47      last_used is updated only when the caching decision has been verified to
48      be correct.
49 
50    - When caching decision is TEMP, the last_used is updated whenever the field
51      is accessed.
52 
53    - When last_used becomes 30 days old (or unaccessed_field_drop_secs) a
54      YES caching decision is changed to TEMP.
55 
56    - When last_used becomes 60 days old (or 2*unaccessed_field_drop_secs) a
57      TEMP caching decision is changed to NO.
58 */
59 
60 #include "lib.h"
61 #include "ioloop.h"
62 #include "mail-cache-private.h"
63 
mail_cache_decision_to_string(enum mail_cache_decision_type dec)64 const char *mail_cache_decision_to_string(enum mail_cache_decision_type dec)
65 {
66 	switch (dec & ENUM_NEGATE(MAIL_CACHE_DECISION_FORCED)) {
67 	case MAIL_CACHE_DECISION_NO:
68 		return "no";
69 	case MAIL_CACHE_DECISION_TEMP:
70 		return "temp";
71 	case MAIL_CACHE_DECISION_YES:
72 		return "yes";
73 	}
74 	i_unreached();
75 }
76 
77 struct event_passthrough *
mail_cache_decision_changed_event(struct mail_cache * cache,struct event * event,unsigned int field)78 mail_cache_decision_changed_event(struct mail_cache *cache, struct event *event,
79 				  unsigned int field)
80 {
81 	return event_create_passthrough(event)->
82 		set_name("mail_cache_decision_changed")->
83 		add_str("field", cache->fields[field].field.name)->
84 		add_int("last_used", cache->fields[field].field.last_used);
85 }
86 
87 static void
mail_cache_update_last_used(struct mail_cache * cache,unsigned int field)88 mail_cache_update_last_used(struct mail_cache *cache, unsigned int field)
89 {
90 	cache->fields[field].field.last_used = (uint32_t)ioloop_time;
91 	if (cache->field_file_map[field] != (uint32_t)-1)
92 		cache->field_header_write_pending = TRUE;
93 }
94 
mail_cache_decision_state_update(struct mail_cache_view * view,uint32_t seq,unsigned int field)95 void mail_cache_decision_state_update(struct mail_cache_view *view,
96 				      uint32_t seq, unsigned int field)
97 {
98 	struct mail_cache *cache = view->cache;
99 	enum mail_cache_decision_type dec;
100 	const struct mail_index_header *hdr;
101 	uint32_t uid;
102 
103 	i_assert(field < cache->fields_count);
104 
105 	if (view->no_decision_updates)
106 		return;
107 
108 	dec = cache->fields[field].field.decision;
109 	if (dec == (MAIL_CACHE_DECISION_NO | MAIL_CACHE_DECISION_FORCED)) {
110 		/* don't update last_used */
111 		return;
112 	}
113 
114 	/* update last_used about once a day */
115 	bool last_used_need_update =
116 		ioloop_time - cache->fields[field].field.last_used > 3600*24;
117 
118 	if (dec == MAIL_CACHE_DECISION_NO ||
119 	    (dec & MAIL_CACHE_DECISION_FORCED) != 0) {
120 		/* a) forced decision
121 		   b) not cached, mail_cache_decision_add() will handle this */
122 		if (last_used_need_update)
123 			mail_cache_update_last_used(cache, field);
124 		return;
125 	}
126 	if (dec == MAIL_CACHE_DECISION_YES) {
127 		if (!last_used_need_update)
128 			return;
129 		/* update last_used only when we can confirm that the YES
130 		   decision is still correct. */
131 	} else {
132 		/* see if we want to change decision from TEMP to YES */
133 		i_assert(dec == MAIL_CACHE_DECISION_TEMP);
134 		if (last_used_need_update)
135 			mail_cache_update_last_used(cache, field);
136 	}
137 
138 	mail_index_lookup_uid(view->view, seq, &uid);
139 	hdr = mail_index_get_header(view->view);
140 
141 	if (uid >= cache->fields[field].uid_highwater &&
142 	    uid >= hdr->day_first_uid[7]) {
143 		cache->fields[field].uid_highwater = uid;
144 	} else if (dec == MAIL_CACHE_DECISION_YES) {
145 		/* Confirmed that we still want to preserve YES as cache
146 		   decision. We can update last_used now. */
147 		i_assert(last_used_need_update);
148 		mail_cache_update_last_used(cache, field);
149 	} else {
150 		/* a) nonordered access within this session. if client doesn't
151 		      request messages in growing order, we assume it doesn't
152 		      have a permanent local cache.
153 		   b) accessing message older than one week. assume it's a
154 		      client with no local cache. if it was just a new client
155 		      generating the local cache for the first time, we'll
156 		      drop back to TEMP within few months. */
157 		i_assert(dec == MAIL_CACHE_DECISION_TEMP);
158 		cache->fields[field].field.decision = MAIL_CACHE_DECISION_YES;
159 		cache->fields[field].decision_dirty = TRUE;
160 		cache->field_header_write_pending = TRUE;
161 
162 		const char *reason = uid < hdr->day_first_uid[7] ?
163 			"old_mail" : "unordered_access";
164 		struct event_passthrough *e =
165 			mail_cache_decision_changed_event(
166 				view->cache, view->cache->event, field)->
167 			add_str("reason", reason)->
168 			add_int("uid", uid)->
169 			add_str("old_decision", "temp")->
170 			add_str("new_decision", "yes");
171 		e_debug(e->event(), "Changing field %s decision temp -> yes (uid=%u)",
172 			cache->fields[field].field.name, uid);
173 	}
174 }
175 
mail_cache_decision_add(struct mail_cache_view * view,uint32_t seq,unsigned int field)176 void mail_cache_decision_add(struct mail_cache_view *view, uint32_t seq,
177 			     unsigned int field)
178 {
179 	struct mail_cache *cache = view->cache;
180 	struct mail_cache_field_private *priv;
181 	uint32_t uid;
182 
183 	i_assert(field < cache->fields_count);
184 
185 	if (view->no_decision_updates)
186 		return;
187 
188 	priv = &cache->fields[field];
189 	if (priv->field.decision != MAIL_CACHE_DECISION_NO &&
190 	    priv->field.last_used != 0) {
191 		/* a) forced decision
192 		   b) we're already caching it, so it just wasn't in cache */
193 		return;
194 	}
195 
196 	/* field used the first time */
197 	if (priv->field.decision == MAIL_CACHE_DECISION_NO)
198 		priv->field.decision = MAIL_CACHE_DECISION_TEMP;
199 	priv->field.last_used = ioloop_time;
200 	priv->decision_dirty = TRUE;
201 	cache->field_header_write_pending = TRUE;
202 
203 	mail_index_lookup_uid(view->view, seq, &uid);
204 	priv->uid_highwater = uid;
205 
206 	const char *new_decision =
207 		mail_cache_decision_to_string(priv->field.decision);
208 	struct event_passthrough *e =
209 		mail_cache_decision_changed_event(cache, cache->event, field)->
210 		add_str("reason", "add")->
211 		add_int("uid", uid)->
212 		add_str("old_decision", "no")->
213 		add_str("new_decision", new_decision);
214 	e_debug(e->event(), "Adding field %s to cache for the first time (uid=%u)",
215 		priv->field.name, uid);
216 }
217 
mail_cache_decisions_copy(struct mail_cache * src,struct mail_cache * dst)218 int mail_cache_decisions_copy(struct mail_cache *src, struct mail_cache *dst)
219 {
220 	if (mail_cache_open_and_verify(src) < 0)
221 		return -1;
222 	if (MAIL_CACHE_IS_UNUSABLE(src))
223 		return 0; /* no caching decisions */
224 
225 	unsigned int count = 0;
226 	struct mail_cache_field *fields =
227 		mail_cache_register_get_list(src, pool_datastack_create(), &count);
228 	i_assert(fields != NULL || count == 0);
229 	if (count > 0)
230 		mail_cache_register_fields(dst, fields, count);
231 
232 	/* Destination cache isn't expected to exist yet, so use purging
233 	   to create it. Setting field_header_write_pending also guarantees
234 	   that the fields are updated even if the cache was already created
235 	   and no purging was done. */
236 	dst->field_header_write_pending = TRUE;
237 	return mail_cache_purge(dst, 0, "copy cache decisions");
238 }
239