1 /*
2 
3 Copyright (c) 2010-2018, Arvid Norberg
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 
10     * Redistributions of source code must retain the above copyright
11       notice, this list of conditions and the following disclaimer.
12     * Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in
14       the documentation and/or other materials provided with the distribution.
15     * Neither the name of the author nor the names of its
16       contributors may be used to endorse or promote products derived
17       from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
30 
31 */
32 
33 #include "libtorrent/config.hpp"
34 #include "libtorrent/block_cache.hpp"
35 #include "libtorrent/assert.hpp"
36 #include "libtorrent/disk_io_job.hpp"
37 #include "libtorrent/storage.hpp"
38 #include "libtorrent/error.hpp"
39 #include "libtorrent/disk_io_thread.hpp" // disk_operation_failed
40 #include "libtorrent/invariant_check.hpp"
41 #include "libtorrent/aux_/alloca.hpp"
42 #include "libtorrent/performance_counters.hpp"
43 #include "libtorrent/aux_/time.hpp"
44 #include "libtorrent/aux_/block_cache_reference.hpp"
45 #include "libtorrent/aux_/numeric_cast.hpp"
46 
47 #include "libtorrent/aux_/disable_warnings_push.hpp"
48 #include <boost/variant/get.hpp>
49 #include "libtorrent/aux_/disable_warnings_pop.hpp"
50 
51 /*
52 
53 	The disk cache mimics ARC (adaptive replacement cache).
54 	See paper: http://dbs.uni-leipzig.de/file/ARC.pdf
55 	See slides: http://www-vlsi.stanford.edu/smart_memories/protected/meetings/spring2004/arc-fast.pdf
56 
57 	This cache has a few modifications to make it fit the bittorrent use
58 	case better. It has a few more lists and it defers the eviction
59 	of pieces.
60 
61 	read_lru1
62 		This is a plain LRU for items that have been requested once. If a piece
63 		in this list gets accessed again, by someone other than the first
64 		accessor, the piece is promoted into LRU2, which holds pieces that are
65 		more frequently used and more important to keep around, since this LRU1
66 		list takes most of the churn.
67 
68 	read_lru1_ghost
69 		This is a list of pieces that were least recently evicted from read_lru1.
70 		These pieces don't hold any actual blocks in the cache, they are just
71 		here to extend the reach and probability for pieces to be promoted into
72 		read_lru2. Any piece in this list that gets one more access is promoted to
73 		read_lru2. This is technically a cache-miss, since there are no cached
74 		blocks here, but for the purposes of promoting the piece from
75 		infrequently used to frequently used, it's considered a cache-hit.
76 
77 	read_lru2
78 		The "frequently used" (MFU) end of the ARC cache. Pieces are promoted here from read_lru1 (or one of the ghost lists) once they see a second access from a different peer.
79 
80 	read_lru2_ghost
81 		Pieces most recently evicted from read_lru2. Like read_lru1_ghost, these entries hold no blocks; a hit here re-promotes the piece into read_lru2.
82 
83 	volatile_read_lru
84 		Low-priority read pieces that are not expected to be requested again. These are the first candidates for eviction and are never moved into a ghost list.
85 
86 	write_lru
87 		Pieces holding dirty blocks waiting to be flushed to disk. Once a piece has no dirty blocks left, it moves over to the read side of the cache (see update_cache_state()).
88 
89 	Cache hits
90 	..........
91 
92 	When a piece gets a cache hit, it's promoted, either to the front of lru2
93 	(if it's already there) or into lru2. Since this ARC implementation operates
94 	on pieces instead of blocks, any one peer requesting blocks from one piece
95 	would essentially always produce a "cache hit" on the second block it
96 	requests. To make the promotions more meaningful, and more in the spirit of
97 	the ARC algorithm, each access carries a token, unique to each peer. If an
98 	access has a different token than the last one, it's considered a cache hit.
99 	This is because at least two peers requested blocks from the same piece.
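
	A rough sketch of this rule, with illustrative (not actual) member names:

		// treat an access as a genuine hit only if the requesting peer
		// differs from the one that touched this block last
		bool genuine_hit = (access_token != block.last_access_token);
		if (genuine_hit) promote(piece); // towards read_lru2

	The real implementation tracks this via the per-block cache_hit flag and
	the logic in block_cache::cache_hit().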
100 
101 	Deferred evictions
102 	..................
103 
104 	Since pieces and blocks can be pinned in the cache, and it's not always
105 	practical, or possible, to evict a piece at the point where a new block is
106 	allocated (because it's not known what the block will be used for),
107 	evictions are not done at the time of allocating blocks. Instead, whenever
108 	an operation requires adding a new piece to the cache, it also records the
109 	cache event leading to it, in m_last_cache_op. This is one of cache_miss
110 	(piece did not exist in cache), lru1_ghost_hit (the piece was found in
111 	lru1_ghost and it was promoted) or lru2_ghost_hit (the piece was found in
112 	lru2_ghost and it was promoted). This cache operation then tells the cache
113 	eviction algorithm which list to evict from. The volatile list is
114 	always the first one to be evicted from, however.
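
	Roughly, the eviction preference derived from m_last_cache_op is (a
	sketch; see try_evict_blocks() for the real logic):

		always evict from the volatile list first
		if (m_last_cache_op == cache_miss)
			evict from the larger of lru1 / lru2 first
		else if (m_last_cache_op == ghost_hit_lru1)
			evict from lru2 first, then lru1
		else // ghost_hit_lru2
			evict from lru1 first, then lru2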
115 
116 	Write jobs
117 	..........
118 
119 	When the write cache is enabled, write jobs are not issued via the normal
120 	job queue. They are just hung on their corresponding cached piece entry, and a
121 	flush_hashed job is issued. This job will inspect the current state of the
122 	cached piece and determine if any of the blocks should be flushed. It also
123 	kicks the hasher, i.e. progresses the SHA1 context, which calculates the
124 	SHA-1 hash of the piece. This job flushes blocks that have been hashed and
125 	that also form a contiguous block run of at least the write cache line size.
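
	In rough pseudocode (illustrative helper names; the real logic lives in
	disk_io_thread.cpp), the flush decision is approximately:

		if (contiguous_hashed_dirty_run(piece) >= write_cache_line_size)
			flush_range(piece, run_start, run_end);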
126 
127 	Read jobs
128 	.........
129 
130 	The data blocks pulled in from disk by read jobs are hung on the
131 	corresponding cache piece (cached_piece_entry) once the operation completes.
132 	Read operations typically pull in an entire read cache stripe, and not just
133 	the one block that was requested. When adjacent blocks are requested to be
134 	read in quick succession, there is a risk that each block would pull in more
135 	blocks (read ahead) and potentially read the same blocks several times, if
136 	the original requests were serviced by different disk threads. This is
137 	because all the read operations may start before any of them has completed,
138 	hanging the resulting blocks in the cache, i.e. they would all be cache
139 	misses, even though all but the first should be cache hits in the first
140 	one's read ahead.
141 
142 	In order to solve this problem, there is only a single outstanding read job
143 	at any given time per piece. When there is an outstanding read job on a
144 	piece, the *outstanding_read* member is set to 1. This indicates that the
145 	job should be hung on the piece for later processing, instead of being
146 	issued into the main job queue. There is a tailqueue on each piece entry
147 	called read_jobs where these jobs are added.
148 
149 	At the end of every read job, this job list is inspected; any job in it is
150 	tried against the cache to see if it's a cache hit now. If it is, it's
151 	completed right away. If it isn't, it's put back in the read_jobs list, except
152 	for one, which is issued into the regular job queue.
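
	A rough sketch of what happens when the outstanding read job on a piece
	completes (illustrative, not the actual code):

		pe->outstanding_read = 0;
		for each job j in pe->read_jobs:
			if j is now a cache hit: complete j
			else: keep j in read_jobs
		if read_jobs is not empty:
			pop one job, set outstanding_read = 1 and issue it to the job queue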
153 */
154 
155 #define DEBUG_CACHE 0
156 
157 #if DEBUG_CACHE
158 #define DLOG(...) std::fprintf(__VA_ARGS__)
159 #else
160 #define DLOG(...) do {} while (false)
161 #endif
162 
163 namespace libtorrent {
164 
165 #if DEBUG_CACHE
166 void log_refcounts(cached_piece_entry const* pe)
167 {
168 	char out[4096];
169 	char* ptr = out;
170 	char* end = ptr + sizeof(out);
171 	ptr += std::snprintf(ptr, end - ptr, "piece: %d [ ", int(pe->piece));
172 	for (int i = 0; i < pe->blocks_in_piece; ++i)
173 	{
174 		ptr += std::snprintf(ptr, end - ptr, "%d ", int(pe->blocks[i].refcount));
175 	}
176 	strncpy(ptr, "]\n", end - ptr);
177 	DLOG(stderr, out);
178 }
179 #endif
180 
181 std::array<const char*, 15> const job_action_name =
182 {{
183 	"read",
184 	"write",
185 	"hash",
186 	"move_storage",
187 	"release_files",
188 	"delete_files",
189 	"check_fastresume",
190 	"rename_file",
191 	"stop_torrent",
192 	"flush_piece",
193 	"flush_hashed",
194 	"flush_storage",
195 	"trim_cache",
196 	"set_file_priority",
197 	"clear_piece",
198 }};
199 
200 // make sure the job names array covers all the job IDs
201 static_assert(int(job_action_name.size()) == static_cast<int>(job_action_t::num_job_ids)
202 	, "disk-job-action and action-name-array mismatch");
203 
204 #if TORRENT_USE_ASSERTS || !defined TORRENT_DISABLE_LOGGING
205 
206 	std::array<char const*, 7> const piece_log_t::job_names =
207 	{{
208 		"flushing",
209 		"flush_expired",
210 		"try_flush_write_blocks",
211 		"try_flush_write_blocks2",
212 		"flush_range",
213 		"clear_outstanding_jobs",
214 		"set_outstanding_jobs",
215 	}};
216 
217 	char const* job_name(job_action_t const job)
218 	{
219 		int const j = static_cast<int>(job);
220 		if (j < 0 || j >= piece_log_t::last_job)
221 			return "unknown";
222 
223 		if (j < piece_log_t::flushing)
224 			return job_action_name[static_cast<std::size_t>(j)];
225 		return piece_log_t::job_names[static_cast<std::size_t>(j - piece_log_t::flushing)];
226 	}
227 
228 #endif // TORRENT_DISABLE_LOGGING
229 
230 #if TORRENT_USE_ASSERTS
231 
232 	void print_piece_log(aux::vector<piece_log_t> const& piece_log)
233 	{
234 		for (int i = 0; i < int(piece_log.size()); ++i)
235 		{
236 			if (piece_log[i].block == -1)
237 			{
238 				std::printf("%d: %s\n", i, job_name(piece_log[i].job));
239 			}
240 			else
241 			{
242 				std::printf("%d: %s %d\n", i, job_name(piece_log[i].job), piece_log[i].block);
243 			}
244 		}
245 	}
246 
247 	void assert_print_piece(cached_piece_entry const* pe)
248 	{
249 		static const char* const cache_state[] =
250 		{
251 			"write", "volatile-read", "read-lru", "read-lru-ghost", "read-lfu", "read-lfu-ghost"
252 		};
253 
254 		if (pe == nullptr)
255 		{
256 			assert_print("piece: nullptr\n");
257 		}
258 		else
259 		{
260 			assert_print("piece: %d\nrefcount: %d\npiece_refcount: %d\n"
261 				"num_blocks: %d\nhashing: %d\n\nhash: %p\nhash_offset: %d\n"
262 				"cache_state: (%d) %s\noutstanding_flush: %d\npiece: %d\n"
263 				"num_dirty: %d\nnum_blocks: %d\nblocks_in_piece: %d\n"
264 				"hashing_done: %d\nmarked_for_deletion: %d\nneed_readback: %d\n"
265 				"hash_passed: %d\nread_jobs: %d\njobs: %d\n"
266 				"piece_log:\n"
267 				, int(pe->piece), pe->refcount, pe->piece_refcount, int(pe->num_blocks)
268 				, int(pe->hashing), static_cast<void*>(pe->hash.get()), pe->hash ? pe->hash->offset : -1
269 				, int(pe->cache_state)
270 				, pe->cache_state < cached_piece_entry::num_lrus ? cache_state[pe->cache_state] : ""
271 				, int(pe->outstanding_flush), int(pe->piece), int(pe->num_dirty)
272 				, int(pe->num_blocks), int(pe->blocks_in_piece), int(pe->hashing_done)
273 				, int(pe->marked_for_eviction), int(pe->need_readback), pe->hash_passes
274 				, pe->read_jobs.size(), pe->jobs.size());
275 			bool first = true;
276 			for (auto const& log : pe->piece_log)
277 			{
278 				assert_print("%s %s (%d)", (first ? "" : ",")
279 					, job_name(log.job), log.block);
280 				first = false;
281 			}
282 		}
283 		assert_print("\n");
284 	}
285 
286 
287 #define TORRENT_PIECE_ASSERT(cond, piece) \
288 	do { if (!(cond)) { assert_print_piece(piece); assert_fail(#cond, __LINE__, __FILE__, __func__, nullptr); } } TORRENT_WHILE_0
289 
290 #else
291 #define TORRENT_PIECE_ASSERT(cond, piece) do {} TORRENT_WHILE_0
292 #endif
293 
294 cached_piece_entry::cached_piece_entry()
295 	: num_dirty(0)
296 	, num_blocks(0)
297 	, hashing(0)
298 	, hashing_done(0)
299 	, marked_for_deletion(false)
300 	, need_readback(false)
301 	, cache_state(none)
302 	, outstanding_flush(0)
303 	, outstanding_read(0)
304 	, marked_for_eviction(false)
305 	, pinned(0)
306 {}
307 
308 cached_piece_entry::~cached_piece_entry()
309 {
310 	TORRENT_ASSERT(piece_refcount == 0);
311 	TORRENT_ASSERT(jobs.empty());
312 	TORRENT_ASSERT(read_jobs.empty());
313 #if TORRENT_USE_ASSERTS
314 	if (blocks)
315 	{
316 		for (int i = 0; i < blocks_in_piece; ++i)
317 		{
318 			TORRENT_ASSERT(!blocks[i].pending);
319 			TORRENT_ASSERT(blocks[i].refcount == 0);
320 			TORRENT_ASSERT(blocks[i].hashing_count == 0);
321 			TORRENT_ASSERT(blocks[i].flushing_count == 0);
322 		}
323 	}
324 	in_use = false;
325 #endif
326 }
327 
328 block_cache::block_cache(io_service& ios
329 	, std::function<void()> const& trigger_trim)
330 	: disk_buffer_pool(ios, trigger_trim)
331 	, m_last_cache_op(cache_miss)
332 	, m_ghost_size(8)
333 	, m_max_volatile_blocks(100)
334 	, m_volatile_size(0)
335 	, m_read_cache_size(0)
336 	, m_write_cache_size(0)
337 	, m_send_buffer_blocks(0)
338 	, m_pinned_blocks(0)
339 {
340 }
341 
342 block_cache::~block_cache()
343 {
344 	std::vector<char*> bufs;
345 	for (auto const& pe : m_pieces)
346 	{
347 		if (!pe.blocks) continue;
348 
349 		int const num_blocks = int(pe.blocks_in_piece);
350 		for (int i = 0; i < num_blocks; ++i)
351 		{
352 			if (pe.blocks[i].buf == nullptr) continue;
353 			bufs.push_back(pe.blocks[i].buf);
354 		}
355 	}
356 	free_multiple_buffers(bufs);
357 }
358 
359 // returns:
360 // -1: not in cache
361 // -2: no memory
362 int block_cache::try_read(disk_io_job* j, buffer_allocator_interface& allocator
363 	, bool expect_no_fail)
364 {
365 	INVARIANT_CHECK;
366 
367 	cached_piece_entry* p = find_piece(j);
368 
369 	int ret = 0;
370 
371 	// if the piece cannot be found in the cache,
372 	// it's a cache miss
373 	TORRENT_ASSERT(!expect_no_fail || p != nullptr);
374 	if (p == nullptr) return -1;
375 
376 #if TORRENT_USE_ASSERTS
377 	p->piece_log.push_back(piece_log_t(j->action, j->d.io.offset / 0x4000));
378 #endif
379 	cache_hit(p, j->d.io.offset / default_block_size, bool(j->flags & disk_interface::volatile_read));
380 
381 	ret = copy_from_piece(p, j, allocator, expect_no_fail);
382 	if (ret < 0) return ret;
383 
384 	ret = j->d.io.buffer_size;
385 	return ret;
386 }
387 
388 void block_cache::bump_lru(cached_piece_entry* p)
389 {
390 	// move to the top of the LRU list
391 	TORRENT_PIECE_ASSERT(p->cache_state == cached_piece_entry::write_lru, p);
392 	linked_list<cached_piece_entry>* lru_list = &m_lru[p->cache_state];
393 
394 	// move to the back (MRU) of the list
395 	lru_list->erase(p);
396 	lru_list->push_back(p);
397 	p->expire = aux::time_now();
398 }
399 
400 // this is called for pieces that we're reading from, when they
401 // are in the cache (including the ghost lists)
402 void block_cache::cache_hit(cached_piece_entry* p, int block, bool volatile_read)
403 {
404 // this can be pretty expensive
405 //	INVARIANT_CHECK;
406 
407 	TORRENT_ASSERT(p);
408 	TORRENT_ASSERT(p->in_use);
409 
410 	// move the piece into this queue. Whenever we have a cache
411 	// hit, we move the piece into the lru2 queue (i.e. the most
412 	// frequently used piece).
413 	std::uint16_t target_queue = cached_piece_entry::read_lru2;
414 
415 	if (p->blocks[block].cache_hit == 0)
416 	{
417 		// if it's not a duplicate hit and the piece isn't in
418 		// any of the ghost lists, ignore it
419 		if (p->cache_state == cached_piece_entry::read_lru1
420 			|| p->cache_state == cached_piece_entry::read_lru2
421 			|| p->cache_state == cached_piece_entry::write_lru
422 			|| p->cache_state == cached_piece_entry::volatile_read_lru)
423 			return;
424 
425 		if (p->cache_state == cached_piece_entry::read_lru1_ghost)
426 			target_queue = cached_piece_entry::read_lru1;
427 	}
428 
429 	if (p->cache_state == cached_piece_entry::volatile_read_lru)
430 	{
431 		// a volatile read hit on a volatile piece doesn't do anything
432 		if (volatile_read) return;
433 
434 		// however, if this is a proper read on a volatile piece
435 		// we need to promote it to lru1
436 		target_queue = cached_piece_entry::read_lru1;
437 	}
438 
439 	// if we have this piece anywhere in L1 or L2, it's a "hit"
440 	// and it should be bumped to the highest priority in L2
441 	// i.e. "frequently used"
442 	if (p->cache_state < cached_piece_entry::read_lru1
443 		|| p->cache_state > cached_piece_entry::read_lru2_ghost)
444 		return;
445 
446 	// if we got a cache hit in a ghost list, that indicates the proper
447 	// list is too small. Record which ghost list we got the hit in and
448 	// it will be used to determine which end of the cache we'll evict
449 	// from, next time we need to reclaim blocks
450 	if (p->cache_state == cached_piece_entry::read_lru1_ghost)
451 	{
452 		m_last_cache_op = ghost_hit_lru1;
453 	}
454 	else if (p->cache_state == cached_piece_entry::read_lru2_ghost)
455 	{
456 		m_last_cache_op = ghost_hit_lru2;
457 	}
458 
459 	// move into L2 (frequently used)
460 	m_lru[p->cache_state].erase(p);
461 	m_lru[target_queue].push_back(p);
462 	p->cache_state = target_queue;
463 	p->expire = aux::time_now();
464 #if TORRENT_USE_ASSERTS
465 	switch (p->cache_state)
466 	{
467 		case cached_piece_entry::write_lru:
468 		case cached_piece_entry::volatile_read_lru:
469 		case cached_piece_entry::read_lru1:
470 		case cached_piece_entry::read_lru2:
471 			TORRENT_ASSERT(p->in_storage == true);
472 			break;
473 		default:
474 			TORRENT_ASSERT(p->in_storage == false);
475 			break;
476 	}
477 #endif
478 }
479 
480 // this is used to move pieces primarily from the write cache
481 // to the read cache. Technically it can move from read to write
482 // cache as well; it's unclear whether that ever happens, though
483 void block_cache::update_cache_state(cached_piece_entry* p)
484 {
485 	int state = p->cache_state;
486 	std::uint16_t desired_state = p->cache_state;
487 	if (p->num_dirty > 0 || p->hash)
488 		desired_state = cached_piece_entry::write_lru;
489 	else if (p->cache_state == cached_piece_entry::write_lru)
490 		desired_state = cached_piece_entry::read_lru1;
491 
492 	if (desired_state == state) return;
493 
494 	TORRENT_PIECE_ASSERT(state < cached_piece_entry::num_lrus, p);
495 	TORRENT_PIECE_ASSERT(desired_state < cached_piece_entry::num_lrus, p);
496 	linked_list<cached_piece_entry>* src = &m_lru[state];
497 	linked_list<cached_piece_entry>* dst = &m_lru[desired_state];
498 
499 	src->erase(p);
500 	dst->push_back(p);
501 	p->expire = aux::time_now();
502 	p->cache_state = desired_state;
503 #if TORRENT_USE_ASSERTS
504 	switch (p->cache_state)
505 	{
506 		case cached_piece_entry::write_lru:
507 		case cached_piece_entry::volatile_read_lru:
508 		case cached_piece_entry::read_lru1:
509 		case cached_piece_entry::read_lru2:
510 			TORRENT_ASSERT(p->in_storage == true);
511 			break;
512 		default:
513 			TORRENT_ASSERT(p->in_storage == false);
514 			break;
515 	}
516 #endif
517 }
518 
519 void block_cache::try_evict_one_volatile()
520 {
521 	INVARIANT_CHECK;
522 
523 	DLOG(stderr, "[%p] try_evict_one_volatile\n", static_cast<void*>(this));
524 
525 	if (m_volatile_size < m_max_volatile_blocks) return;
526 
527 	linked_list<cached_piece_entry>* piece_list = &m_lru[cached_piece_entry::volatile_read_lru];
528 
529 	for (list_iterator<cached_piece_entry> i = piece_list->iterate(); i.get();)
530 	{
531 		cached_piece_entry* pe = i.get();
532 		TORRENT_PIECE_ASSERT(pe->in_use, pe);
533 		i.next();
534 
535 		if (pe->ok_to_evict() && pe->num_blocks == 0)
536 		{
537 #if TORRENT_USE_INVARIANT_CHECKS
538 			for (int j = 0; j < pe->blocks_in_piece; ++j)
539 				TORRENT_PIECE_ASSERT(pe->blocks[j].buf == nullptr, pe);
540 #endif
541 			TORRENT_PIECE_ASSERT(pe->refcount == 0, pe);
542 			move_to_ghost(pe);
543 			continue;
544 		}
545 
546 		TORRENT_PIECE_ASSERT(pe->num_dirty == 0, pe);
547 
548 		// someone else is using this piece
549 		if (pe->refcount > 0) continue;
550 
551 		// some blocks are pinned in this piece, skip it
552 		if (pe->pinned > 0) continue;
553 
554 		TORRENT_ALLOCA(to_delete, char*, pe->blocks_in_piece);
555 		int num_to_delete = 0;
556 
557 		// go through the blocks and evict the ones that are not dirty and not
558 		// referenced
559 		for (int j = 0; j < pe->blocks_in_piece; ++j)
560 		{
561 			cached_block_entry& b = pe->blocks[j];
562 
563 			TORRENT_PIECE_ASSERT(b.dirty == false, pe);
564 			TORRENT_PIECE_ASSERT(b.pending == false, pe);
565 
566 			if (b.buf == nullptr || b.refcount > 0 || b.dirty || b.pending) continue;
567 
568 			to_delete[num_to_delete++] = b.buf;
569 			b.buf = nullptr;
570 			TORRENT_PIECE_ASSERT(pe->num_blocks > 0, pe);
571 			--pe->num_blocks;
572 			TORRENT_PIECE_ASSERT(m_read_cache_size > 0, pe);
573 			--m_read_cache_size;
574 			TORRENT_PIECE_ASSERT(m_volatile_size > 0, pe);
575 			--m_volatile_size;
576 		}
577 
578 		if (pe->ok_to_evict() && pe->num_blocks == 0)
579 		{
580 #if TORRENT_USE_INVARIANT_CHECKS
581 			for (int j = 0; j < pe->blocks_in_piece; ++j)
582 				TORRENT_PIECE_ASSERT(pe->blocks[j].buf == nullptr, pe);
583 #endif
584 			move_to_ghost(pe);
585 		}
586 
587 		if (num_to_delete == 0) return;
588 
589 		DLOG(stderr, "[%p]    removed %d blocks\n", static_cast<void*>(this)
590 			, num_to_delete);
591 
592 		free_multiple_buffers(to_delete.first(num_to_delete));
593 		return;
594 	}
595 }
596 
597 cached_piece_entry* block_cache::allocate_piece(disk_io_job const* j, std::uint16_t const cache_state)
598 {
599 #ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS
600 	INVARIANT_CHECK;
601 #endif
602 
603 	TORRENT_ASSERT(cache_state < cached_piece_entry::num_lrus);
604 
605 	// we're assuming we're not allocating a ghost piece
606 	// a bit further down
607 	TORRENT_ASSERT(cache_state != cached_piece_entry::read_lru1_ghost
608 		&& cache_state != cached_piece_entry::read_lru2_ghost);
609 
610 	cached_piece_entry* p = find_piece(j);
611 	if (p == nullptr)
612 	{
613 		int const piece_size = j->storage->files().piece_size(j->piece);
614 		int const blocks_in_piece = (piece_size + default_block_size - 1) / default_block_size;
615 
616 		cached_piece_entry pe;
617 		pe.piece = j->piece;
618 		pe.storage = j->storage;
619 		pe.expire = aux::time_now();
620 		pe.blocks_in_piece = aux::numeric_cast<std::uint16_t>(blocks_in_piece);
621 
622 		pe.blocks.reset(new (std::nothrow) cached_block_entry[std::size_t(blocks_in_piece)]);
623 		if (!pe.blocks) return nullptr;
624 		p = const_cast<cached_piece_entry*>(&*m_pieces.insert(std::move(pe)).first);
625 
626 		j->storage->add_piece(p);
627 		p->cache_state = cache_state;
628 
629 		TORRENT_PIECE_ASSERT(p->cache_state < cached_piece_entry::num_lrus, p);
630 		linked_list<cached_piece_entry>* lru_list = &m_lru[p->cache_state];
631 		lru_list->push_back(p);
632 
633 		// this piece is part of the ARC cache (as opposed to
634 		// the write cache). Allocating a new read piece indicates
635 		// that we just got a cache miss. Record this to determine
636 		// which end to evict blocks from next time we need to
637 		// evict blocks
638 		if (cache_state == cached_piece_entry::read_lru1)
639 			m_last_cache_op = cache_miss;
640 
641 #if TORRENT_USE_ASSERTS
642 		switch (p->cache_state)
643 		{
644 			case cached_piece_entry::write_lru:
645 			case cached_piece_entry::volatile_read_lru:
646 			case cached_piece_entry::read_lru1:
647 			case cached_piece_entry::read_lru2:
648 				TORRENT_ASSERT(p->in_storage == true);
649 				break;
650 			default:
651 				TORRENT_ASSERT(p->in_storage == false);
652 				break;
653 		}
654 #endif
655 	}
656 	else
657 	{
658 		TORRENT_PIECE_ASSERT(p->in_use, p);
659 
660 		// we want to retain the piece now
661 		p->marked_for_eviction = false;
662 
663 		// only allow changing the cache state downwards. i.e. turn a ghost
664 		// piece into a non-ghost, or a read piece into a write piece
665 		if (p->cache_state > cache_state)
666 		{
667 			// this can happen for instance if a piece fails the hash check
668 			// first it's in the write cache, then it completes and is moved
669 			// into the read cache, but fails and is cleared (into the ghost list)
670 			// then we want to add new dirty blocks to it and we need to move
671 			// it back into the write cache
672 			m_lru[p->cache_state].erase(p);
673 			p->cache_state = cache_state;
674 			m_lru[p->cache_state].push_back(p);
675 			p->expire = aux::time_now();
676 #if TORRENT_USE_ASSERTS
677 			switch (p->cache_state)
678 			{
679 				case cached_piece_entry::write_lru:
680 				case cached_piece_entry::volatile_read_lru:
681 				case cached_piece_entry::read_lru1:
682 				case cached_piece_entry::read_lru2:
683 					TORRENT_ASSERT(p->in_storage == true);
684 					break;
685 				default:
686 					TORRENT_ASSERT(p->in_storage == false);
687 					break;
688 			}
689 #endif
690 		}
691 	}
692 
693 	return p;
694 }
695 
696 cached_piece_entry* block_cache::add_dirty_block(disk_io_job* j, bool const add_hasher)
697 {
698 #ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS
699 	INVARIANT_CHECK;
700 #endif
701 
702 	TORRENT_ASSERT(boost::get<disk_buffer_holder>(j->argument));
703 	TORRENT_ASSERT(m_write_cache_size + m_read_cache_size + 1 <= in_use());
704 
705 	cached_piece_entry* pe = allocate_piece(j, cached_piece_entry::write_lru);
706 	TORRENT_ASSERT(pe);
707 	if (pe == nullptr) return pe;
708 
709 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
710 
711 	int block = j->d.io.offset / default_block_size;
712 	TORRENT_ASSERT((j->d.io.offset % default_block_size) == 0);
713 
714 	// we should never add a new dirty block to a piece
715 	// whose hash has already been checked. Before we add it, the
716 	// piece needs to be cleared (with async_clear_piece)
717 	TORRENT_PIECE_ASSERT(pe->hashing_done == 0, pe);
718 
719 	// this only evicts read blocks
720 
721 	int evict = num_to_evict(1);
722 	if (evict > 0) try_evict_blocks(evict, pe);
723 
724 	TORRENT_PIECE_ASSERT(block < pe->blocks_in_piece, pe);
725 	TORRENT_PIECE_ASSERT(j->piece == pe->piece, pe);
726 	TORRENT_PIECE_ASSERT(!pe->marked_for_eviction, pe);
727 
728 	TORRENT_PIECE_ASSERT(pe->blocks[block].refcount == 0, pe);
729 
730 	cached_block_entry& b = pe->blocks[block];
731 
732 	TORRENT_PIECE_ASSERT(b.buf != boost::get<disk_buffer_holder>(j->argument).get(), pe);
733 
734 	// we might have a left-over read block from
735 	// hash checking
736 	// we might also have a previous dirty block which
737 	// is still waiting to be written out
738 	if (b.buf != nullptr && b.buf != boost::get<disk_buffer_holder>(j->argument).get())
739 	{
740 		TORRENT_PIECE_ASSERT(b.refcount == 0 && !b.pending, pe);
741 		free_block(pe, block);
742 		TORRENT_PIECE_ASSERT(b.dirty == 0, pe);
743 	}
744 
745 	b.buf = boost::get<disk_buffer_holder>(j->argument).release();
746 
747 	b.dirty = true;
748 	++pe->num_blocks;
749 	++pe->num_dirty;
750 	++m_write_cache_size;
751 	TORRENT_PIECE_ASSERT(j->piece == pe->piece, pe);
752 	TORRENT_PIECE_ASSERT(j->flags & disk_io_job::in_progress, pe);
753 	TORRENT_PIECE_ASSERT(j->piece == pe->piece, pe);
754 	pe->jobs.push_back(j);
755 
756 	if (block == 0 && !pe->hash && pe->hashing_done == false && add_hasher)
757 		pe->hash.reset(new partial_hash);
758 
759 	update_cache_state(pe);
760 
761 	bump_lru(pe);
762 
763 	return pe;
764 }
765 
766 // flushed is an array of num_flushed integers. Each integer is the block index
767 // that was flushed. This function marks those blocks as not pending and not
768 // dirty. It also adjusts its accounting of the read vs. write cache sizes
769 // (since these blocks are now part of the read cache). The refcounts of the
770 // blocks are also decremented by this function. They are expected to have been
771 // incremented by the caller.
772 bool block_cache::blocks_flushed(cached_piece_entry* pe, int const* flushed, int const num_flushed)
773 {
774 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
775 
776 	for (int i = 0; i < num_flushed; ++i)
777 	{
778 		int block = flushed[i];
779 		TORRENT_PIECE_ASSERT(block >= 0, pe);
780 		TORRENT_PIECE_ASSERT(block < pe->blocks_in_piece, pe);
781 		TORRENT_PIECE_ASSERT(pe->blocks[block].dirty, pe);
782 		TORRENT_PIECE_ASSERT(pe->blocks[block].pending, pe);
783 		pe->blocks[block].pending = false;
784 		// it's important to mark it as non-dirty before decrementing the
785 		// refcount, because the buffer may be marked as discardable/volatile if
786 		// this is the last reference to it
787 		pe->blocks[block].dirty = false;
788 		dec_block_refcount(pe, block, block_cache::ref_flushing);
789 	}
790 
791 	m_write_cache_size -= num_flushed;
792 	m_read_cache_size += num_flushed;
793 	pe->num_dirty -= num_flushed;
794 
795 	update_cache_state(pe);
796 	return maybe_free_piece(pe);
797 }
798 
799 std::pair<block_cache::const_iterator, block_cache::const_iterator> block_cache::all_pieces() const
800 {
801 	return std::make_pair(m_pieces.begin(), m_pieces.end());
802 }
803 
804 void block_cache::free_block(cached_piece_entry* pe, int block)
805 {
806 	TORRENT_ASSERT(pe != nullptr);
807 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
808 	TORRENT_PIECE_ASSERT(block < pe->blocks_in_piece, pe);
809 	TORRENT_PIECE_ASSERT(block >= 0, pe);
810 
811 	cached_block_entry& b = pe->blocks[block];
812 
813 	TORRENT_PIECE_ASSERT(b.refcount == 0, pe);
814 	TORRENT_PIECE_ASSERT(!b.pending, pe);
815 	TORRENT_PIECE_ASSERT(b.buf, pe);
816 
817 	if (b.dirty)
818 	{
819 		--pe->num_dirty;
820 		b.dirty = false;
821 		TORRENT_PIECE_ASSERT(m_write_cache_size > 0, pe);
822 		--m_write_cache_size;
823 	}
824 	else
825 	{
826 		TORRENT_PIECE_ASSERT(m_read_cache_size > 0, pe);
827 		--m_read_cache_size;
828 		if (pe->cache_state == cached_piece_entry::volatile_read_lru)
829 		{
830 			--m_volatile_size;
831 		}
832 	}
833 
834 
835 	TORRENT_PIECE_ASSERT(pe->num_blocks > 0, pe);
836 	--pe->num_blocks;
837 	free_buffer(b.buf);
838 	b.buf = nullptr;
839 }
840 
841 bool block_cache::evict_piece(cached_piece_entry* pe, tailqueue<disk_io_job>& jobs
842 	, eviction_mode const mode)
843 {
844 	INVARIANT_CHECK;
845 
846 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
847 
848 	TORRENT_ALLOCA(to_delete, char*, pe->blocks_in_piece);
849 	int num_to_delete = 0;
850 	for (int i = 0; i < pe->blocks_in_piece; ++i)
851 	{
852 		if (pe->blocks[i].buf == nullptr || pe->blocks[i].refcount > 0) continue;
853 		TORRENT_PIECE_ASSERT(!pe->blocks[i].pending, pe);
854 		TORRENT_PIECE_ASSERT(pe->blocks[i].buf != nullptr, pe);
855 		TORRENT_PIECE_ASSERT(num_to_delete < pe->blocks_in_piece, pe);
856 		to_delete[num_to_delete++] = pe->blocks[i].buf;
857 		pe->blocks[i].buf = nullptr;
858 		TORRENT_PIECE_ASSERT(pe->num_blocks > 0, pe);
859 		--pe->num_blocks;
860 		if (!pe->blocks[i].dirty)
861 		{
862 			TORRENT_PIECE_ASSERT(m_read_cache_size > 0, pe);
863 			--m_read_cache_size;
864 		}
865 		else
866 		{
867 			TORRENT_PIECE_ASSERT(pe->num_dirty > 0, pe);
868 			--pe->num_dirty;
869 			pe->blocks[i].dirty = false;
870 			TORRENT_PIECE_ASSERT(m_write_cache_size > 0, pe);
871 			--m_write_cache_size;
872 		}
873 		if (pe->num_blocks == 0) break;
874 	}
875 
876 	if (pe->cache_state == cached_piece_entry::volatile_read_lru)
877 	{
878 		m_volatile_size -= num_to_delete;
879 	}
880 
881 	if (num_to_delete) free_multiple_buffers(to_delete.first(num_to_delete));
882 
883 	if (pe->ok_to_evict(true) && pe->num_blocks == 0)
884 	{
885 		pe->hash.reset();
886 
887 		// append will move the items from pe->jobs onto the end of jobs
888 		jobs.append(pe->jobs);
889 		TORRENT_ASSERT(pe->jobs.empty());
890 
891 		if (mode == allow_ghost
892 			&& (pe->cache_state == cached_piece_entry::read_lru1_ghost
893 			|| pe->cache_state == cached_piece_entry::read_lru2_ghost))
894 			return true;
895 
896 		if (mode == disallow_ghost
897 			|| pe->cache_state == cached_piece_entry::write_lru
898 			|| pe->cache_state == cached_piece_entry::volatile_read_lru)
899 			erase_piece(pe);
900 		else
901 			move_to_ghost(pe);
902 		return true;
903 	}
904 
905 	return false;
906 }
907 
908 void block_cache::mark_for_eviction(cached_piece_entry* p
909 	, eviction_mode const mode)
910 {
911 	INVARIANT_CHECK;
912 
913 	DLOG(stderr, "[%p] block_cache mark-for-deletion "
914 		"piece: %d\n", static_cast<void*>(this), int(p->piece));
915 
916 	TORRENT_PIECE_ASSERT(p->jobs.empty(), p);
917 	tailqueue<disk_io_job> jobs;
918 	if (!evict_piece(p, jobs, mode))
919 	{
920 		p->marked_for_eviction = true;
921 		p->marked_for_deletion = mode == disallow_ghost;
922 	}
923 }
924 
925 void block_cache::erase_piece(cached_piece_entry* pe)
926 {
927 	INVARIANT_CHECK;
928 
929 	TORRENT_PIECE_ASSERT(pe->ok_to_evict(), pe);
930 	TORRENT_PIECE_ASSERT(pe->cache_state < cached_piece_entry::num_lrus, pe);
931 	TORRENT_PIECE_ASSERT(pe->jobs.empty(), pe);
932 	linked_list<cached_piece_entry>* lru_list = &m_lru[pe->cache_state];
933 	if (pe->hash)
934 	{
935 		TORRENT_PIECE_ASSERT(pe->hash->offset == 0, pe);
936 		pe->hash.reset();
937 	}
938 	pe->storage->remove_piece(pe);
939 	lru_list->erase(pe);
940 	m_pieces.erase(*pe);
941 }
942 
943 // this only evicts read blocks. For write blocks, see
944 // try_flush_write_blocks in disk_io_thread.cpp
945 int block_cache::try_evict_blocks(int num, cached_piece_entry* ignore)
946 {
947 	INVARIANT_CHECK;
948 
949 	if (num <= 0) return 0;
950 
951 	DLOG(stderr, "[%p] try_evict_blocks: %d\n", static_cast<void*>(this), num);
952 
953 	TORRENT_ALLOCA(to_delete, char*, num);
954 	int num_to_delete = 0;
955 
956 	// There are two ends of the ARC cache we can evict from. There's L1 and L2.
957 	// The last cache operation determines which end we'll evict from. If we go
958 	// through the entire list from the preferred end, and still need to evict
959 	// more blocks, we'll go to the other end and start evicting from there.
960 	// lru_list[1] and lru_list[2] below are these two ends to evict from,
961 	// ordered by preference.
962 
963 	linked_list<cached_piece_entry>* lru_list[3];
964 
965 	// however, before we consider any of the proper LRU lists, we evict pieces
966 	// from the volatile list. These are low priority pieces that were
967 	// specifically marked as to not survive long in the cache. These are the
968 	// first pieces to go when evicting
969 	lru_list[0] = &m_lru[cached_piece_entry::volatile_read_lru];
970 
971 	if (m_last_cache_op == cache_miss)
972 	{
973 		// when there was a cache miss, evict from the largest list, to tend to
974 		// keep the lists of equal size when we don't know which one is
975 		// performing better
976 		if (m_lru[cached_piece_entry::read_lru2].size()
977 			> m_lru[cached_piece_entry::read_lru1].size())
978 		{
979 			lru_list[1] = &m_lru[cached_piece_entry::read_lru2];
980 			lru_list[2] = &m_lru[cached_piece_entry::read_lru1];
981 		}
982 		else
983 		{
984 			lru_list[1] = &m_lru[cached_piece_entry::read_lru1];
985 			lru_list[2] = &m_lru[cached_piece_entry::read_lru2];
986 		}
987 	}
988 	else if (m_last_cache_op == ghost_hit_lru1)
989 	{
990 		// a ghost hit in lru1's ghost list indicates lru1 should grow,
991 		// so evict blocks from L2
992 		lru_list[1] = &m_lru[cached_piece_entry::read_lru2];
993 		lru_list[2] = &m_lru[cached_piece_entry::read_lru1];
994 	}
995 	else
996 	{
997 		// when we get ghost hits in lru2's ghost list, evict from L1
998 		lru_list[1] = &m_lru[cached_piece_entry::read_lru1];
999 		lru_list[2] = &m_lru[cached_piece_entry::read_lru2];
1000 	}
1001 
1002 	// end refers to which end of the ARC cache we're evicting
1003 	// from. The LFU or the LRU end
1004 	for (int end = 0; num > 0 && end < 3; ++end)
1005 	{
1006 		// iterate over all blocks in order of last being used (oldest first) and
1007 		// as long as we still have blocks to evict TODO: it's somewhat expensive
1008 		// to iterate over this linked list. Presumably because of the random
1009 		// access of memory. It would be nice if pieces with no evictable blocks
1010 		// weren't in this list
1011 		for (auto i = lru_list[end]->iterate(); i.get() && num > 0;)
1012 		{
1013 			cached_piece_entry* pe = i.get();
1014 			TORRENT_PIECE_ASSERT(pe->in_use, pe);
1015 			i.next();
1016 
1017 			if (pe == ignore)
1018 				continue;
1019 
1020 			if (pe->ok_to_evict() && pe->num_blocks == 0)
1021 			{
1022 #if TORRENT_USE_INVARIANT_CHECKS
1023 				for (int j = 0; j < pe->blocks_in_piece; ++j)
1024 					TORRENT_PIECE_ASSERT(pe->blocks[j].buf == nullptr, pe);
1025 #endif
1026 				TORRENT_PIECE_ASSERT(pe->refcount == 0, pe);
1027 				move_to_ghost(pe);
1028 				continue;
1029 			}
1030 
1031 			TORRENT_PIECE_ASSERT(pe->num_dirty == 0, pe);
1032 
1033 			// all blocks are pinned in this piece, skip it
1034 			if (pe->num_blocks <= pe->pinned) continue;
1035 
1036 			// go through the blocks and evict the ones that are not dirty and not
1037 			// referenced
1038 			int removed = 0;
1039 			for (int j = 0; j < pe->blocks_in_piece && num > 0; ++j)
1040 			{
1041 				cached_block_entry& b = pe->blocks[j];
1042 
1043 				if (b.buf == nullptr || b.refcount > 0 || b.dirty || b.pending) continue;
1044 
1045 				to_delete[num_to_delete++] = b.buf;
1046 				b.buf = nullptr;
1047 				TORRENT_PIECE_ASSERT(pe->num_blocks > 0, pe);
1048 				--pe->num_blocks;
1049 				++removed;
1050 				--num;
1051 			}
1052 
1053 			TORRENT_PIECE_ASSERT(m_read_cache_size >= removed, pe);
1054 			m_read_cache_size -= removed;
1055 			if (pe->cache_state == cached_piece_entry::volatile_read_lru)
1056 			{
1057 				m_volatile_size -= removed;
1058 			}
1059 
1060 			if (pe->ok_to_evict() && pe->num_blocks == 0)
1061 			{
1062 #if TORRENT_USE_INVARIANT_CHECKS
1063 				for (int j = 0; j < pe->blocks_in_piece; ++j)
1064 					TORRENT_PIECE_ASSERT(pe->blocks[j].buf == nullptr, pe);
1065 #endif
1066 				move_to_ghost(pe);
1067 			}
1068 		}
1069 	}
1070 
1071 	// if we can't evict enough blocks from the read cache, also look at write
1072 	// cache pieces for blocks that have already been written to disk and can be
1073 	// evicted. In the first pass, only blocks that have been hashed are evicted;
1074 	// in the second pass any clean, unreferenced block is considered. This is
1075 	// potentially a very expensive operation, since we're likely to iterate over
1076 	// every single block in the cache, and we might not get to evict anything.
1077 
1078 	// TODO: this should probably only be done every n:th time
1079 	if (num > 0 && m_read_cache_size > m_pinned_blocks)
1080 	{
1081 		for (int pass = 0; pass < 2 && num > 0; ++pass)
1082 		{
1083 			for (auto i = m_lru[cached_piece_entry::write_lru].iterate(); i.get() && num > 0;)
1084 			{
1085 				cached_piece_entry* pe = i.get();
1086 				TORRENT_PIECE_ASSERT(pe->in_use, pe);
1087 
1088 				i.next();
1089 
1090 				if (pe == ignore)
1091 					continue;
1092 
1093 				if (pe->ok_to_evict() && pe->num_blocks == 0)
1094 				{
1095 #if TORRENT_USE_INVARIANT_CHECKS
1096 					for (int j = 0; j < pe->blocks_in_piece; ++j)
1097 						TORRENT_PIECE_ASSERT(pe->blocks[j].buf == nullptr, pe);
1098 #endif
1099 					TORRENT_PIECE_ASSERT(pe->refcount == 0, pe);
1100 					erase_piece(pe);
1101 					continue;
1102 				}
1103 
1104 				// all blocks in this piece are dirty
1105 				if (pe->num_dirty == pe->num_blocks)
1106 					continue;
1107 
1108 				int end = pe->blocks_in_piece;
1109 
1110 				// the first pass, only evict blocks that have been
1111 				// hashed
1112 				if (pass == 0 && pe->hash)
1113 					end = pe->hash->offset / default_block_size;
1114 
1115 				// go through the blocks and evict the ones
1116 				// that are not dirty and not referenced
1117 				int removed = 0;
1118 				for (int j = 0; j < end && num > 0; ++j)
1119 				{
1120 					cached_block_entry& b = pe->blocks[j];
1121 
1122 					if (b.buf == nullptr || b.refcount > 0 || b.dirty || b.pending) continue;
1123 
1124 					to_delete[num_to_delete++] = b.buf;
1125 					b.buf = nullptr;
1126 					TORRENT_PIECE_ASSERT(pe->num_blocks > 0, pe);
1127 					--pe->num_blocks;
1128 					++removed;
1129 					--num;
1130 				}
1131 
1132 				TORRENT_PIECE_ASSERT(m_read_cache_size >= removed, pe);
1133 				m_read_cache_size -= removed;
1134 				if (pe->cache_state == cached_piece_entry::volatile_read_lru)
1135 				{
1136 					m_volatile_size -= removed;
1137 				}
1138 
1139 				if (pe->ok_to_evict() && pe->num_blocks == 0)
1140 				{
1141 #if TORRENT_USE_INVARIANT_CHECKS
1142 					for (int j = 0; j < pe->blocks_in_piece; ++j)
1143 						TORRENT_PIECE_ASSERT(pe->blocks[j].buf == nullptr, pe);
1144 #endif
1145 					erase_piece(pe);
1146 				}
1147 			}
1148 		}
1149 	}
1150 
1151 	if (num_to_delete == 0) return num;
1152 
1153 	DLOG(stderr, "[%p]    removed %d blocks\n", static_cast<void*>(this)
1154 		, num_to_delete);
1155 
1156 	free_multiple_buffers(to_delete.first(num_to_delete));
1157 
1158 	return num;
1159 }
1160 
1161 void block_cache::clear(tailqueue<disk_io_job>& jobs)
1162 {
1163 	INVARIANT_CHECK;
1164 
1165 	// this holds all the block buffers we want to free
1166 	// at the end
1167 	std::vector<char*> bufs;
1168 
1169 	for (auto const& p : m_pieces)
1170 	{
1171 		auto& pe = const_cast<cached_piece_entry&>(p);
1172 #if TORRENT_USE_ASSERTS
1173 		for (tailqueue_iterator<disk_io_job> i = pe.jobs.iterate(); i.get(); i.next())
1174 			TORRENT_PIECE_ASSERT((static_cast<disk_io_job const*>(i.get()))->piece == pe.piece, &pe);
1175 		for (tailqueue_iterator<disk_io_job> i = pe.read_jobs.iterate(); i.get(); i.next())
1176 			TORRENT_PIECE_ASSERT((static_cast<disk_io_job const*>(i.get()))->piece == pe.piece, &pe);
1177 #endif
1178 		// this also removes the jobs from the piece
1179 		jobs.append(pe.jobs);
1180 		jobs.append(pe.read_jobs);
1181 
1182 		drain_piece_bufs(pe, bufs);
1183 	}
1184 
1185 	if (!bufs.empty()) free_multiple_buffers(bufs);
1186 
1187 	// clear lru lists
1188 	for (auto& l : m_lru) l.get_all();
1189 
1190 	// it's not ok to erase pieces with a refcount > 0
1191 	// since we're cancelling all jobs though, it shouldn't be too bad
1192 	// to let the jobs already running complete.
1193 	for (auto i = m_pieces.begin(); i != m_pieces.end();)
1194 	{
1195 		if (i->refcount == 0 && i->piece_refcount == 0)
1196 		{
1197 			i = m_pieces.erase(i);
1198 		}
1199 		else
1200 		{
1201 			++i;
1202 		}
1203 	}
1204 }
1205 
1206 void block_cache::move_to_ghost(cached_piece_entry* pe)
1207 {
1208 	TORRENT_PIECE_ASSERT(pe->refcount == 0, pe);
1209 	TORRENT_PIECE_ASSERT(pe->piece_refcount == 0, pe);
1210 	TORRENT_PIECE_ASSERT(pe->num_blocks == 0, pe);
1211 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
1212 
1213 	if (pe->cache_state == cached_piece_entry::volatile_read_lru)
1214 	{
1215 		erase_piece(pe);
1216 		return;
1217 	}
1218 
1219 	TORRENT_PIECE_ASSERT(pe->cache_state == cached_piece_entry::read_lru1
1220 		|| pe->cache_state == cached_piece_entry::read_lru2, pe);
1221 
1222 	// if the piece is in L1 or L2, move it into the ghost list
1223 	// i.e. recently evicted
1224 	if (pe->cache_state != cached_piece_entry::read_lru1
1225 		&& pe->cache_state != cached_piece_entry::read_lru2)
1226 		return;
1227 
1228 	// if the ghost list is growing too big, remove the oldest entry
1229 	linked_list<cached_piece_entry>* ghost_list = &m_lru[pe->cache_state + 1];
1230 	while (ghost_list->size() >= m_ghost_size)
1231 	{
1232 		cached_piece_entry* p = ghost_list->front();
1233 		TORRENT_PIECE_ASSERT(p != pe, p);
1234 		TORRENT_PIECE_ASSERT(p->num_blocks == 0, p);
1235 		TORRENT_PIECE_ASSERT(p->refcount == 0, p);
1236 		TORRENT_PIECE_ASSERT(p->piece_refcount == 0, p);
1237 		erase_piece(p);
1238 	}
1239 
1240 	m_lru[pe->cache_state].erase(pe);
1241 	pe->cache_state += 1;
1242 	ghost_list->push_back(pe);
1243 }
1244 
1245 int block_cache::pad_job(disk_io_job const* j, int const blocks_in_piece
1246 	, int const read_ahead) const
1247 {
1248 	int block_offset = j->d.io.offset & (default_block_size - 1);
1249 	int start = j->d.io.offset / default_block_size;
1250 	int end = block_offset > 0 && (read_ahead > default_block_size - block_offset) ? start + 2 : start + 1;
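
	// worked example (assuming 16 kiB blocks): a request starting 0x3000 into
	// a block, with read_ahead greater than the 0x1000 bytes remaining in that
	// block, spills into the next block, so "end" becomes start + 2. A
	// block-aligned request (block_offset == 0) leaves "end" at start + 1.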
1251 
1252 	// take the read-ahead into account
1253 	// make sure to not overflow in this case
1254 	if (read_ahead == INT_MAX) end = blocks_in_piece;
1255 	else end = std::min(blocks_in_piece, std::max(start + read_ahead, end));
1256 
1257 	return end - start;
1258 }
1259 
1260 void block_cache::insert_blocks(cached_piece_entry* pe, int block, span<iovec_t const> iov
1261 	, disk_io_job* j, int const flags)
1262 {
1263 #ifdef TORRENT_EXPENSIVE_INVARIANT_CHECKS
1264 	INVARIANT_CHECK;
1265 #endif
1266 
1267 	TORRENT_ASSERT(pe);
1268 	TORRENT_ASSERT(pe->in_use);
1269 	TORRENT_PIECE_ASSERT(!iov.empty(), pe);
1270 
1271 	cache_hit(pe, j->d.io.offset / default_block_size, bool(j->flags & disk_interface::volatile_read));
1272 
1273 	TORRENT_ASSERT(pe->in_use);
1274 
1275 	for (auto const& buf : iov)
1276 	{
1277 		// each iovec buffer has to be the size of a block (or the size of the last block)
1278 		TORRENT_PIECE_ASSERT(int(buf.size()) == std::min(default_block_size
1279 			, pe->storage->files().piece_size(pe->piece) - block * default_block_size), pe);
1280 
1281 		// no nullptrs allowed
1282 		TORRENT_ASSERT(buf.data() != nullptr);
1283 
1284 #ifdef TORRENT_DEBUG_BUFFERS
1285 		TORRENT_PIECE_ASSERT(is_disk_buffer(buf.data()), pe);
1286 #endif
1287 
1288 		if (pe->blocks[block].buf && (flags & blocks_inc_refcount))
1289 		{
1290 			inc_block_refcount(pe, block, ref_reading);
1291 		}
1292 
1293 		// either free the block or insert it. Never replace a block
1294 		if (pe->blocks[block].buf)
1295 		{
1296 			free_buffer(buf.data());
1297 		}
1298 		else
1299 		{
1300 			pe->blocks[block].buf = buf.data();
1301 
1302 			TORRENT_PIECE_ASSERT(buf.data() != nullptr, pe);
1303 			TORRENT_PIECE_ASSERT(pe->blocks[block].dirty == false, pe);
1304 			++pe->num_blocks;
1305 			++m_read_cache_size;
1306 			if (j->flags & disk_interface::volatile_read) ++m_volatile_size;
1307 
1308 			if (flags & blocks_inc_refcount)
1309 			{
1310 				bool ret = inc_block_refcount(pe, block, ref_reading);
1311 				TORRENT_UNUSED(ret); // suppress warning
1312 				TORRENT_ASSERT(ret);
1313 			}
1314 		}
1315 
1316 		TORRENT_ASSERT(pe->blocks[block].buf != nullptr);
1317 
1318 		block++;
1319 	}
1320 
1321 	TORRENT_PIECE_ASSERT(pe->cache_state != cached_piece_entry::read_lru1_ghost, pe);
1322 	TORRENT_PIECE_ASSERT(pe->cache_state != cached_piece_entry::read_lru2_ghost, pe);
1323 }
1324 
1325 // return false if the memory was purged
1326 bool block_cache::inc_block_refcount(cached_piece_entry* pe, int const block, int const reason)
1327 {
1328 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
1329 	TORRENT_PIECE_ASSERT(block < pe->blocks_in_piece, pe);
1330 	TORRENT_PIECE_ASSERT(block >= 0, pe);
1331 	if (pe->blocks[block].buf == nullptr) return false;
1332 	TORRENT_PIECE_ASSERT(pe->blocks[block].refcount < cached_block_entry::max_refcount, pe);
1333 	if (pe->blocks[block].refcount == 0)
1334 	{
1335 		++pe->pinned;
1336 		++m_pinned_blocks;
1337 	}
1338 	++pe->blocks[block].refcount;
1339 	++pe->refcount;
1340 #if TORRENT_USE_ASSERTS
1341 	switch (reason)
1342 	{
1343 		case ref_hashing: ++pe->blocks[block].hashing_count; break;
1344 		case ref_reading: ++pe->blocks[block].reading_count; break;
1345 		case ref_flushing: ++pe->blocks[block].flushing_count; break;
1346 	}
1347 	TORRENT_ASSERT(int(pe->blocks[block].refcount) >= pe->blocks[block].hashing_count
1348 		+ pe->blocks[block].reading_count + pe->blocks[block].flushing_count);
1349 #else
1350 	TORRENT_UNUSED(reason);
1351 #endif
1352 	return true;
1353 }
1354 
1355 void block_cache::dec_block_refcount(cached_piece_entry* pe, int const block, int const reason)
1356 {
1357 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
1358 	TORRENT_PIECE_ASSERT(block < pe->blocks_in_piece, pe);
1359 	TORRENT_PIECE_ASSERT(block >= 0, pe);
1360 
1361 	TORRENT_PIECE_ASSERT(pe->blocks[block].buf != nullptr, pe);
1362 	TORRENT_PIECE_ASSERT(pe->blocks[block].refcount > 0, pe);
1363 	--pe->blocks[block].refcount;
1364 	TORRENT_PIECE_ASSERT(pe->refcount > 0, pe);
1365 	--pe->refcount;
1366 	if (pe->blocks[block].refcount == 0)
1367 	{
1368 		TORRENT_PIECE_ASSERT(pe->pinned > 0, pe);
1369 		--pe->pinned;
1370 		TORRENT_PIECE_ASSERT(m_pinned_blocks > 0, pe);
1371 		--m_pinned_blocks;
1372 	}
1373 #if TORRENT_USE_ASSERTS
1374 	switch (reason)
1375 	{
1376 		case ref_hashing: --pe->blocks[block].hashing_count; break;
1377 		case ref_reading: --pe->blocks[block].reading_count; break;
1378 		case ref_flushing: --pe->blocks[block].flushing_count; break;
1379 	}
1380 	TORRENT_PIECE_ASSERT(int(pe->blocks[block].refcount) >= pe->blocks[block].hashing_count
1381 		+ pe->blocks[block].reading_count + pe->blocks[block].flushing_count, pe);
1382 #else
1383 	TORRENT_UNUSED(reason);
1384 #endif
1385 }
1386 
1387 void block_cache::abort_dirty(cached_piece_entry* pe)
1388 {
1389 	INVARIANT_CHECK;
1390 
1391 	TORRENT_PIECE_ASSERT(pe->in_use, pe);
1392 
1393 	TORRENT_ALLOCA(to_delete, char*, pe->blocks_in_piece);
1394 	int num_to_delete = 0;
1395 	for (int i = 0; i < pe->blocks_in_piece; ++i)
1396 	{
1397 		if (!pe->blocks[i].dirty
1398 			|| pe->blocks[i].refcount > 0
1399 			|| pe->blocks[i].buf == nullptr) continue;
1400 
1401 		TORRENT_PIECE_ASSERT(!pe->blocks[i].pending, pe);
1402 		TORRENT_PIECE_ASSERT(pe->blocks[i].dirty, pe);
1403 		to_delete[num_to_delete++] = pe->blocks[i].buf;
1404 		pe->blocks[i].buf = nullptr;
1405 		pe->blocks[i].dirty = false;
1406 		TORRENT_PIECE_ASSERT(pe->num_blocks > 0, pe);
1407 		--pe->num_blocks;
1408 		TORRENT_PIECE_ASSERT(m_write_cache_size > 0, pe);
1409 		--m_write_cache_size;
1410 		TORRENT_PIECE_ASSERT(pe->num_dirty > 0, pe);
1411 		--pe->num_dirty;
1412 	}
1413 	if (num_to_delete) free_multiple_buffers(to_delete.first(num_to_delete));
1414 
1415 	update_cache_state(pe);
1416 }
1417 
1418 int block_cache::drain_piece_bufs(cached_piece_entry& p, std::vector<char*>& buf)
1419 {
1420 	int const piece_size = p.storage->files().piece_size(p.piece);
1421 	int const blocks_in_piece = (piece_size + default_block_size - 1) / default_block_size;
1422 	int ret = 0;
1423 
1424 	TORRENT_PIECE_ASSERT(p.in_use, &p);
1425 
1426 	int removed_clean = 0;
1427 	for (int i = 0; i < blocks_in_piece; ++i)
1428 	{
1429 		if (p.blocks[i].buf == nullptr) continue;
1430 		TORRENT_PIECE_ASSERT(p.blocks[i].refcount == 0, &p);
1431 		buf.push_back(p.blocks[i].buf);
1432 		++ret;
1433 		p.blocks[i].buf = nullptr;
1434 		TORRENT_PIECE_ASSERT(p.num_blocks > 0, &p);
1435 		--p.num_blocks;
1436 
1437 		if (p.blocks[i].dirty)
1438 		{
1439 			TORRENT_ASSERT(m_write_cache_size > 0);
1440 			--m_write_cache_size;
1441 			TORRENT_PIECE_ASSERT(p.num_dirty > 0, &p);
1442 			--p.num_dirty;
1443 		}
1444 		else
1445 		{
1446 			++removed_clean;
1447 		}
1448 	}
1449 
1450 	TORRENT_ASSERT(m_read_cache_size >= removed_clean);
1451 	m_read_cache_size -= removed_clean;
1452 	if (p.cache_state == cached_piece_entry::volatile_read_lru)
1453 	{
1454 		m_volatile_size -= removed_clean;
1455 	}
1456 
1457 	update_cache_state(&p);
1458 	return ret;
1459 }
1460 
1461 void block_cache::update_stats_counters(counters& c) const
1462 {
1463 	c.set_value(counters::write_cache_blocks, m_write_cache_size);
1464 	c.set_value(counters::read_cache_blocks, m_read_cache_size);
1465 	c.set_value(counters::pinned_blocks, m_pinned_blocks);
1466 
1467 	c.set_value(counters::arc_mru_size, m_lru[cached_piece_entry::read_lru1].size());
1468 	c.set_value(counters::arc_mru_ghost_size, m_lru[cached_piece_entry::read_lru1_ghost].size());
1469 	c.set_value(counters::arc_mfu_size, m_lru[cached_piece_entry::read_lru2].size());
1470 	c.set_value(counters::arc_mfu_ghost_size, m_lru[cached_piece_entry::read_lru2_ghost].size());
1471 	c.set_value(counters::arc_write_size, m_lru[cached_piece_entry::write_lru].size());
1472 	c.set_value(counters::arc_volatile_size, m_lru[cached_piece_entry::volatile_read_lru].size());
1473 }
1474 
1475 #if TORRENT_ABI_VERSION == 1
1476 void block_cache::get_stats(cache_status* ret) const
1477 {
1478 	ret->write_cache_size = m_write_cache_size;
1479 	ret->read_cache_size = m_read_cache_size;
1480 	ret->pinned_blocks = m_pinned_blocks;
1481 	ret->cache_size = m_read_cache_size + m_write_cache_size;
1482 
1483 	ret->arc_mru_size = m_lru[cached_piece_entry::read_lru1].size();
1484 	ret->arc_mru_ghost_size = m_lru[cached_piece_entry::read_lru1_ghost].size();
1485 	ret->arc_mfu_size = m_lru[cached_piece_entry::read_lru2].size();
1486 	ret->arc_mfu_ghost_size = m_lru[cached_piece_entry::read_lru2_ghost].size();
1487 	ret->arc_write_size = m_lru[cached_piece_entry::write_lru].size();
1488 	ret->arc_volatile_size = m_lru[cached_piece_entry::volatile_read_lru].size();
1489 }
1490 #endif
1491 
1492 void block_cache::set_settings(aux::session_settings const& sett)
1493 {
1494 	// the ghost size is the number of pieces to keep track of
1495 	// after they are evicted. Since cache_size is blocks, the
1496 	// assumption is that there are about 128 blocks per piece,
1497 	// and there are two ghost lists, so divide by 2.
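	// for example (purely illustrative numbers): with cache_size = 1024
	// blocks and read_cache_line_size = 32, the ghost size becomes
	// std::max(8, 1024 / 32 / 2) = 16 pieces per ghost list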
1498 
1499 	m_ghost_size = std::max(8, sett.get_int(settings_pack::cache_size)
1500 		/ std::max(sett.get_int(settings_pack::read_cache_line_size), 4) / 2);
1501 
1502 	m_max_volatile_blocks = sett.get_int(settings_pack::cache_size_volatile);
1503 	disk_buffer_pool::set_settings(sett);
1504 }
1505 
1506 #if TORRENT_USE_INVARIANT_CHECKS
1507 void block_cache::check_invariant() const
1508 {
	int cached_write_blocks = 0;
	int cached_read_blocks = 0;
	int num_pinned = 0;

	std::set<storage_interface*> storages;

	for (int i = 0; i < cached_piece_entry::num_lrus; ++i)
	{
		time_point timeout = min_time();

		for (list_iterator<cached_piece_entry> p = m_lru[i].iterate(); p.get(); p.next())
		{
			cached_piece_entry* pe = p.get();
			TORRENT_PIECE_ASSERT(int(pe->cache_state) == i, pe);
			if (pe->num_dirty > 0)
				TORRENT_PIECE_ASSERT(i == cached_piece_entry::write_lru, pe);

//			if (i == cached_piece_entry::write_lru)
//				TORRENT_ASSERT(pe->num_dirty > 0);
			for (tailqueue_iterator<disk_io_job> j = pe->jobs.iterate(); j.get(); j.next())
			{
				disk_io_job const* job = static_cast<disk_io_job const*>(j.get());
				TORRENT_PIECE_ASSERT(job->piece == pe->piece, pe);
				TORRENT_PIECE_ASSERT(job->in_use, pe);
				TORRENT_PIECE_ASSERT(!job->callback_called, pe);
			}

			if (i != cached_piece_entry::read_lru1_ghost
				&& i != cached_piece_entry::read_lru2_ghost)
			{
				TORRENT_PIECE_ASSERT(pe->expire >= timeout, pe);
				timeout = pe->expire;
				TORRENT_PIECE_ASSERT(pe->in_storage, pe);
			}
			else
			{
				// pieces in the ghost lists should never have any blocks
				TORRENT_PIECE_ASSERT(pe->num_blocks == 0, pe);
			}
			// pieces in the ghost list are still in the storage's list of pieces,
			// because we need to be able to evict them when stopping a torrent

			storages.insert(pe->storage.get());
		}
	}

	for (auto s : storages)
	{
		for (auto const& pe : s->cached_pieces())
		{
			TORRENT_PIECE_ASSERT(pe.storage.get() == s, &pe);
		}
	}

	std::unordered_set<char*> buffers;
	for (auto const& p : m_pieces)
	{
		TORRENT_PIECE_ASSERT(p.blocks, &p);

		TORRENT_PIECE_ASSERT(p.storage, &p);
		int num_blocks = 0;
		int num_dirty = 0;
		int num_pending = 0;
		int num_refcount = 0;

		for (int k = 0; k < p.blocks_in_piece; ++k)
		{
			if (p.blocks[k].buf)
			{
				++num_blocks;
				if (p.blocks[k].dirty)
				{
					++num_dirty;
					++cached_write_blocks;
				}
				else
				{
					++cached_read_blocks;
				}
				if (p.blocks[k].pending) ++num_pending;
				if (p.blocks[k].refcount > 0) ++num_pinned;

				TORRENT_PIECE_ASSERT(int(p.blocks[k].refcount) >=
					p.blocks[k].hashing_count
					+ p.blocks[k].reading_count
					+ p.blocks[k].flushing_count, &p);

			}
			else
			{
				TORRENT_PIECE_ASSERT(!p.blocks[k].dirty, &p);
				TORRENT_PIECE_ASSERT(!p.blocks[k].pending, &p);
				TORRENT_PIECE_ASSERT(p.blocks[k].refcount == 0, &p);
			}
			num_refcount += p.blocks[k].refcount;
		}
		TORRENT_PIECE_ASSERT(num_blocks == int(p.num_blocks), &p);
		TORRENT_PIECE_ASSERT(num_pending <= p.refcount, &p);
		TORRENT_PIECE_ASSERT(num_refcount == p.refcount, &p);
		TORRENT_PIECE_ASSERT(num_dirty == int(p.num_dirty), &p);
	}
	TORRENT_ASSERT(m_read_cache_size == cached_read_blocks);
	TORRENT_ASSERT(m_write_cache_size == cached_write_blocks);
	TORRENT_ASSERT(m_pinned_blocks == num_pinned);
	TORRENT_ASSERT(m_write_cache_size + m_read_cache_size <= in_use());
}
#endif

// TODO: 2 turn these return values into enums
// returns
// -1: block not in cache
// -2: out of memory

int block_cache::copy_from_piece(cached_piece_entry* const pe
	, disk_io_job* const j, buffer_allocator_interface& allocator
	, bool const expect_no_fail)
{
	INVARIANT_CHECK;
	TORRENT_UNUSED(expect_no_fail);

	TORRENT_PIECE_ASSERT(pe->in_use, pe);

	// copy from the cache and update the last use timestamp
	int block = j->d.io.offset / default_block_size;
	int block_offset = j->d.io.offset & (default_block_size - 1);
	int buffer_offset = 0;
	int size = j->d.io.buffer_size;
	int const blocks_to_read = block_offset > 0 && (size > default_block_size - block_offset) ? 2 : 1;
	TORRENT_PIECE_ASSERT(size <= default_block_size, pe);
	int const start_block = block;
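	// illustrative example (values made up): a 0x4000-byte request at
	// piece offset 0x7000 with 16 kiB (0x4000) blocks yields block = 1
	// and block_offset = 0x3000; since the request spills past the end
	// of block 1, blocks_to_read = 2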

#if TORRENT_USE_ASSERTS
	int const piece_size = j->storage->files().piece_size(j->piece);
	int const blocks_in_piece = (piece_size + default_block_size - 1) / default_block_size;
	TORRENT_PIECE_ASSERT(start_block < blocks_in_piece, pe);
#endif

	// if there's no buffer, we don't have this block in
	// the cache, and we're not currently reading it in either
	// since it's not pending

	if (inc_block_refcount(pe, start_block, ref_reading) == false)
	{
		TORRENT_ASSERT(!expect_no_fail);
		return -1;
	}
	// if the request starts at a non-zero offset within a block and
	// extends past the end of that block (blocks_to_read == 2), we have
	// to copy parts of two cached blocks, since the request is not
	// aligned to a block boundary
	if (blocks_to_read == 1 && !(j->flags & disk_interface::force_copy))
	{
		// special case for a request contained in a single block:
		// don't actually copy the buffer, just reference the existing
		// block. This means we don't decrement the refcount here; we
		// hand ownership of the reference over to the calling thread.
		cached_block_entry& bl = pe->blocks[start_block];
		bl.cache_hit = 1;

		// make sure it didn't wrap
		TORRENT_PIECE_ASSERT(pe->refcount > 0, pe);
		int const blocks_per_piece = (j->storage->files().piece_length() + default_block_size - 1) / default_block_size;
		TORRENT_ASSERT(block_offset < 0x4000);
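		// the block_cache_reference cookie encodes which cached block is
		// being referenced: piece * blocks_per_piece + block index.
		// reclaim_block() reverses this arithmetic to locate the block
		// and drop the reference when the buffer is released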
		j->argument = disk_buffer_holder(allocator
			, aux::block_cache_reference{ j->storage->storage_index()
				, static_cast<int>(pe->piece) * blocks_per_piece + start_block}
			, bl.buf + block_offset, static_cast<std::size_t>(0x4000 - block_offset));
		j->storage->inc_refcount();

		++m_send_buffer_blocks;
		return j->d.io.buffer_size;
	}

	// if we don't have the second block, it's a cache miss
	if (blocks_to_read == 2 && inc_block_refcount(pe, start_block + 1, ref_reading) == false)
	{
		TORRENT_ASSERT(!expect_no_fail);
		dec_block_refcount(pe, start_block, ref_reading);
		maybe_free_piece(pe);
		return -1;
	}

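	// general (copying) case: allocate a fresh 16 kiB send buffer and
	// copy the requested range out of one or two cached blocks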
	j->argument = disk_buffer_holder(allocator
		, allocate_buffer("send buffer"), 0x4000);
	if (!boost::get<disk_buffer_holder>(j->argument)) return -2;

	while (size > 0)
	{
		TORRENT_PIECE_ASSERT(pe->blocks[block].buf, pe);
		int const to_copy = std::min(default_block_size - block_offset, size);
		std::memcpy(boost::get<disk_buffer_holder>(j->argument).get()
			+ buffer_offset
			, pe->blocks[block].buf + block_offset
			, aux::numeric_cast<std::size_t>(to_copy));
		pe->blocks[block].cache_hit = 1;
		size -= to_copy;
		block_offset = 0;
		buffer_offset += to_copy;
		++block;
	}
	// we incremented the refcount for each block we copied from
	// (one or two of them). now decrement them again.
	// TODO: create a holder for refcounts that automatically decrement
	dec_block_refcount(pe, start_block, ref_reading);
	if (blocks_to_read == 2) dec_block_refcount(pe, start_block + 1, ref_reading);
	maybe_free_piece(pe);
	return j->d.io.buffer_size;
}

void block_cache::reclaim_block(storage_interface* st, aux::block_cache_reference const& ref)
{
	TORRENT_ASSERT(st != nullptr);
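	// undo the cookie encoding from copy_from_piece(): the cookie is
	// piece * blocks_per_piece + block, so dividing by blocks_per_piece
	// recovers the piece index and the remainder is the block index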
	int const blocks_per_piece = (st->files().piece_length() + default_block_size - 1) / default_block_size;
	piece_index_t const piece(ref.cookie / blocks_per_piece);
	int const block(ref.cookie % blocks_per_piece);

	cached_piece_entry* pe = find_piece(st, piece);
	TORRENT_ASSERT(pe);
	if (pe == nullptr) return;

	TORRENT_PIECE_ASSERT(pe->in_use, pe);

	TORRENT_PIECE_ASSERT(pe->blocks[block].buf, pe);
	dec_block_refcount(pe, block, block_cache::ref_reading);

	TORRENT_PIECE_ASSERT(m_send_buffer_blocks > 0, pe);
	--m_send_buffer_blocks;

	maybe_free_piece(pe);
}

bool block_cache::maybe_free_piece(cached_piece_entry* pe)
{
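	// eviction is deferred: the piece is only actually freed once it has
	// been marked for eviction, ok_to_evict() reports that nothing is
	// holding it, and no jobs are still attached to it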
	if (!pe->ok_to_evict()
		|| !pe->marked_for_eviction
		|| !pe->jobs.empty())
		return false;

	DLOG(stderr, "[%p] block_cache maybe_free_piece "
		"piece: %d refcount: %d marked_for_eviction: %d\n"
		, static_cast<void*>(this)
		, int(pe->piece), int(pe->refcount), int(pe->marked_for_eviction));

	tailqueue<disk_io_job> jobs;
	bool removed = evict_piece(pe, jobs
		, pe->marked_for_deletion ? disallow_ghost : allow_ghost);
	TORRENT_UNUSED(removed); // suppress warning
	TORRENT_PIECE_ASSERT(removed, pe);
	TORRENT_PIECE_ASSERT(jobs.empty(), pe);

	return true;
}

cached_piece_entry* block_cache::find_piece(disk_io_job const* j)
{
	return find_piece(j->storage.get(), j->piece);
}

cached_piece_entry* block_cache::find_piece(storage_interface* st, piece_index_t const piece)
{
	cached_piece_entry model;
	model.storage = st->shared_from_this();
	model.piece = piece;
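	// m_pieces is keyed on (storage, piece), so build a temporary
	// "model" entry with just those fields set and use it to probe the
	// container for the real entry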
	auto const i = m_pieces.find(model);
	TORRENT_ASSERT(i == m_pieces.end() || (i->storage.get() == st && i->piece == piece));
	if (i == m_pieces.end()) return nullptr;
	TORRENT_PIECE_ASSERT(i->in_use, &*i);

#if TORRENT_USE_ASSERTS
	for (tailqueue_iterator<const disk_io_job> j = i->jobs.iterate(); j.get(); j.next())
	{
		disk_io_job const* job = static_cast<disk_io_job const*>(j.get());
		TORRENT_PIECE_ASSERT(job->piece == piece, &*i);
	}
#endif

	return const_cast<cached_piece_entry*>(&*i);
}

}