xref: /linux/lib/scatterlist.c (revision 6a30653b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
4  *
5  * Scatterlist handling helpers.
6  */
7 #include <linux/export.h>
8 #include <linux/slab.h>
9 #include <linux/scatterlist.h>
10 #include <linux/highmem.h>
11 #include <linux/kmemleak.h>
12 #include <linux/bvec.h>
13 #include <linux/uio.h>
14 
15 /**
16  * sg_next - return the next scatterlist entry in a list
17  * @sg:		The current sg entry
18  *
19  * Description:
20  *   Usually the next entry will be @sg@ + 1, but if this sg element is part
21  *   of a chained scatterlist, it could jump to the start of a new
22  *   scatterlist array.
23  *
24  **/
sg_next(struct scatterlist * sg)25 struct scatterlist *sg_next(struct scatterlist *sg)
26 {
27 	if (sg_is_last(sg))
28 		return NULL;
29 
30 	sg++;
31 	if (unlikely(sg_is_chain(sg)))
32 		sg = sg_chain_ptr(sg);
33 
34 	return sg;
35 }
36 EXPORT_SYMBOL(sg_next);
37 
/**
 * sg_nents - return total count of entries in scatterlist
 * @sg:		The scatterlist
 *
 * Description:
 * Walks the list with sg_next(), so chained scatterlists are followed, and
 * counts every entry visited until the end of the list.
 *
 * Returns:
 *   the number of entries, 0 if @sg is %NULL
 *
 **/
int sg_nents(struct scatterlist *sg)
{
	int count = 0;

	while (sg) {
		count++;
		sg = sg_next(sg);
	}

	return count;
}
EXPORT_SYMBOL(sg_nents);
55 
56 /**
57  * sg_nents_for_len - return total count of entries in scatterlist
58  *                    needed to satisfy the supplied length
59  * @sg:		The scatterlist
60  * @len:	The total required length
61  *
62  * Description:
63  * Determines the number of entries in sg that are required to meet
64  * the supplied length, taking into account chaining as well
65  *
66  * Returns:
67  *   the number of sg entries needed, negative error on failure
68  *
69  **/
sg_nents_for_len(struct scatterlist * sg,u64 len)70 int sg_nents_for_len(struct scatterlist *sg, u64 len)
71 {
72 	int nents;
73 	u64 total;
74 
75 	if (!len)
76 		return 0;
77 
78 	for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
79 		nents++;
80 		total += sg->length;
81 		if (total >= len)
82 			return nents;
83 	}
84 
85 	return -EINVAL;
86 }
87 EXPORT_SYMBOL(sg_nents_for_len);
88 
89 /**
90  * sg_last - return the last scatterlist entry in a list
91  * @sgl:	First entry in the scatterlist
92  * @nents:	Number of entries in the scatterlist
93  *
94  * Description:
95  *   Should only be used casually, it (currently) scans the entire list
96  *   to get the last entry.
97  *
98  *   Note that the @sgl@ pointer passed in need not be the first one,
99  *   the important bit is that @nents@ denotes the number of entries that
100  *   exist from @sgl@.
101  *
102  **/
sg_last(struct scatterlist * sgl,unsigned int nents)103 struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
104 {
105 	struct scatterlist *sg, *ret = NULL;
106 	unsigned int i;
107 
108 	for_each_sg(sgl, sg, nents, i)
109 		ret = sg;
110 
111 	BUG_ON(!sg_is_last(ret));
112 	return ret;
113 }
114 EXPORT_SYMBOL(sg_last);
115 
116 /**
117  * sg_init_table - Initialize SG table
118  * @sgl:	   The SG table
119  * @nents:	   Number of entries in table
120  *
121  * Notes:
122  *   If this is part of a chained sg table, sg_mark_end() should be
123  *   used only on the last table part.
124  *
125  **/
sg_init_table(struct scatterlist * sgl,unsigned int nents)126 void sg_init_table(struct scatterlist *sgl, unsigned int nents)
127 {
128 	memset(sgl, 0, sizeof(*sgl) * nents);
129 	sg_init_marker(sgl, nents);
130 }
131 EXPORT_SYMBOL(sg_init_table);
132 
133 /**
134  * sg_init_one - Initialize a single entry sg list
135  * @sg:		 SG entry
136  * @buf:	 Virtual address for IO
137  * @buflen:	 IO length
138  *
139  **/
sg_init_one(struct scatterlist * sg,const void * buf,unsigned int buflen)140 void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
141 {
142 	sg_init_table(sg, 1);
143 	sg_set_buf(sg, buf, buflen);
144 }
145 EXPORT_SYMBOL(sg_init_one);
146 
147 /*
148  * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
149  * helpers.
150  */
sg_kmalloc(unsigned int nents,gfp_t gfp_mask)151 static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
152 {
153 	if (nents == SG_MAX_SINGLE_ALLOC) {
154 		/*
155 		 * Kmemleak doesn't track page allocations as they are not
156 		 * commonly used (in a raw form) for kernel data structures.
157 		 * As we chain together a list of pages and then a normal
158 		 * kmalloc (tracked by kmemleak), in order to for that last
159 		 * allocation not to become decoupled (and thus a
160 		 * false-positive) we need to inform kmemleak of all the
161 		 * intermediate allocations.
162 		 */
163 		void *ptr = (void *) __get_free_page(gfp_mask);
164 		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
165 		return ptr;
166 	} else
167 		return kmalloc_array(nents, sizeof(struct scatterlist),
168 				     gfp_mask);
169 }
170 
sg_kfree(struct scatterlist * sg,unsigned int nents)171 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
172 {
173 	if (nents == SG_MAX_SINGLE_ALLOC) {
174 		kmemleak_free(sg);
175 		free_page((unsigned long) sg);
176 	} else
177 		kfree(sg);
178 }
179 
180 /**
181  * __sg_free_table - Free a previously mapped sg table
182  * @table:	The sg table header to use
183  * @max_ents:	The maximum number of entries per single scatterlist
184  * @nents_first_chunk: Number of entries int the (preallocated) first
185  * 	scatterlist chunk, 0 means no such preallocated first chunk
186  * @free_fn:	Free function
187  * @num_ents:	Number of entries in the table
188  *
189  *  Description:
190  *    Free an sg table previously allocated and setup with
191  *    __sg_alloc_table().  The @max_ents value must be identical to
192  *    that previously used with __sg_alloc_table().
193  *
194  **/
__sg_free_table(struct sg_table * table,unsigned int max_ents,unsigned int nents_first_chunk,sg_free_fn * free_fn,unsigned int num_ents)195 void __sg_free_table(struct sg_table *table, unsigned int max_ents,
196 		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
197 		     unsigned int num_ents)
198 {
199 	struct scatterlist *sgl, *next;
200 	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
201 
202 	if (unlikely(!table->sgl))
203 		return;
204 
205 	sgl = table->sgl;
206 	while (num_ents) {
207 		unsigned int alloc_size = num_ents;
208 		unsigned int sg_size;
209 
210 		/*
211 		 * If we have more than max_ents segments left,
212 		 * then assign 'next' to the sg table after the current one.
213 		 * sg_size is then one less than alloc size, since the last
214 		 * element is the chain pointer.
215 		 */
216 		if (alloc_size > curr_max_ents) {
217 			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
218 			alloc_size = curr_max_ents;
219 			sg_size = alloc_size - 1;
220 		} else {
221 			sg_size = alloc_size;
222 			next = NULL;
223 		}
224 
225 		num_ents -= sg_size;
226 		if (nents_first_chunk)
227 			nents_first_chunk = 0;
228 		else
229 			free_fn(sgl, alloc_size);
230 		sgl = next;
231 		curr_max_ents = max_ents;
232 	}
233 
234 	table->sgl = NULL;
235 }
236 EXPORT_SYMBOL(__sg_free_table);
237 
238 /**
239  * sg_free_append_table - Free a previously allocated append sg table.
240  * @table:	 The mapped sg append table header
241  *
242  **/
sg_free_append_table(struct sg_append_table * table)243 void sg_free_append_table(struct sg_append_table *table)
244 {
245 	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
246 			table->total_nents);
247 }
248 EXPORT_SYMBOL(sg_free_append_table);
249 
250 
251 /**
252  * sg_free_table - Free a previously allocated sg table
253  * @table:	The mapped sg table header
254  *
255  **/
sg_free_table(struct sg_table * table)256 void sg_free_table(struct sg_table *table)
257 {
258 	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
259 			table->orig_nents);
260 }
261 EXPORT_SYMBOL(sg_free_table);
262 
263 /**
264  * __sg_alloc_table - Allocate and initialize an sg table with given allocator
265  * @table:	The sg table header to use
266  * @nents:	Number of entries in sg list
267  * @max_ents:	The maximum number of entries the allocator returns per call
268  * @first_chunk: first SGL if preallocated (may be %NULL)
269  * @nents_first_chunk: Number of entries in the (preallocated) first
270  * 	scatterlist chunk, 0 means no such preallocated chunk provided by user
271  * @gfp_mask:	GFP allocation mask
272  * @alloc_fn:	Allocator to use
273  *
274  * Description:
275  *   This function returns a @table @nents long. The allocator is
276  *   defined to return scatterlist chunks of maximum size @max_ents.
277  *   Thus if @nents is bigger than @max_ents, the scatterlists will be
278  *   chained in units of @max_ents.
279  *
280  * Notes:
281  *   If this function returns non-0 (eg failure), the caller must call
282  *   __sg_free_table() to cleanup any leftover allocations.
283  *
284  **/
__sg_alloc_table(struct sg_table * table,unsigned int nents,unsigned int max_ents,struct scatterlist * first_chunk,unsigned int nents_first_chunk,gfp_t gfp_mask,sg_alloc_fn * alloc_fn)285 int __sg_alloc_table(struct sg_table *table, unsigned int nents,
286 		     unsigned int max_ents, struct scatterlist *first_chunk,
287 		     unsigned int nents_first_chunk, gfp_t gfp_mask,
288 		     sg_alloc_fn *alloc_fn)
289 {
290 	struct scatterlist *sg, *prv;
291 	unsigned int left;
292 	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
293 	unsigned prv_max_ents;
294 
295 	memset(table, 0, sizeof(*table));
296 
297 	if (nents == 0)
298 		return -EINVAL;
299 #ifdef CONFIG_ARCH_NO_SG_CHAIN
300 	if (WARN_ON_ONCE(nents > max_ents))
301 		return -EINVAL;
302 #endif
303 
304 	left = nents;
305 	prv = NULL;
306 	do {
307 		unsigned int sg_size, alloc_size = left;
308 
309 		if (alloc_size > curr_max_ents) {
310 			alloc_size = curr_max_ents;
311 			sg_size = alloc_size - 1;
312 		} else
313 			sg_size = alloc_size;
314 
315 		left -= sg_size;
316 
317 		if (first_chunk) {
318 			sg = first_chunk;
319 			first_chunk = NULL;
320 		} else {
321 			sg = alloc_fn(alloc_size, gfp_mask);
322 		}
323 		if (unlikely(!sg)) {
324 			/*
325 			 * Adjust entry count to reflect that the last
326 			 * entry of the previous table won't be used for
327 			 * linkage.  Without this, sg_kfree() may get
328 			 * confused.
329 			 */
330 			if (prv)
331 				table->nents = ++table->orig_nents;
332 
333 			return -ENOMEM;
334 		}
335 
336 		sg_init_table(sg, alloc_size);
337 		table->nents = table->orig_nents += sg_size;
338 
339 		/*
340 		 * If this is the first mapping, assign the sg table header.
341 		 * If this is not the first mapping, chain previous part.
342 		 */
343 		if (prv)
344 			sg_chain(prv, prv_max_ents, sg);
345 		else
346 			table->sgl = sg;
347 
348 		/*
349 		 * If no more entries after this one, mark the end
350 		 */
351 		if (!left)
352 			sg_mark_end(&sg[sg_size - 1]);
353 
354 		prv = sg;
355 		prv_max_ents = curr_max_ents;
356 		curr_max_ents = max_ents;
357 	} while (left);
358 
359 	return 0;
360 }
361 EXPORT_SYMBOL(__sg_alloc_table);
362 
363 /**
364  * sg_alloc_table - Allocate and initialize an sg table
365  * @table:	The sg table header to use
366  * @nents:	Number of entries in sg list
367  * @gfp_mask:	GFP allocation mask
368  *
369  *  Description:
370  *    Allocate and initialize an sg table. If @nents@ is larger than
371  *    SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
372  *
373  **/
sg_alloc_table(struct sg_table * table,unsigned int nents,gfp_t gfp_mask)374 int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
375 {
376 	int ret;
377 
378 	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
379 			       NULL, 0, gfp_mask, sg_kmalloc);
380 	if (unlikely(ret))
381 		sg_free_table(table);
382 	return ret;
383 }
384 EXPORT_SYMBOL(sg_alloc_table);
385 
/*
 * Return the entry that follows @cur in @table, allocating and chaining a
 * new chunk of up to SG_MAX_SINGLE_ALLOC entries when the existing chunk has
 * no usable entry left.  With @cur == NULL the first chunk of the table is
 * allocated.  Returns ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *fresh, *succ = NULL;
	unsigned int fresh_nents;

	if (cur) {
		succ = sg_next(cur);
		/*
		 * The last entry of a chunk is kept for chaining unless it
		 * is the only entry still needed.
		 */
		if (!sg_is_last(succ) || needed_sges == 1)
			return succ;
	}

	fresh_nents = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	fresh = sg_kmalloc(fresh_nents, gfp_mask);
	if (!fresh)
		return ERR_PTR(-ENOMEM);
	sg_init_table(fresh, fresh_nents);

	if (!cur) {
		/* Very first chunk: it becomes the head of the table. */
		table->sgt.sgl = fresh;
		table->total_nents = fresh_nents;
		return fresh;
	}

	/* The old tail entry turns into the chain link, hence the "- 1". */
	table->total_nents += fresh_nents - 1;
	__sg_chain(succ, fresh);
	return fresh;
}
415 
pages_are_mergeable(struct page * a,struct page * b)416 static bool pages_are_mergeable(struct page *a, struct page *b)
417 {
418 	if (page_to_pfn(a) != page_to_pfn(b) + 1)
419 		return false;
420 	if (!zone_device_pages_have_same_pgmap(a, b))
421 		return false;
422 	return true;
423 }
424 
425 /**
426  * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
427  *                                    table from an array of pages
428  * @sgt_append:  The sg append table to use
429  * @pages:       Pointer to an array of page pointers
430  * @n_pages:     Number of pages in the pages array
431  * @offset:      Offset from start of the first page to the start of a buffer
432  * @size:        Number of valid bytes in the buffer (after offset)
433  * @max_segment: Maximum size of a scatterlist element in bytes
434  * @left_pages:  Left pages caller have to set after this call
435  * @gfp_mask:	 GFP allocation mask
436  *
437  * Description:
438  *    In the first call it allocate and initialize an sg table from a list of
439  *    pages, else reuse the scatterlist from sgt_append. Contiguous ranges of
440  *    the pages are squashed into a single scatterlist entry up to the maximum
441  *    size specified in @max_segment.  A user may provide an offset at a start
442  *    and a size of valid data in a buffer specified by the page array. The
443  *    returned sg table is released by sg_free_append_table
444  *
445  * Returns:
446  *   0 on success, negative error on failure
447  *
448  * Notes:
449  *   If this function returns non-0 (eg failure), the caller must call
450  *   sg_free_append_table() to cleanup any leftover allocations.
451  *
452  *   In the fist call, sgt_append must by initialized.
453  */
sg_alloc_append_table_from_pages(struct sg_append_table * sgt_append,struct page ** pages,unsigned int n_pages,unsigned int offset,unsigned long size,unsigned int max_segment,unsigned int left_pages,gfp_t gfp_mask)454 int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
455 		struct page **pages, unsigned int n_pages, unsigned int offset,
456 		unsigned long size, unsigned int max_segment,
457 		unsigned int left_pages, gfp_t gfp_mask)
458 {
459 	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
460 	unsigned int added_nents = 0;
461 	struct scatterlist *s = sgt_append->prv;
462 	struct page *last_pg;
463 
464 	/*
465 	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
466 	 * otherwise it can overshoot.
467 	 */
468 	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
469 	if (WARN_ON(max_segment < PAGE_SIZE))
470 		return -EINVAL;
471 
472 	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
473 		return -EOPNOTSUPP;
474 
475 	if (sgt_append->prv) {
476 		unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) +
477 			sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE;
478 
479 		if (WARN_ON(offset))
480 			return -EINVAL;
481 
482 		/* Merge contiguous pages into the last SG */
483 		prv_len = sgt_append->prv->length;
484 		if (page_to_pfn(pages[0]) == next_pfn) {
485 			last_pg = pfn_to_page(next_pfn - 1);
486 			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
487 				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
488 					break;
489 				sgt_append->prv->length += PAGE_SIZE;
490 				last_pg = pages[0];
491 				pages++;
492 				n_pages--;
493 			}
494 			if (!n_pages)
495 				goto out;
496 		}
497 	}
498 
499 	/* compute number of contiguous chunks */
500 	chunks = 1;
501 	seg_len = 0;
502 	for (i = 1; i < n_pages; i++) {
503 		seg_len += PAGE_SIZE;
504 		if (seg_len >= max_segment ||
505 		    !pages_are_mergeable(pages[i], pages[i - 1])) {
506 			chunks++;
507 			seg_len = 0;
508 		}
509 	}
510 
511 	/* merging chunks and putting them into the scatterlist */
512 	cur_page = 0;
513 	for (i = 0; i < chunks; i++) {
514 		unsigned int j, chunk_size;
515 
516 		/* look for the end of the current chunk */
517 		seg_len = 0;
518 		for (j = cur_page + 1; j < n_pages; j++) {
519 			seg_len += PAGE_SIZE;
520 			if (seg_len >= max_segment ||
521 			    !pages_are_mergeable(pages[j], pages[j - 1]))
522 				break;
523 		}
524 
525 		/* Pass how many chunks might be left */
526 		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
527 				gfp_mask);
528 		if (IS_ERR(s)) {
529 			/*
530 			 * Adjust entry length to be as before function was
531 			 * called.
532 			 */
533 			if (sgt_append->prv)
534 				sgt_append->prv->length = prv_len;
535 			return PTR_ERR(s);
536 		}
537 		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
538 		sg_set_page(s, pages[cur_page],
539 			    min_t(unsigned long, size, chunk_size), offset);
540 		added_nents++;
541 		size -= chunk_size;
542 		offset = 0;
543 		cur_page = j;
544 	}
545 	sgt_append->sgt.nents += added_nents;
546 	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
547 	sgt_append->prv = s;
548 out:
549 	if (!left_pages)
550 		sg_mark_end(s);
551 	return 0;
552 }
553 EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
554 
555 /**
556  * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
557  *                                     an array of pages and given maximum
558  *                                     segment.
559  * @sgt:	 The sg table header to use
560  * @pages:	 Pointer to an array of page pointers
561  * @n_pages:	 Number of pages in the pages array
562  * @offset:      Offset from start of the first page to the start of a buffer
563  * @size:        Number of valid bytes in the buffer (after offset)
564  * @max_segment: Maximum size of a scatterlist element in bytes
565  * @gfp_mask:	 GFP allocation mask
566  *
567  *  Description:
568  *    Allocate and initialize an sg table from a list of pages. Contiguous
569  *    ranges of the pages are squashed into a single scatterlist node up to the
570  *    maximum size specified in @max_segment. A user may provide an offset at a
571  *    start and a size of valid data in a buffer specified by the page array.
572  *
573  *    The returned sg table is released by sg_free_table.
574  *
575  *  Returns:
576  *   0 on success, negative error on failure
577  */
sg_alloc_table_from_pages_segment(struct sg_table * sgt,struct page ** pages,unsigned int n_pages,unsigned int offset,unsigned long size,unsigned int max_segment,gfp_t gfp_mask)578 int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
579 				unsigned int n_pages, unsigned int offset,
580 				unsigned long size, unsigned int max_segment,
581 				gfp_t gfp_mask)
582 {
583 	struct sg_append_table append = {};
584 	int err;
585 
586 	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
587 					       size, max_segment, 0, gfp_mask);
588 	if (err) {
589 		sg_free_append_table(&append);
590 		return err;
591 	}
592 	memcpy(sgt, &append.sgt, sizeof(*sgt));
593 	WARN_ON(append.total_nents != sgt->orig_nents);
594 	return 0;
595 }
596 EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
597 
598 #ifdef CONFIG_SGL_ALLOC
599 
600 /**
601  * sgl_alloc_order - allocate a scatterlist and its pages
602  * @length: Length in bytes of the scatterlist. Must be at least one
603  * @order: Second argument for alloc_pages()
604  * @chainable: Whether or not to allocate an extra element in the scatterlist
605  *	for scatterlist chaining purposes
606  * @gfp: Memory allocation flags
607  * @nent_p: [out] Number of entries in the scatterlist that have pages
608  *
609  * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
610  */
sgl_alloc_order(unsigned long long length,unsigned int order,bool chainable,gfp_t gfp,unsigned int * nent_p)611 struct scatterlist *sgl_alloc_order(unsigned long long length,
612 				    unsigned int order, bool chainable,
613 				    gfp_t gfp, unsigned int *nent_p)
614 {
615 	struct scatterlist *sgl, *sg;
616 	struct page *page;
617 	unsigned int nent, nalloc;
618 	u32 elem_len;
619 
620 	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
621 	/* Check for integer overflow */
622 	if (length > (nent << (PAGE_SHIFT + order)))
623 		return NULL;
624 	nalloc = nent;
625 	if (chainable) {
626 		/* Check for integer overflow */
627 		if (nalloc + 1 < nalloc)
628 			return NULL;
629 		nalloc++;
630 	}
631 	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
632 			    gfp & ~GFP_DMA);
633 	if (!sgl)
634 		return NULL;
635 
636 	sg_init_table(sgl, nalloc);
637 	sg = sgl;
638 	while (length) {
639 		elem_len = min_t(u64, length, PAGE_SIZE << order);
640 		page = alloc_pages(gfp, order);
641 		if (!page) {
642 			sgl_free_order(sgl, order);
643 			return NULL;
644 		}
645 
646 		sg_set_page(sg, page, elem_len, 0);
647 		length -= elem_len;
648 		sg = sg_next(sg);
649 	}
650 	WARN_ONCE(length, "length = %lld\n", length);
651 	if (nent_p)
652 		*nent_p = nent;
653 	return sgl;
654 }
655 EXPORT_SYMBOL(sgl_alloc_order);
656 
657 /**
658  * sgl_alloc - allocate a scatterlist and its pages
659  * @length: Length in bytes of the scatterlist
660  * @gfp: Memory allocation flags
661  * @nent_p: [out] Number of entries in the scatterlist
662  *
663  * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
664  */
sgl_alloc(unsigned long long length,gfp_t gfp,unsigned int * nent_p)665 struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
666 			      unsigned int *nent_p)
667 {
668 	return sgl_alloc_order(length, 0, false, gfp, nent_p);
669 }
670 EXPORT_SYMBOL(sgl_alloc);
671 
/**
 * sgl_free_n_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @nents: Maximum number of elements to free
 * @order: Second argument for __free_pages()
 *
 * Description:
 *   Frees the page of each visited element that has one, stopping after
 *   @nents elements or at the end of the list, then frees @sgl itself.
 *
 * Notes:
 * - If several scatterlists have been chained and each chain element is
 *   freed separately then it's essential to set nents correctly to avoid that a
 *   page would get freed twice.
 * - All pages in a chained scatterlist can be freed at once by setting @nents
 *   to a high number.
 */
void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
{
	struct scatterlist *cursor = sgl;
	int idx;

	for (idx = 0; idx < nents && cursor; idx++) {
		struct page *pg = sg_page(cursor);

		/* Elements without a page are simply skipped. */
		if (pg)
			__free_pages(pg, order);
		cursor = sg_next(cursor);
	}

	kfree(sgl);
}
EXPORT_SYMBOL(sgl_free_n_order);
701 
702 /**
703  * sgl_free_order - free a scatterlist and its pages
704  * @sgl: Scatterlist with one or more elements
705  * @order: Second argument for __free_pages()
706  */
sgl_free_order(struct scatterlist * sgl,int order)707 void sgl_free_order(struct scatterlist *sgl, int order)
708 {
709 	sgl_free_n_order(sgl, INT_MAX, order);
710 }
711 EXPORT_SYMBOL(sgl_free_order);
712 
713 /**
714  * sgl_free - free a scatterlist and its pages
715  * @sgl: Scatterlist with one or more elements
716  */
sgl_free(struct scatterlist * sgl)717 void sgl_free(struct scatterlist *sgl)
718 {
719 	sgl_free_order(sgl, 0);
720 }
721 EXPORT_SYMBOL(sgl_free);
722 
723 #endif /* CONFIG_SGL_ALLOC */
724 
__sg_page_iter_start(struct sg_page_iter * piter,struct scatterlist * sglist,unsigned int nents,unsigned long pgoffset)725 void __sg_page_iter_start(struct sg_page_iter *piter,
726 			  struct scatterlist *sglist, unsigned int nents,
727 			  unsigned long pgoffset)
728 {
729 	piter->__pg_advance = 0;
730 	piter->__nents = nents;
731 
732 	piter->sg = sglist;
733 	piter->sg_pgoffset = pgoffset;
734 }
735 EXPORT_SYMBOL(__sg_page_iter_start);
736 
sg_page_count(struct scatterlist * sg)737 static int sg_page_count(struct scatterlist *sg)
738 {
739 	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
740 }
741 
__sg_page_iter_next(struct sg_page_iter * piter)742 bool __sg_page_iter_next(struct sg_page_iter *piter)
743 {
744 	if (!piter->__nents || !piter->sg)
745 		return false;
746 
747 	piter->sg_pgoffset += piter->__pg_advance;
748 	piter->__pg_advance = 1;
749 
750 	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
751 		piter->sg_pgoffset -= sg_page_count(piter->sg);
752 		piter->sg = sg_next(piter->sg);
753 		if (!--piter->__nents || !piter->sg)
754 			return false;
755 	}
756 
757 	return true;
758 }
759 EXPORT_SYMBOL(__sg_page_iter_next);
760 
sg_dma_page_count(struct scatterlist * sg)761 static int sg_dma_page_count(struct scatterlist *sg)
762 {
763 	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
764 }
765 
__sg_page_iter_dma_next(struct sg_dma_page_iter * dma_iter)766 bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
767 {
768 	struct sg_page_iter *piter = &dma_iter->base;
769 
770 	if (!piter->__nents || !piter->sg)
771 		return false;
772 
773 	piter->sg_pgoffset += piter->__pg_advance;
774 	piter->__pg_advance = 1;
775 
776 	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
777 		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
778 		piter->sg = sg_next(piter->sg);
779 		if (!--piter->__nents || !piter->sg)
780 			return false;
781 	}
782 
783 	return true;
784 }
785 EXPORT_SYMBOL(__sg_page_iter_dma_next);
786 
787 /**
788  * sg_miter_start - start mapping iteration over a sg list
789  * @miter: sg mapping iter to be started
790  * @sgl: sg list to iterate over
791  * @nents: number of sg entries
792  * @flags: sg iterator flags
793  *
794  * Description:
795  *   Starts mapping iterator @miter.
796  *
797  * Context:
798  *   Don't care.
799  */
sg_miter_start(struct sg_mapping_iter * miter,struct scatterlist * sgl,unsigned int nents,unsigned int flags)800 void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
801 		    unsigned int nents, unsigned int flags)
802 {
803 	memset(miter, 0, sizeof(struct sg_mapping_iter));
804 
805 	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
806 	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
807 	miter->__flags = flags;
808 }
809 EXPORT_SYMBOL(sg_miter_start);
810 
sg_miter_get_next_page(struct sg_mapping_iter * miter)811 static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
812 {
813 	if (!miter->__remaining) {
814 		struct scatterlist *sg;
815 
816 		if (!__sg_page_iter_next(&miter->piter))
817 			return false;
818 
819 		sg = miter->piter.sg;
820 
821 		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
822 		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
823 		miter->__offset &= PAGE_SIZE - 1;
824 		miter->__remaining = sg->offset + sg->length -
825 				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
826 				     miter->__offset;
827 		miter->__remaining = min_t(unsigned long, miter->__remaining,
828 					   PAGE_SIZE - miter->__offset);
829 	}
830 
831 	return true;
832 }
833 
834 /**
835  * sg_miter_skip - reposition mapping iterator
836  * @miter: sg mapping iter to be skipped
837  * @offset: number of bytes to plus the current location
838  *
839  * Description:
840  *   Sets the offset of @miter to its current location plus @offset bytes.
841  *   If mapping iterator @miter has been proceeded by sg_miter_next(), this
842  *   stops @miter.
843  *
844  * Context:
845  *   Don't care.
846  *
847  * Returns:
848  *   true if @miter contains the valid mapping.  false if end of sg
849  *   list is reached.
850  */
sg_miter_skip(struct sg_mapping_iter * miter,off_t offset)851 bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
852 {
853 	sg_miter_stop(miter);
854 
855 	while (offset) {
856 		off_t consumed;
857 
858 		if (!sg_miter_get_next_page(miter))
859 			return false;
860 
861 		consumed = min_t(off_t, offset, miter->__remaining);
862 		miter->__offset += consumed;
863 		miter->__remaining -= consumed;
864 		offset -= consumed;
865 	}
866 
867 	return true;
868 }
869 EXPORT_SYMBOL(sg_miter_skip);
870 
871 /**
872  * sg_miter_next - proceed mapping iterator to the next mapping
873  * @miter: sg mapping iter to proceed
874  *
875  * Description:
876  *   Proceeds @miter to the next mapping.  @miter should have been started
877  *   using sg_miter_start().  On successful return, @miter->page,
878  *   @miter->addr and @miter->length point to the current mapping.
879  *
880  * Context:
881  *   May sleep if !SG_MITER_ATOMIC.
882  *
883  * Returns:
884  *   true if @miter contains the next mapping.  false if end of sg
885  *   list is reached.
886  */
sg_miter_next(struct sg_mapping_iter * miter)887 bool sg_miter_next(struct sg_mapping_iter *miter)
888 {
889 	sg_miter_stop(miter);
890 
891 	/*
892 	 * Get to the next page if necessary.
893 	 * __remaining, __offset is adjusted by sg_miter_stop
894 	 */
895 	if (!sg_miter_get_next_page(miter))
896 		return false;
897 
898 	miter->page = sg_page_iter_page(&miter->piter);
899 	miter->consumed = miter->length = miter->__remaining;
900 
901 	if (miter->__flags & SG_MITER_ATOMIC)
902 		miter->addr = kmap_atomic(miter->page) + miter->__offset;
903 	else
904 		miter->addr = kmap(miter->page) + miter->__offset;
905 
906 	return true;
907 }
908 EXPORT_SYMBOL(sg_miter_next);
909 
910 /**
911  * sg_miter_stop - stop mapping iteration
912  * @miter: sg mapping iter to be stopped
913  *
914  * Description:
915  *   Stops mapping iterator @miter.  @miter should have been started
916  *   using sg_miter_start().  A stopped iteration can be resumed by
917  *   calling sg_miter_next() on it.  This is useful when resources (kmap)
918  *   need to be released during iteration.
919  *
920  * Context:
921  *   Don't care otherwise.
922  */
sg_miter_stop(struct sg_mapping_iter * miter)923 void sg_miter_stop(struct sg_mapping_iter *miter)
924 {
925 	WARN_ON(miter->consumed > miter->length);
926 
927 	/* drop resources from the last iteration */
928 	if (miter->addr) {
929 		miter->__offset += miter->consumed;
930 		miter->__remaining -= miter->consumed;
931 
932 		if (miter->__flags & SG_MITER_TO_SG)
933 			flush_dcache_page(miter->page);
934 
935 		if (miter->__flags & SG_MITER_ATOMIC) {
936 			WARN_ON_ONCE(!pagefault_disabled());
937 			kunmap_atomic(miter->addr);
938 		} else
939 			kunmap(miter->page);
940 
941 		miter->page = NULL;
942 		miter->addr = NULL;
943 		miter->length = 0;
944 		miter->consumed = 0;
945 	}
946 }
947 EXPORT_SYMBOL(sg_miter_stop);
948 
949 /**
950  * sg_copy_buffer - Copy data between a linear buffer and an SG list
951  * @sgl:		 The SG list
952  * @nents:		 Number of SG entries
953  * @buf:		 Where to copy from
954  * @buflen:		 The number of bytes to copy
955  * @skip:		 Number of bytes to skip before copying
956  * @to_buffer:		 transfer direction (true == from an sg list to a
957  *			 buffer, false == from a buffer to an sg list)
958  *
959  * Returns the number of copied bytes.
960  *
961  **/
sg_copy_buffer(struct scatterlist * sgl,unsigned int nents,void * buf,size_t buflen,off_t skip,bool to_buffer)962 size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
963 		      size_t buflen, off_t skip, bool to_buffer)
964 {
965 	unsigned int offset = 0;
966 	struct sg_mapping_iter miter;
967 	unsigned int sg_flags = SG_MITER_ATOMIC;
968 
969 	if (to_buffer)
970 		sg_flags |= SG_MITER_FROM_SG;
971 	else
972 		sg_flags |= SG_MITER_TO_SG;
973 
974 	sg_miter_start(&miter, sgl, nents, sg_flags);
975 
976 	if (!sg_miter_skip(&miter, skip))
977 		return 0;
978 
979 	while ((offset < buflen) && sg_miter_next(&miter)) {
980 		unsigned int len;
981 
982 		len = min(miter.length, buflen - offset);
983 
984 		if (to_buffer)
985 			memcpy(buf + offset, miter.addr, len);
986 		else
987 			memcpy(miter.addr, buf + offset, len);
988 
989 		offset += len;
990 	}
991 
992 	sg_miter_stop(&miter);
993 
994 	return offset;
995 }
996 EXPORT_SYMBOL(sg_copy_buffer);
997 
/**
 * sg_copy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy from
 * @buflen:		 The number of bytes to copy
 *
 * Convenience wrapper around sg_copy_buffer() that starts at the beginning
 * of the SG list (no skip) and copies in the buffer-to-SG direction.
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen)
{
	/* sg_copy_buffer() takes a non-const pointer for both directions */
	void *src = (void *)buf;

	return sg_copy_buffer(sgl, nents, src, buflen, 0, false);
}
EXPORT_SYMBOL(sg_copy_from_buffer);
1014 
/**
 * sg_copy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy to
 * @buflen:		 The number of bytes to copy
 *
 * Convenience wrapper around sg_copy_buffer() that starts at the beginning
 * of the SG list (no skip) and copies in the SG-to-buffer direction.
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	size_t copied;

	copied = sg_copy_buffer(sgl, nents, buf, buflen, 0, true);

	return copied;
}
EXPORT_SYMBOL(sg_copy_to_buffer);
1031 
/**
 * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy from
 * @buflen:		 The number of bytes to copy
 * @skip:		 Number of bytes to skip in the SG list before copying
 *
 * Same as sg_copy_from_buffer(), except that the first @skip bytes of the
 * SG list are passed over before any data is written.
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip)
{
	/* sg_copy_buffer() takes a non-const pointer for both directions */
	void *src = (void *)buf;

	return sg_copy_buffer(sgl, nents, src, buflen, skip, false);
}
EXPORT_SYMBOL(sg_pcopy_from_buffer);
1049 
/**
 * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy to
 * @buflen:		 The number of bytes to copy
 * @skip:		 Number of bytes to skip in the SG list before copying
 *
 * Same as sg_copy_to_buffer(), except that the first @skip bytes of the
 * SG list are passed over before any data is read.
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip)
{
	size_t copied;

	copied = sg_copy_buffer(sgl, nents, buf, buflen, skip, true);

	return copied;
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);
1067 
1068 /**
1069  * sg_zero_buffer - Zero-out a part of a SG list
1070  * @sgl:		 The SG list
1071  * @nents:		 Number of SG entries
1072  * @buflen:		 The number of bytes to zero out
1073  * @skip:		 Number of bytes to skip before zeroing
1074  *
1075  * Returns the number of bytes zeroed.
1076  **/
sg_zero_buffer(struct scatterlist * sgl,unsigned int nents,size_t buflen,off_t skip)1077 size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
1078 		       size_t buflen, off_t skip)
1079 {
1080 	unsigned int offset = 0;
1081 	struct sg_mapping_iter miter;
1082 	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
1083 
1084 	sg_miter_start(&miter, sgl, nents, sg_flags);
1085 
1086 	if (!sg_miter_skip(&miter, skip))
1087 		return false;
1088 
1089 	while (offset < buflen && sg_miter_next(&miter)) {
1090 		unsigned int len;
1091 
1092 		len = min(miter.length, buflen - offset);
1093 		memset(miter.addr, 0, len);
1094 
1095 		offset += len;
1096 	}
1097 
1098 	sg_miter_stop(&miter);
1099 	return offset;
1100 }
1101 EXPORT_SYMBOL(sg_zero_buffer);
1102 
1103 /*
1104  * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
1105  * iterators, and add them to the scatterlist.
1106  */
extract_user_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1107 static ssize_t extract_user_to_sg(struct iov_iter *iter,
1108 				  ssize_t maxsize,
1109 				  struct sg_table *sgtable,
1110 				  unsigned int sg_max,
1111 				  iov_iter_extraction_t extraction_flags)
1112 {
1113 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1114 	struct page **pages;
1115 	unsigned int npages;
1116 	ssize_t ret = 0, res;
1117 	size_t len, off;
1118 
1119 	/* We decant the page list into the tail of the scatterlist */
1120 	pages = (void *)sgtable->sgl +
1121 		array_size(sg_max, sizeof(struct scatterlist));
1122 	pages -= sg_max;
1123 
1124 	do {
1125 		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
1126 					     extraction_flags, &off);
1127 		if (res <= 0)
1128 			goto failed;
1129 
1130 		len = res;
1131 		maxsize -= len;
1132 		ret += len;
1133 		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
1134 		sg_max -= npages;
1135 
1136 		for (; npages > 0; npages--) {
1137 			struct page *page = *pages;
1138 			size_t seg = min_t(size_t, PAGE_SIZE - off, len);
1139 
1140 			*pages++ = NULL;
1141 			sg_set_page(sg, page, seg, off);
1142 			sgtable->nents++;
1143 			sg++;
1144 			len -= seg;
1145 			off = 0;
1146 		}
1147 	} while (maxsize > 0 && sg_max > 0);
1148 
1149 	return ret;
1150 
1151 failed:
1152 	while (sgtable->nents > sgtable->orig_nents)
1153 		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
1154 	return res;
1155 }
1156 
1157 /*
1158  * Extract up to sg_max pages from a BVEC-type iterator and add them to the
1159  * scatterlist.  The pages are not pinned.
1160  */
extract_bvec_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1161 static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
1162 				  ssize_t maxsize,
1163 				  struct sg_table *sgtable,
1164 				  unsigned int sg_max,
1165 				  iov_iter_extraction_t extraction_flags)
1166 {
1167 	const struct bio_vec *bv = iter->bvec;
1168 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1169 	unsigned long start = iter->iov_offset;
1170 	unsigned int i;
1171 	ssize_t ret = 0;
1172 
1173 	for (i = 0; i < iter->nr_segs; i++) {
1174 		size_t off, len;
1175 
1176 		len = bv[i].bv_len;
1177 		if (start >= len) {
1178 			start -= len;
1179 			continue;
1180 		}
1181 
1182 		len = min_t(size_t, maxsize, len - start);
1183 		off = bv[i].bv_offset + start;
1184 
1185 		sg_set_page(sg, bv[i].bv_page, len, off);
1186 		sgtable->nents++;
1187 		sg++;
1188 		sg_max--;
1189 
1190 		ret += len;
1191 		maxsize -= len;
1192 		if (maxsize <= 0 || sg_max == 0)
1193 			break;
1194 		start = 0;
1195 	}
1196 
1197 	if (ret > 0)
1198 		iov_iter_advance(iter, ret);
1199 	return ret;
1200 }
1201 
1202 /*
1203  * Extract up to sg_max pages from a KVEC-type iterator and add them to the
1204  * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
1205  * static buffers.  The pages are not pinned.
1206  */
extract_kvec_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1207 static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
1208 				  ssize_t maxsize,
1209 				  struct sg_table *sgtable,
1210 				  unsigned int sg_max,
1211 				  iov_iter_extraction_t extraction_flags)
1212 {
1213 	const struct kvec *kv = iter->kvec;
1214 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1215 	unsigned long start = iter->iov_offset;
1216 	unsigned int i;
1217 	ssize_t ret = 0;
1218 
1219 	for (i = 0; i < iter->nr_segs; i++) {
1220 		struct page *page;
1221 		unsigned long kaddr;
1222 		size_t off, len, seg;
1223 
1224 		len = kv[i].iov_len;
1225 		if (start >= len) {
1226 			start -= len;
1227 			continue;
1228 		}
1229 
1230 		kaddr = (unsigned long)kv[i].iov_base + start;
1231 		off = kaddr & ~PAGE_MASK;
1232 		len = min_t(size_t, maxsize, len - start);
1233 		kaddr &= PAGE_MASK;
1234 
1235 		maxsize -= len;
1236 		ret += len;
1237 		do {
1238 			seg = min_t(size_t, len, PAGE_SIZE - off);
1239 			if (is_vmalloc_or_module_addr((void *)kaddr))
1240 				page = vmalloc_to_page((void *)kaddr);
1241 			else
1242 				page = virt_to_page((void *)kaddr);
1243 
1244 			sg_set_page(sg, page, len, off);
1245 			sgtable->nents++;
1246 			sg++;
1247 			sg_max--;
1248 
1249 			len -= seg;
1250 			kaddr += PAGE_SIZE;
1251 			off = 0;
1252 		} while (len > 0 && sg_max > 0);
1253 
1254 		if (maxsize <= 0 || sg_max == 0)
1255 			break;
1256 		start = 0;
1257 	}
1258 
1259 	if (ret > 0)
1260 		iov_iter_advance(iter, ret);
1261 	return ret;
1262 }
1263 
1264 /*
1265  * Extract up to sg_max folios from an XARRAY-type iterator and add them to
1266  * the scatterlist.  The pages are not pinned.
1267  */
extract_xarray_to_sg(struct iov_iter * iter,ssize_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1268 static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
1269 				    ssize_t maxsize,
1270 				    struct sg_table *sgtable,
1271 				    unsigned int sg_max,
1272 				    iov_iter_extraction_t extraction_flags)
1273 {
1274 	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1275 	struct xarray *xa = iter->xarray;
1276 	struct folio *folio;
1277 	loff_t start = iter->xarray_start + iter->iov_offset;
1278 	pgoff_t index = start / PAGE_SIZE;
1279 	ssize_t ret = 0;
1280 	size_t offset, len;
1281 	XA_STATE(xas, xa, index);
1282 
1283 	rcu_read_lock();
1284 
1285 	xas_for_each(&xas, folio, ULONG_MAX) {
1286 		if (xas_retry(&xas, folio))
1287 			continue;
1288 		if (WARN_ON(xa_is_value(folio)))
1289 			break;
1290 		if (WARN_ON(folio_test_hugetlb(folio)))
1291 			break;
1292 
1293 		offset = offset_in_folio(folio, start);
1294 		len = min_t(size_t, maxsize, folio_size(folio) - offset);
1295 
1296 		sg_set_page(sg, folio_page(folio, 0), len, offset);
1297 		sgtable->nents++;
1298 		sg++;
1299 		sg_max--;
1300 
1301 		maxsize -= len;
1302 		ret += len;
1303 		if (maxsize <= 0 || sg_max == 0)
1304 			break;
1305 	}
1306 
1307 	rcu_read_unlock();
1308 	if (ret > 0)
1309 		iov_iter_advance(iter, ret);
1310 	return ret;
1311 }
1312 
1313 /**
1314  * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
1315  * @iter: The iterator to extract from
1316  * @maxsize: The amount of iterator to copy
1317  * @sgtable: The scatterlist table to fill in
1318  * @sg_max: Maximum number of elements in @sgtable that may be filled
1319  * @extraction_flags: Flags to qualify the request
1320  *
1321  * Extract the page fragments from the given amount of the source iterator and
1322  * add them to a scatterlist that refers to all of those bits, to a maximum
1323  * addition of @sg_max elements.
1324  *
1325  * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
1326  * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
1327  * and DISCARD-type are not supported.
1328  *
1329  * No end mark is placed on the scatterlist; that's left to the caller.
1330  *
1331  * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
1332  * be allowed on the pages extracted.
1333  *
1334  * If successful, @sgtable->nents is updated to include the number of elements
1335  * added and the number of bytes added is returned.  @sgtable->orig_nents is
1336  * left unaltered.
1337  *
1338  * The iov_iter_extract_mode() function should be used to query how cleanup
1339  * should be performed.
1340  */
extract_iter_to_sg(struct iov_iter * iter,size_t maxsize,struct sg_table * sgtable,unsigned int sg_max,iov_iter_extraction_t extraction_flags)1341 ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
1342 			   struct sg_table *sgtable, unsigned int sg_max,
1343 			   iov_iter_extraction_t extraction_flags)
1344 {
1345 	if (maxsize == 0)
1346 		return 0;
1347 
1348 	switch (iov_iter_type(iter)) {
1349 	case ITER_UBUF:
1350 	case ITER_IOVEC:
1351 		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
1352 					  extraction_flags);
1353 	case ITER_BVEC:
1354 		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
1355 					  extraction_flags);
1356 	case ITER_KVEC:
1357 		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
1358 					  extraction_flags);
1359 	case ITER_XARRAY:
1360 		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
1361 					    extraction_flags);
1362 	default:
1363 		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
1364 		WARN_ON_ONCE(1);
1365 		return -EIO;
1366 	}
1367 }
1368 EXPORT_SYMBOL_GPL(extract_iter_to_sg);
1369