1 // Copyright (c) 2018-2021  Robert J. Hijmans
2 //
3 // This file is part of the "spat" library.
4 //
5 // spat is free software: you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 2 of the License, or
8 // (at your option) any later version.
9 //
10 // spat is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with spat. If not, see <http://www.gnu.org/licenses/>.
17 
18 #include <vector>
19 #include "spatRaster.h"
20 #include "distance.h"
21 #include "recycle.h"
22 #include <random>
23 #include <unordered_set>
24 #include "string_utils.h"
25 
26 
getSampleRowCol(std::vector<size_t> & oldrow,std::vector<size_t> & oldcol,size_t nrows,size_t ncols,size_t snrow,size_t sncol)27 void getSampleRowCol(std::vector<size_t> &oldrow, std::vector<size_t> &oldcol, size_t nrows, size_t ncols, size_t snrow, size_t sncol) {
28 
29 	double rf = nrows / (double)(snrow);
30 	double cf = ncols / (double)(sncol);
31 	//double rstart = std::floor(0.5 * rf);
32 	//double cstart = std::floor(0.5 * cf);
33 	double rstart = 0.5 * rf;
34 	double cstart = 0.5 * cf;
35 	oldcol.reserve(sncol);
36 	for (size_t i =0; i<sncol; i++) {
37         oldcol.push_back(i * cf + cstart);
38 	}
39 	oldrow.reserve(snrow);
40 	for (size_t i =0; i<snrow; i++) {
41         oldrow.push_back(i * rf + rstart);
42 	}
43 }
44 
45 
readSample(unsigned src,size_t srows,size_t scols)46 std::vector<double> SpatRaster::readSample(unsigned src, size_t srows, size_t scols) {
47 
48 	unsigned nl = source[src].nlyr;
49 	std::vector<size_t> oldcol, oldrow;
50 	std::vector<double>	out;
51 	getSampleRowCol(oldrow, oldcol, nrow(), ncol(), srows, scols);
52 
53 	out.reserve(srows*scols);
54 	if (source[src].hasWindow) {
55 		size_t offrow = source[src].window.off_row;
56 		size_t offcol = source[src].window.off_col;
57 		size_t fncol = source[src].window.full_ncol;
58 		size_t oldnc = fncol * source[src].window.full_nrow;
59 		for (size_t lyr=0; lyr<nl; lyr++) {
60 			size_t off1 = lyr * oldnc;
61 			for (size_t r=0; r<srows; r++) {
62 				size_t off2 = off1 + (oldrow[r]+offrow) * fncol;
63 				for (size_t c=0; c<scols; c++) {
64 					size_t oldcell = off2 + oldcol[c] + offcol;
65 					out.push_back(source[src].values[oldcell]);
66 				}
67 			}
68 		}
69 	} else {
70 		unsigned oldnc = ncell();
71 		for (size_t lyr=0; lyr<nl; lyr++) {
72 			size_t off = lyr * oldnc;
73 			for (size_t r=0; r<srows; r++) {
74 				unsigned oldc = off + oldrow[r] * ncol();
75 				for (size_t c=0; c<scols; c++) {
76 					unsigned oldcell = oldc + oldcol[c];
77 					out.push_back(source[src].values[oldcell]);
78 				}
79 			}
80 		}
81 	}
82 	return out;
83 }
84 
85 
sampleRegularRaster(unsigned size)86 SpatRaster SpatRaster::sampleRegularRaster(unsigned size) {
87 
88 	if ((size >= ncell())) {
89 		return( *this );
90 	}
91 
92 	double f = std::min(1.0, sqrt(size / ncell()));
93 	size_t nr = std::min((size_t)ceil(nrow() * f), nrow());
94 	size_t nc = std::min((size_t)ceil(ncol() * f), ncol());
95 	if ((nc == ncol()) && (nr == nrow())) {
96 		return( *this );
97 	}
98 	SpatRaster out = geometry(nlyr(), true);
99 	out.source[0].nrow = nr;
100 	out.source[0].ncol = nc;
101 
102 	if (!source[0].hasValues) return (out);
103 
104 	std::vector<double> v;
105 	for (size_t src=0; src<nsrc(); src++) {
106 		if (source[src].memory) {
107 			v = readSample(src, nr, nc);
108 		//} else if (source[src].driver == "raster") {
109 		//	v = readSampleBinary(src, nr, nc);
110 		} else {
111 		    #ifdef useGDAL
112 			v = readGDALsample(src, nr, nc);
113 			#endif
114 		}
115 		if (hasError()) return out;
116 		out.source[0].values.insert(out.source[0].values.end(), v.begin(), v.end());
117 	}
118 	out.source[0].memory = true;
119 	out.source[0].hasValues = true;
120 	out.source[0].setRange();
121 
122 	return out;
123 }
124 
125 
sampleRowColRaster(size_t nr,size_t nc)126 SpatRaster SpatRaster::sampleRowColRaster(size_t nr, size_t nc) {
127 
128 	SpatRaster out = geometry(nlyr(), true);
129 	if ((nr == 0) || (nc ==0)) {
130 		out.setError("number of rows and columns must be > 0");
131 	}
132 
133 	nr = std::min(nr, nrow());
134 	nc = std::min(nc, ncol());
135 	if ((nc == ncol()) && (nr == nrow())) {
136 		return( *this );
137 	}
138 	out.source[0].nrow = nr;
139 	out.source[0].ncol = nc;
140 
141 	if (!source[0].hasValues) return (out);
142 
143 	std::vector<double> v;
144 	for (size_t src=0; src<nsrc(); src++) {
145 		if (source[src].memory) {
146 			v = readSample(src, nr, nc);
147 		//} else if (source[src].driver == "raster") {
148 		//	v = readSampleBinary(src, nr, nc);
149 		} else {
150 		    #ifdef useGDAL
151 			v = readGDALsample(src, nr, nc);
152 			#endif
153 		}
154 		if (hasError()) return out;
155 		out.source[0].values.insert(out.source[0].values.end(), v.begin(), v.end());
156 	}
157 	out.source[0].memory = true;
158 	out.source[0].hasValues = true;
159 	out.source[0].setRange();
160 
161 	return out;
162 }
163 
164 
sampleRegularValues(unsigned size,SpatOptions & opt)165 std::vector<std::vector<double>> SpatRaster::sampleRegularValues(unsigned size, SpatOptions &opt) {
166 
167 	std::vector<std::vector<double>> out;
168 	if (!source[0].hasValues) return (out);
169 
170 	size_t nsize;
171 	size_t nr = nrow();
172 	size_t nc = ncol();
173 	if (size < ncell()) {
174 		double f = sqrt(size / ncell());
175 		nr = std::ceil(nrow() * f);
176 		nc = std::ceil(ncol() * f);
177 	}
178 	nsize = nc * nr;
179 	std::vector<double> v;
180 	if ((size >= ncell()) || ((nc == ncol()) && (nr == nrow()))) {
181 		v = getValues(-1, opt) ;
182 		if (hasError()) return out;
183 		for (size_t i=0; i<nlyr(); i++) {
184 			size_t offset = i * nsize;
185 			std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
186 			out.push_back(vv);
187 		}
188 		return out;
189 	}
190 
191 	for (size_t src=0; src<nsrc(); src++) {
192 		if (source[src].memory) {
193 			v = readSample(src, nr, nc);
194 		//} else if (source[src].driver == "raster") {
195 		//	v = readSampleBinary(src, nr, nc);
196 		} else {
197 		    #ifdef useGDAL
198 			v = readGDALsample(src, nr, nc);
199 			#endif
200 		}
201 		if (hasError()) return out;
202 		for (size_t i=0; i<source[src].nlyr; i++) {
203 			size_t offset = i * nsize;
204 			std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
205 			out.push_back(vv);
206 		}
207 	}
208 	return out;
209 }
210 
211 
sampleRowColValues(size_t nr,size_t nc,SpatOptions & opt)212 std::vector<std::vector<double>> SpatRaster::sampleRowColValues(size_t nr, size_t nc, SpatOptions &opt) {
213 
214 	std::vector<std::vector<double>> out;
215 	if (!source[0].hasValues) return (out);
216 
217 	if ((nr == 0) || (nc ==0)) {
218 		return(out);
219 	}
220 
221 	nr = std::min(nr, nrow());
222 	nc = std::min(nc, ncol());
223 
224 	size_t nsize = nc * nr;
225 	std::vector<double> v;
226 	if ((nc == ncol()) && (nr == nrow())) {
227 		v = getValues(-1, opt) ;
228 		if (hasError()) return out;
229 		for (size_t i=0; i<nlyr(); i++) {
230 			size_t offset = i * nsize;
231 			std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
232 			out.push_back(vv);
233 		}
234 		return out;
235 	}
236 
237 	for (size_t src=0; src<nsrc(); src++) {
238 		if (source[src].memory) {
239 			v = readSample(src, nr, nc);
240 		} else {
241 		    #ifdef useGDAL
242 			v = readGDALsample(src, nr, nc);
243 			#endif
244 		}
245 		if (hasError()) return out;
246 		for (size_t i=0; i<source[src].nlyr; i++) {
247 			size_t offset = i * nsize;
248 			std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
249 			out.push_back(vv);
250 		}
251 	}
252 	return out;
253 }
254 
255 
256 
sample_replace(size_t size,size_t N,unsigned seed)257 std::vector<size_t> sample_replace(size_t size, size_t N, unsigned seed){
258 	std::default_random_engine gen(seed);
259 	std::uniform_int_distribution<> U(0, N-1);
260 	std::vector<size_t> sample;
261 	sample.reserve(size);
262 	for (size_t i=0; i<size; i++) {
263 		sample.push_back( U(gen) );
264 	}
265 	return sample;
266 }
267 
268 
sample_replace_weights(size_t size,size_t N,std::vector<double> prob,unsigned seed)269 std::vector<size_t> sample_replace_weights(size_t size, size_t N, std::vector<double> prob, unsigned seed){
270 
271 	// normalize prob
272 	double maxw = *max_element(prob.begin(), prob.end());
273 	for (double& d : prob)  d /= maxw;
274 	double minw = *min_element(prob.begin(), prob.end());
275 
276 	std::default_random_engine gen(seed);
277 	std::uniform_int_distribution<> U(0, N-1);
278 	std::vector<size_t> sample;
279 	sample.reserve(size);
280 
281 	std::uniform_real_distribution<> Uw(minw, 1);
282 	size_t cnt = 0;
283 	size_t cnt2 = 0;
284 	size_t ssize = size * 10;
285 	while (cnt < size) {
286 		double w = Uw(gen);
287 		double v = U(gen);
288 		if (prob[v] >= w) {
289 			sample.push_back(v);
290 			cnt++;
291 		} else {
292 			cnt2++;
293 			if (cnt2 > ssize) cnt = size;
294 		}
295 	}
296 	return sample;
297 }
298 
299 /*
300 std::vector<size_t> sample_replace_weights_gen(size_t size, size_t N, std::vector<double> prob, std::default_random_engine gen){
301 
302 	// normalize prob
303 	double minw = *min_element(prob.begin(),prob.end());
304 	double maxw = *max_element(prob.begin(),prob.end()) - minw;
305 	for (double& d : prob)  d = (d - minw) / maxw;
306 
307 	std::uniform_int_distribution<> U(0, N-1);
308 	std::vector<size_t> sample;
309 	sample.reserve(size);
310 
311 	std::uniform_real_distribution<> Uw(0, 1);
312 	size_t cnt = 0;
313 	while (cnt < size) {
314 		double w = Uw(gen);
315 		double v = U(gen);
316 		if (prob[v] >= w) {
317 			sample.push_back(v);
318 			cnt++;
319 		}
320 	}
321 	return sample;
322 }
323 */
324 
sample_no_replace(size_t size,size_t N,unsigned seed)325 std::vector<size_t> sample_no_replace(size_t size, size_t N, unsigned seed){
326 	size_t one = 1;
327 	size = std::max(one, std::min(size, N));
328 	std::vector<size_t> sample;
329 	if (size == N) {
330 		sample.resize(size);
331 		std::iota(sample.begin(), sample.end(), 0);
332 		return sample;
333 	}
334 	std::default_random_engine gen(seed);
335 
336 	if (size >= .66 * N) {
337 		sample.resize(N);
338 		std::iota(std::begin(sample), std::end(sample), 0);
339 		std::shuffle(sample.begin(), sample.end(), gen);
340 		if (size < N) {
341 			sample.erase(sample.begin()+size, sample.end());
342 		}
343 		return sample;
344 	}
345 
346 	std::uniform_real_distribution<> U( 0, std::nextafter(1.0, std::numeric_limits<double>::max() ) );
347 
348 	sample.reserve(size);
349 	for (size_t i=0; i<N; i++) {
350 		if ( ((N-i) * U(gen)) <  (size - sample.size()) ) {
351 			sample.push_back(i);
352     		if (sample.size() == size ) {
353 				break;
354 			}
355 		}
356 	}
357 	return sample;
358 }
359 
360 
sample_no_replace_weights(size_t size,size_t N,std::vector<double> prob,unsigned seed)361 std::vector<size_t> sample_no_replace_weights(size_t size, size_t N, std::vector<double> prob, unsigned seed){
362 	size_t one = 1;
363 	size = std::max(one, std::min(size, N));
364 	std::vector<size_t> sample;
365 	std::default_random_engine gen(seed);
366 	if (size == N) {
367 		sample.resize(size);
368 		std::iota(sample.begin(), sample.end(), 0);
369 		std::shuffle(sample.begin(), sample.end(), gen);
370 		return sample;
371 	}
372 
373 	std::uniform_int_distribution<> U(0, std::numeric_limits<int>::max());
374 	std::unordered_set<size_t> sampleset;
375 
376 	size_t isize = size;
377 	if (size > (0.8 * N)) {
378 		isize = N - size;
379 		for (double &d : prob) d = 1-d;
380 		size_t ssize = isize * (1.1 + isize / N);
381 		size_t cnt=0;
382 		while (sampleset.size() < isize) {
383 			seed = U(gen);
384 			std::vector<size_t> s = sample_replace_weights(ssize, N, prob, seed);
385 			for (size_t i=0; i<s.size(); i++) {
386 				sampleset.insert(s[i]);
387 			}
388 			cnt++;
389 			if (cnt > 10) break;
390 		}
391 		std::vector<size_t> invsamp;
392 		invsamp.insert(invsamp.begin(), sampleset.begin(), sampleset.end());
393 		std::sort(invsamp.begin(), invsamp.end());
394 		invsamp.push_back(N+1);
395 		size_t j=0;
396 		sample.reserve(size);
397 		for (size_t i=0; i<N; i++) {
398 			if (i != invsamp[j]) {
399 				sample.push_back(i);
400 			} else {
401 				j++;
402 			}
403 		}
404 		std::shuffle(sample.begin(), sample.end(), gen);
405 
406 	} else {
407 		size_t ssize = size * (1.1 + size / N);
408 		size_t cnt=0;
409 		while (sampleset.size() < size) {
410 			seed = U(gen);
411 			std::vector<size_t> s = sample_replace_weights(ssize, N, prob, seed);
412 			for (size_t i=0; i<s.size(); i++) {
413 				sampleset.insert(s[i]);
414 			}
415 			cnt++;
416 			if (cnt > 10) break;
417 		}
418 		sample.insert(sample.begin(), sampleset.begin(), sampleset.end());
419 		if (sample.size() > size) {
420 			sample.resize(size);
421 		};
422 	}
423 
424 	return(sample);
425 }
426 
427 
sample(size_t size,size_t N,bool replace,std::vector<double> prob,unsigned seed)428 std::vector<size_t> sample(size_t size, size_t N, bool replace, std::vector<double> prob, unsigned seed){
429 	if ((size == 0) || (N == 0)) {
430 		std::vector<size_t> s;
431 		return s;
432 	}
433 	bool w = prob.size() == N;
434 	if (replace) {
435 		if (N == 1) {
436 			std::vector<size_t> s(size,0);
437 			return s;
438 		}
439 		if (w) {
440 			return sample_replace_weights(size, N, prob, seed);
441 		} else {
442 			return sample_replace(size, N, seed);
443 		}
444 	} else {
445 		if (N == 1) {
446 			std::vector<size_t> s(1,0);
447 			return s;
448 		}
449 		if (w) {
450 			return sample_no_replace_weights(size, N, prob, seed);
451 		} else {
452 			return sample_no_replace(size, N, seed);
453 		}
454 	}
455 }
456 
457 
458 
sampleRandomValues(unsigned size,bool replace,unsigned seed)459 std::vector<std::vector<double>> SpatRaster::sampleRandomValues(unsigned size, bool replace, unsigned seed) {
460 
461 	double nc = ncell();
462 	std::vector<size_t> cells;
463 	std::vector<double> w;
464 	if (replace) {
465 		cells = sample(size, nc, false, w, seed);
466 	} else {
467 		cells = sample(size, nc, true, w, seed);
468 	}
469 
470 	std::vector<double> dcells(cells.begin(), cells.end());
471 	std::vector<std::vector<double>> d = extractCell(dcells);
472 	return d;
473 }
474 
475 
sampleRandomRaster(unsigned size,bool replace,unsigned seed)476 SpatRaster SpatRaster::sampleRandomRaster(unsigned size, bool replace, unsigned seed) {
477 
478 	unsigned nsize;
479 	unsigned nr = nrow();
480 	unsigned nc = ncol();
481 	if (size < ncell()) {
482 		double f = sqrt(size / ncell());
483 		nr = std::ceil(nrow() * f);
484 		nc = std::ceil(ncol() * f);
485 	}
486 	SpatRaster out = geometry(nlyr(), true);
487 	out.source[0].nrow = nr;
488 	out.source[0].ncol = nc;
489 	if (!source[0].hasValues) return (out);
490 
491 	nsize = nr * nc;
492 	std::vector<std::vector<double>> vv = sampleRandomValues(nsize, replace, seed);
493 
494 	for (size_t i=0; i<vv.size(); i++) {
495 		out.source[0].values.insert(out.source[0].values.end(), vv[i].begin(), vv[i].end());
496 	}
497 	out.source[0].memory = true;
498 	out.source[0].hasValues = true;
499 	out.source[0].setRange();
500 
501 	return out;
502 }
503 
test_sample(size_t size,size_t N,bool replace,std::vector<double> w,unsigned seed)504 std::vector<size_t> SpatExtent::test_sample(size_t size, size_t N, bool replace, std::vector<double> w, unsigned seed) {
505 	return sample(size, N, replace, w, seed);
506 }
507 
508 
sampleRandom(size_t size,bool lonlat,unsigned seed)509 std::vector<std::vector<double>> SpatExtent::sampleRandom(size_t size, bool lonlat, unsigned seed){
510 	std::vector<std::vector<double>> out(2);
511 	if (size == 0) return out;
512 	std::default_random_engine gen(seed);
513 
514 
515 	if (lonlat) {
516 		double d = (ymax - ymin) / 1000.0;
517 		std::vector<double> r = seq(ymin, ymax, d);
518 		std::vector<double> w;
519 		w.reserve(r.size());
520 		for (size_t i=0; i<r.size(); i++) {
521 			if (i == 0) {
522 			}
523 			double ww = std::abs(cos(M_PI * r[i]/180.0));
524 			w.push_back(ww );
525 
526 		}
527 
528 		std::vector	<size_t> x = sample(size, r.size(), true, w, seed);
529 		std::vector <double> lat, lon;
530 		lat.reserve(size);
531 		lon.reserve(size);
532 		std::uniform_real_distribution<> U1(-0.5, 0.5);
533 
534 		double dx = 0.5 * d;
535 		for (size_t i=0; i<x.size(); i++) {
536 			double v = r[x[i]] + dx * U1(gen);
537 			lat.push_back(v);
538 		}
539 		std::uniform_real_distribution<> U2(xmin, xmax);
540 		for (size_t i=0; i<size; i++) {
541 			lon.push_back(U2(gen));
542 		}
543 		out[0] = lon;
544 		out[1] = lat;
545 
546 	} else {
547 		std::vector <double> x, y;
548 		x.reserve(size);
549 		y.reserve(size);
550 		std::uniform_real_distribution<> runifx(xmin, xmax);
551 		std::uniform_real_distribution<> runify(ymin, ymax);
552 		for (size_t i=0; i<size; i++) {
553 			x.push_back(runifx(gen));
554 			y.push_back(runify(gen));
555 		}
556 		out[0] = x;
557 		out[1] = y;
558 	}
559 	return out;
560 }
561 
562 
563 
564 
sampleRegular(size_t size,bool lonlat)565 std::vector<std::vector<double>> SpatExtent::sampleRegular(size_t size, bool lonlat) {
566 	std::vector<std::vector<double>> out(2);
567 	if (size == 0) return out;
568 
569 	double r1 = xmax - xmin;
570 	double r2 = ymax - ymin;
571 
572 	if (lonlat) {
573 		double halfy = ymin + (ymax - ymin)/2;
574 		double dx = distance_lonlat(xmin, halfy, xmax, halfy);
575 		double dy = distance_lonlat(0, ymin, 0, ymax);
576 		double ratio = dx/dy;
577 		double ny = std::max(1.0, sqrt(size / ratio));
578 		double nx = std::max(1.0, size / ny);
579 		ny = std::round(ny);
580 		nx = std::round(nx);
581 		double x_i = r1 / nx;
582 		double y_i = r2 / ny;
583 
584 		std::vector<double> lat, lon, w, xi;
585 		lat.reserve(ny);
586 		lat.push_back(ymin+0.5*y_i);
587 		for (size_t i=1; i<ny; i++) {
588 			lat.push_back(lat[i-1] + y_i);
589 		}
590 
591 		w.reserve(lat.size());
592 		for (size_t i=0; i<lat.size(); i++) {
593 			w.push_back(cos(M_PI * lat[i] / 180.0));
594 		}
595 
596 		double nwsumw = w.size() / accumulate(w.begin(), w.end(), 0.0);
597 		xi.reserve(w.size());
598 		for (size_t i=0; i<w.size(); i++) {
599 			xi.push_back(x_i / (w[i] * nwsumw));
600 		}
601 		double halfx = xmin + (xmax - xmin)/2;
602 		for (size_t i=0; i<lat.size(); i++) {
603 			double start = halfx - 0.5*xi[i];
604 			std::vector <double> x;
605 			if (start < xmin) {
606 				x = { halfx };
607 			} else {
608 				while (start > xmin) {
609 					start -= xi[i];
610 				}
611 				x = seq(start + xi[i], xmax, xi[i]);
612 			}
613 			if (x.size() <= 1) {
614 				x = { halfx };
615 			}
616 			std::vector <double> y(x.size(), lat[i]);
617 
618 			out[0].insert(out[0].end(), x.begin(), x.end());
619 			out[1].insert(out[1].end(), y.begin(), y.end());
620 		}
621 
622 	} else {
623 		double ratio = r1/r2;
624 		double ny = std::max(1.0, sqrt(size / ratio));
625 		double nx = std::max(1.0, size / ny);
626 		ny = std::round(ny);
627 		nx = std::round(nx);
628 		double x_i = r1 / nx;
629 		double y_i = r2 / ny;
630 
631 		std::vector<double> x, y;
632 		x.reserve(nx);
633 		y.reserve(ny);
634 		x.push_back(xmin+0.5*x_i);
635 		for (size_t i=1; i<nx; i++) {
636 			x.push_back(x[i-1] + x_i);
637 		}
638 		y.reserve(ny);
639 		y.push_back(ymin+0.5*y_i);
640 		for (size_t i=1; i<ny; i++) {
641 			y.push_back(y[i-1] + y_i);
642 		}
643 		rep(x, ny);
644 		rep_each(y, nx);
645 		out[0] = x;
646 		out[1] = y;
647 	}
648 
649 	return out;
650 }
651 
652 
sampleCells(unsigned size,std::string method,bool replace,unsigned seed)653 std::vector<size_t> SpatRaster::sampleCells(unsigned size, std::string method, bool replace, unsigned seed) {
654 
655 	std::default_random_engine gen(seed);
656 	std::vector<size_t> out;
657 	if ((size >= ncell()) & (!replace)) {
658 		out.resize(ncell());
659 		std::iota(out.begin(), out.end(), 0);
660 		if (method == "random") {
661 			std::shuffle(out.begin(), out.end(), gen);
662 		}
663 		return out;
664 	}
665 
666 	if (method == "random") {
667 
668 	} else if (method == "regular") {
669 
670 	} else { //method == "stratified"
671 
672 	} // else "Cluster"
673 	return out;
674 }
675 
676 
sample(unsigned n,std::string method,unsigned seed)677 SpatVector SpatVector::sample(unsigned n, std::string method, unsigned seed) {
678 
679 	std::string gt = type();
680 	SpatVector out;
681 	if (gt != "polygons") {
682 		out.setError("only implemented for polygons");
683 		return out;
684 	}
685 	if (n == 0) {
686 		out.srs = srs;
687 		return out;
688 	}
689 
690 /*
691 	if (strata != "") {
692 		// should use
693 		// SpatVector a = aggregate(strata, false);
694 		// but get nasty self-intersection precision probs.
695 
696 		int i = where_in_vector(strata, get_names());
697 		if (i < 0) {
698 			out.setError("cannot find field");
699 			return out;
700 		}
701 		SpatDataFrame uv;
702 		std::vector<int> idx = df.getIndex(i, uv);
703 		for (size_t i=0; i<uv.nrow(); i++) {
704 			std::vector<int> g;
705 			g.resize(0);
706 			for (size_t j=0; j<idx.size(); j++) {
707 				if (i == (size_t)idx[j]) {
708 					g.push_back(j);
709 				}
710 			}
711 			SpatVector s = subset_rows(g);
712 			s = s.sample(n, "random", false, "", seed);
713 			for (long &v : s.df.iv[0]) v = v+i;
714 			out = out.append(s, true);
715 		}
716 		return out;
717 	}
718 */
719 	bool lonlat = is_lonlat();
720 	bool random = (method == "random");
721 
722 	std::vector<double> a = area("m", true, {});
723 	if (hasError()) {
724 		out.setError(getError());
725 		return out;
726 	}
727 	double suma = accumulate(a.begin(), a.end(), 0.0);
728 
729 /*
730 	if (by_geom) {
731 		std::vector<double> pa;
732 		pa.reserve(a.size());
733 		for (size_t i=0; i<a.size(); i++) {
734 			pa.push_back(a[i] / suma);
735 		}
736 		std::vector<std::vector<double>> pxy(2);
737 
738 		std::vector<size_t> nsamp(size());
739 		for (size_t i=0; i<size(); i++) {
740 			if (pa[i] > 0) {
741 				SpatGeom g = getGeom(i);
742 				SpatVector ve(g.extent, "");
743 				ve.srs = srs;
744 				double vea = ve.area()[0];
745 				if (random) {
746 					double m = vea / a[i];
747 					m = std::max(2.0, std::min(m*m, 100.0));
748 					size_t ssize = pa[i] * n * m;
749 					pxy = g.extent.sampleRandom(ssize, lonlat, seed);
750 				} else {
751 					size_t ssize = std::round(pa[i] * n * vea / a[i]);
752 					pxy = g.extent.sampleRegular(ssize, lonlat);
753 				}
754 				SpatVector vpnt(pxy[0], pxy[1], points, "");
755 				SpatVector vpol(g);
756 				vpnt = vpnt.intersect(vpol);
757 				if (random) {
758 					size_t psize = pa[i] * n;
759 					if (vpnt.size() > psize) {
760 						std::vector<int> rows(psize);
761 						std::iota(rows.begin(), rows.end(), 0);
762 						vpnt = vpnt.subset_rows(rows);
763 					}
764 				}
765 				nsamp[i] = vpnt.size();
766 				if (out.size() == 0) {
767 					out = vpnt;
768 				} else {
769 					out = out.append(vpnt, true);
770 				}
771 			}
772 		}
773 		std::vector<long> id(size());
774 		std::iota(id.begin(), id.end(), 1);
775 		rep_each_vect(id, nsamp);
776 		SpatDataFrame df;
777 		df.add_column(id, "pol.id");
778 		out.df = df;
779 	} else {
780 */
781 	std::vector<std::vector<double>> pxy(2);
782 
783 	SpatVector ve(extent, "");
784 	ve.srs = srs;
785 	double vea = ve.area("m", true, {})[0];
786 	if (random) {
787 		double m = vea / suma;
788 		m = std::max(10.0, std::min(m*m, 100.0));
789 		size_t ssize = n * m;
790 		pxy = extent.sampleRandom(ssize, lonlat, seed);
791 	} else {
792 		size_t ssize = std::round(n * vea / suma);
793 		pxy = extent.sampleRegular(ssize, lonlat);
794 	}
795 	out = SpatVector(pxy[0], pxy[1], points, "");
796 	out = intersect(out);
797 	if (random) {
798 		if (out.size() > n) {
799 			std::vector<int> rows(out.size());
800 			std::iota(rows.begin(), rows.end(), 0);
801 			std::default_random_engine gen(seed);
802 			std::shuffle(rows.begin(), rows.end(), gen);
803 			rows.resize(n);
804 			out = out.subset_rows(rows);
805 		}
806 	}
807 	//std::vector<long> id(out.size(), 1);
808 	//SpatDataFrame df;
809 	//df.add_column(id, "pol.id");
810 	//out.df = df;
811 //	}
812 	out.srs = srs;
813 
814 	return out;
815 }
816 
817 
sample_geom(std::vector<unsigned> n,std::string method,unsigned seed)818 SpatVector SpatVector::sample_geom(std::vector<unsigned> n, std::string method, unsigned seed) {
819 
820 	SpatVector out;
821 	if (n.size() != size()) {
822 		out.setError("length of samples does not match number of geoms");
823 		return out;
824 	}
825 	if (n.size() == 0) {
826 		out.srs = srs;
827 		return out;
828 	}
829 
830 	for (size_t i=0; i<size(); i++) {
831 		if (n[i] == 0) {
832 			continue;
833 		}
834 		int j = i;
835 		SpatVector v = subset_rows(j);
836 		SpatVector p = v.sample(n[i], method, seed+i);
837 		out = out.append(p, true);
838 	}
839 	out.srs = srs;
840 	return out;
841 }
842 
843 /*
844 std::vector<double> sample(size_t size, size_t N, bool replace, std::vector<double> prob, unsigned seed){
845     // Sample "size" elements from [1, N]
846 	std::vector<double> result;
847 	std::default_random_engine gen(seed);
848 
849 	bool weights = false;
850 	if (prob.size() == N) {
851 		weights = true;
852 		// should check for neg numbers
853 		double minw = *min_element(prob.begin(),prob.end());
854 		double maxw = *max_element(prob.begin(),prob.end()) - minw;
855 		for (double& d : prob)  d = (d - minw) / maxw;
856 	}
857 
858 	if (replace) {
859 		//std::vector<double> samples;
860 		result.reserve(size);
861 		std::uniform_int_distribution<> distribution(0, N-1);
862 		if (weights) {
863 			std::uniform_real_distribution<> wdist(0, 1);
864 			size_t cnt = 0;
865 			while (cnt < size) {
866 				double w = wdist(gen);
867 				double v = distribution(gen);
868 				if (prob[v] >= w) {
869 					result.push_back(v);
870 					cnt++;
871 				}
872 			}
873 		} else {
874 			for (size_t i=0; i<size; i++) {
875 				result.push_back( distribution(gen) );
876 			}
877 		}
878 	} else {//without replacement
879 
880 		size = std::max(size_t(1), std::min(size, N)); // k <= N
881 
882 		std::uniform_int_distribution<> distribution(1, N);
883 		std::unordered_set<unsigned> samples;
884 
885 		if (weights) {
886 			std::uniform_int_distribution<> wdist(0, N-1);
887 			size_t cnt = 0;
888 			size_t r = 0;
889 			while (cnt < size) {
890 				double w = wdist(gen)/N;
891 				double v = distribution(gen);
892 				if (prob[v] >= w) {
893 					if (!samples.insert(v).second) {
894 						samples.insert(r);
895 						cnt++;
896 					}
897 				}
898 				r++;
899 				r = r%(N-1);
900 			}
901 		} else {
902 			for (size_t r = N - size; r < N; ++r) {
903 				unsigned v = distribution(gen) - 1;
904 				if (!samples.insert(v).second) samples.insert(r);
905 			}
906 		}
907 		result = std::vector<double>(samples.begin(), samples.end());
908 		std::shuffle(result.begin(), result.end(), gen);
909 	}
910     return result;
911 }
912 */
913 
914