1 // Copyright (c) 2018-2021 Robert J. Hijmans
2 //
3 // This file is part of the "spat" library.
4 //
5 // spat is free software: you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 2 of the License, or
8 // (at your option) any later version.
9 //
10 // spat is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with spat. If not, see <http://www.gnu.org/licenses/>.
17
18 #include <vector>
19 #include "spatRaster.h"
20 #include "distance.h"
21 #include "recycle.h"
22 #include <random>
23 #include <unordered_set>
24 #include "string_utils.h"
25
26
getSampleRowCol(std::vector<size_t> & oldrow,std::vector<size_t> & oldcol,size_t nrows,size_t ncols,size_t snrow,size_t sncol)27 void getSampleRowCol(std::vector<size_t> &oldrow, std::vector<size_t> &oldcol, size_t nrows, size_t ncols, size_t snrow, size_t sncol) {
28
29 double rf = nrows / (double)(snrow);
30 double cf = ncols / (double)(sncol);
31 //double rstart = std::floor(0.5 * rf);
32 //double cstart = std::floor(0.5 * cf);
33 double rstart = 0.5 * rf;
34 double cstart = 0.5 * cf;
35 oldcol.reserve(sncol);
36 for (size_t i =0; i<sncol; i++) {
37 oldcol.push_back(i * cf + cstart);
38 }
39 oldrow.reserve(snrow);
40 for (size_t i =0; i<snrow; i++) {
41 oldrow.push_back(i * rf + rstart);
42 }
43 }
44
45
readSample(unsigned src,size_t srows,size_t scols)46 std::vector<double> SpatRaster::readSample(unsigned src, size_t srows, size_t scols) {
47
48 unsigned nl = source[src].nlyr;
49 std::vector<size_t> oldcol, oldrow;
50 std::vector<double> out;
51 getSampleRowCol(oldrow, oldcol, nrow(), ncol(), srows, scols);
52
53 out.reserve(srows*scols);
54 if (source[src].hasWindow) {
55 size_t offrow = source[src].window.off_row;
56 size_t offcol = source[src].window.off_col;
57 size_t fncol = source[src].window.full_ncol;
58 size_t oldnc = fncol * source[src].window.full_nrow;
59 for (size_t lyr=0; lyr<nl; lyr++) {
60 size_t off1 = lyr * oldnc;
61 for (size_t r=0; r<srows; r++) {
62 size_t off2 = off1 + (oldrow[r]+offrow) * fncol;
63 for (size_t c=0; c<scols; c++) {
64 size_t oldcell = off2 + oldcol[c] + offcol;
65 out.push_back(source[src].values[oldcell]);
66 }
67 }
68 }
69 } else {
70 unsigned oldnc = ncell();
71 for (size_t lyr=0; lyr<nl; lyr++) {
72 size_t off = lyr * oldnc;
73 for (size_t r=0; r<srows; r++) {
74 unsigned oldc = off + oldrow[r] * ncol();
75 for (size_t c=0; c<scols; c++) {
76 unsigned oldcell = oldc + oldcol[c];
77 out.push_back(source[src].values[oldcell]);
78 }
79 }
80 }
81 }
82 return out;
83 }
84
85
sampleRegularRaster(unsigned size)86 SpatRaster SpatRaster::sampleRegularRaster(unsigned size) {
87
88 if ((size >= ncell())) {
89 return( *this );
90 }
91
92 double f = std::min(1.0, sqrt(size / ncell()));
93 size_t nr = std::min((size_t)ceil(nrow() * f), nrow());
94 size_t nc = std::min((size_t)ceil(ncol() * f), ncol());
95 if ((nc == ncol()) && (nr == nrow())) {
96 return( *this );
97 }
98 SpatRaster out = geometry(nlyr(), true);
99 out.source[0].nrow = nr;
100 out.source[0].ncol = nc;
101
102 if (!source[0].hasValues) return (out);
103
104 std::vector<double> v;
105 for (size_t src=0; src<nsrc(); src++) {
106 if (source[src].memory) {
107 v = readSample(src, nr, nc);
108 //} else if (source[src].driver == "raster") {
109 // v = readSampleBinary(src, nr, nc);
110 } else {
111 #ifdef useGDAL
112 v = readGDALsample(src, nr, nc);
113 #endif
114 }
115 if (hasError()) return out;
116 out.source[0].values.insert(out.source[0].values.end(), v.begin(), v.end());
117 }
118 out.source[0].memory = true;
119 out.source[0].hasValues = true;
120 out.source[0].setRange();
121
122 return out;
123 }
124
125
sampleRowColRaster(size_t nr,size_t nc)126 SpatRaster SpatRaster::sampleRowColRaster(size_t nr, size_t nc) {
127
128 SpatRaster out = geometry(nlyr(), true);
129 if ((nr == 0) || (nc ==0)) {
130 out.setError("number of rows and columns must be > 0");
131 }
132
133 nr = std::min(nr, nrow());
134 nc = std::min(nc, ncol());
135 if ((nc == ncol()) && (nr == nrow())) {
136 return( *this );
137 }
138 out.source[0].nrow = nr;
139 out.source[0].ncol = nc;
140
141 if (!source[0].hasValues) return (out);
142
143 std::vector<double> v;
144 for (size_t src=0; src<nsrc(); src++) {
145 if (source[src].memory) {
146 v = readSample(src, nr, nc);
147 //} else if (source[src].driver == "raster") {
148 // v = readSampleBinary(src, nr, nc);
149 } else {
150 #ifdef useGDAL
151 v = readGDALsample(src, nr, nc);
152 #endif
153 }
154 if (hasError()) return out;
155 out.source[0].values.insert(out.source[0].values.end(), v.begin(), v.end());
156 }
157 out.source[0].memory = true;
158 out.source[0].hasValues = true;
159 out.source[0].setRange();
160
161 return out;
162 }
163
164
sampleRegularValues(unsigned size,SpatOptions & opt)165 std::vector<std::vector<double>> SpatRaster::sampleRegularValues(unsigned size, SpatOptions &opt) {
166
167 std::vector<std::vector<double>> out;
168 if (!source[0].hasValues) return (out);
169
170 size_t nsize;
171 size_t nr = nrow();
172 size_t nc = ncol();
173 if (size < ncell()) {
174 double f = sqrt(size / ncell());
175 nr = std::ceil(nrow() * f);
176 nc = std::ceil(ncol() * f);
177 }
178 nsize = nc * nr;
179 std::vector<double> v;
180 if ((size >= ncell()) || ((nc == ncol()) && (nr == nrow()))) {
181 v = getValues(-1, opt) ;
182 if (hasError()) return out;
183 for (size_t i=0; i<nlyr(); i++) {
184 size_t offset = i * nsize;
185 std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
186 out.push_back(vv);
187 }
188 return out;
189 }
190
191 for (size_t src=0; src<nsrc(); src++) {
192 if (source[src].memory) {
193 v = readSample(src, nr, nc);
194 //} else if (source[src].driver == "raster") {
195 // v = readSampleBinary(src, nr, nc);
196 } else {
197 #ifdef useGDAL
198 v = readGDALsample(src, nr, nc);
199 #endif
200 }
201 if (hasError()) return out;
202 for (size_t i=0; i<source[src].nlyr; i++) {
203 size_t offset = i * nsize;
204 std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
205 out.push_back(vv);
206 }
207 }
208 return out;
209 }
210
211
sampleRowColValues(size_t nr,size_t nc,SpatOptions & opt)212 std::vector<std::vector<double>> SpatRaster::sampleRowColValues(size_t nr, size_t nc, SpatOptions &opt) {
213
214 std::vector<std::vector<double>> out;
215 if (!source[0].hasValues) return (out);
216
217 if ((nr == 0) || (nc ==0)) {
218 return(out);
219 }
220
221 nr = std::min(nr, nrow());
222 nc = std::min(nc, ncol());
223
224 size_t nsize = nc * nr;
225 std::vector<double> v;
226 if ((nc == ncol()) && (nr == nrow())) {
227 v = getValues(-1, opt) ;
228 if (hasError()) return out;
229 for (size_t i=0; i<nlyr(); i++) {
230 size_t offset = i * nsize;
231 std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
232 out.push_back(vv);
233 }
234 return out;
235 }
236
237 for (size_t src=0; src<nsrc(); src++) {
238 if (source[src].memory) {
239 v = readSample(src, nr, nc);
240 } else {
241 #ifdef useGDAL
242 v = readGDALsample(src, nr, nc);
243 #endif
244 }
245 if (hasError()) return out;
246 for (size_t i=0; i<source[src].nlyr; i++) {
247 size_t offset = i * nsize;
248 std::vector<double> vv(v.begin()+offset, v.begin()+offset+nsize);
249 out.push_back(vv);
250 }
251 }
252 return out;
253 }
254
255
256
sample_replace(size_t size,size_t N,unsigned seed)257 std::vector<size_t> sample_replace(size_t size, size_t N, unsigned seed){
258 std::default_random_engine gen(seed);
259 std::uniform_int_distribution<> U(0, N-1);
260 std::vector<size_t> sample;
261 sample.reserve(size);
262 for (size_t i=0; i<size; i++) {
263 sample.push_back( U(gen) );
264 }
265 return sample;
266 }
267
268
sample_replace_weights(size_t size,size_t N,std::vector<double> prob,unsigned seed)269 std::vector<size_t> sample_replace_weights(size_t size, size_t N, std::vector<double> prob, unsigned seed){
270
271 // normalize prob
272 double maxw = *max_element(prob.begin(), prob.end());
273 for (double& d : prob) d /= maxw;
274 double minw = *min_element(prob.begin(), prob.end());
275
276 std::default_random_engine gen(seed);
277 std::uniform_int_distribution<> U(0, N-1);
278 std::vector<size_t> sample;
279 sample.reserve(size);
280
281 std::uniform_real_distribution<> Uw(minw, 1);
282 size_t cnt = 0;
283 size_t cnt2 = 0;
284 size_t ssize = size * 10;
285 while (cnt < size) {
286 double w = Uw(gen);
287 double v = U(gen);
288 if (prob[v] >= w) {
289 sample.push_back(v);
290 cnt++;
291 } else {
292 cnt2++;
293 if (cnt2 > ssize) cnt = size;
294 }
295 }
296 return sample;
297 }
298
299 /*
300 std::vector<size_t> sample_replace_weights_gen(size_t size, size_t N, std::vector<double> prob, std::default_random_engine gen){
301
302 // normalize prob
303 double minw = *min_element(prob.begin(),prob.end());
304 double maxw = *max_element(prob.begin(),prob.end()) - minw;
305 for (double& d : prob) d = (d - minw) / maxw;
306
307 std::uniform_int_distribution<> U(0, N-1);
308 std::vector<size_t> sample;
309 sample.reserve(size);
310
311 std::uniform_real_distribution<> Uw(0, 1);
312 size_t cnt = 0;
313 while (cnt < size) {
314 double w = Uw(gen);
315 double v = U(gen);
316 if (prob[v] >= w) {
317 sample.push_back(v);
318 cnt++;
319 }
320 }
321 return sample;
322 }
323 */
324
sample_no_replace(size_t size,size_t N,unsigned seed)325 std::vector<size_t> sample_no_replace(size_t size, size_t N, unsigned seed){
326 size_t one = 1;
327 size = std::max(one, std::min(size, N));
328 std::vector<size_t> sample;
329 if (size == N) {
330 sample.resize(size);
331 std::iota(sample.begin(), sample.end(), 0);
332 return sample;
333 }
334 std::default_random_engine gen(seed);
335
336 if (size >= .66 * N) {
337 sample.resize(N);
338 std::iota(std::begin(sample), std::end(sample), 0);
339 std::shuffle(sample.begin(), sample.end(), gen);
340 if (size < N) {
341 sample.erase(sample.begin()+size, sample.end());
342 }
343 return sample;
344 }
345
346 std::uniform_real_distribution<> U( 0, std::nextafter(1.0, std::numeric_limits<double>::max() ) );
347
348 sample.reserve(size);
349 for (size_t i=0; i<N; i++) {
350 if ( ((N-i) * U(gen)) < (size - sample.size()) ) {
351 sample.push_back(i);
352 if (sample.size() == size ) {
353 break;
354 }
355 }
356 }
357 return sample;
358 }
359
360
sample_no_replace_weights(size_t size,size_t N,std::vector<double> prob,unsigned seed)361 std::vector<size_t> sample_no_replace_weights(size_t size, size_t N, std::vector<double> prob, unsigned seed){
362 size_t one = 1;
363 size = std::max(one, std::min(size, N));
364 std::vector<size_t> sample;
365 std::default_random_engine gen(seed);
366 if (size == N) {
367 sample.resize(size);
368 std::iota(sample.begin(), sample.end(), 0);
369 std::shuffle(sample.begin(), sample.end(), gen);
370 return sample;
371 }
372
373 std::uniform_int_distribution<> U(0, std::numeric_limits<int>::max());
374 std::unordered_set<size_t> sampleset;
375
376 size_t isize = size;
377 if (size > (0.8 * N)) {
378 isize = N - size;
379 for (double &d : prob) d = 1-d;
380 size_t ssize = isize * (1.1 + isize / N);
381 size_t cnt=0;
382 while (sampleset.size() < isize) {
383 seed = U(gen);
384 std::vector<size_t> s = sample_replace_weights(ssize, N, prob, seed);
385 for (size_t i=0; i<s.size(); i++) {
386 sampleset.insert(s[i]);
387 }
388 cnt++;
389 if (cnt > 10) break;
390 }
391 std::vector<size_t> invsamp;
392 invsamp.insert(invsamp.begin(), sampleset.begin(), sampleset.end());
393 std::sort(invsamp.begin(), invsamp.end());
394 invsamp.push_back(N+1);
395 size_t j=0;
396 sample.reserve(size);
397 for (size_t i=0; i<N; i++) {
398 if (i != invsamp[j]) {
399 sample.push_back(i);
400 } else {
401 j++;
402 }
403 }
404 std::shuffle(sample.begin(), sample.end(), gen);
405
406 } else {
407 size_t ssize = size * (1.1 + size / N);
408 size_t cnt=0;
409 while (sampleset.size() < size) {
410 seed = U(gen);
411 std::vector<size_t> s = sample_replace_weights(ssize, N, prob, seed);
412 for (size_t i=0; i<s.size(); i++) {
413 sampleset.insert(s[i]);
414 }
415 cnt++;
416 if (cnt > 10) break;
417 }
418 sample.insert(sample.begin(), sampleset.begin(), sampleset.end());
419 if (sample.size() > size) {
420 sample.resize(size);
421 };
422 }
423
424 return(sample);
425 }
426
427
sample(size_t size,size_t N,bool replace,std::vector<double> prob,unsigned seed)428 std::vector<size_t> sample(size_t size, size_t N, bool replace, std::vector<double> prob, unsigned seed){
429 if ((size == 0) || (N == 0)) {
430 std::vector<size_t> s;
431 return s;
432 }
433 bool w = prob.size() == N;
434 if (replace) {
435 if (N == 1) {
436 std::vector<size_t> s(size,0);
437 return s;
438 }
439 if (w) {
440 return sample_replace_weights(size, N, prob, seed);
441 } else {
442 return sample_replace(size, N, seed);
443 }
444 } else {
445 if (N == 1) {
446 std::vector<size_t> s(1,0);
447 return s;
448 }
449 if (w) {
450 return sample_no_replace_weights(size, N, prob, seed);
451 } else {
452 return sample_no_replace(size, N, seed);
453 }
454 }
455 }
456
457
458
sampleRandomValues(unsigned size,bool replace,unsigned seed)459 std::vector<std::vector<double>> SpatRaster::sampleRandomValues(unsigned size, bool replace, unsigned seed) {
460
461 double nc = ncell();
462 std::vector<size_t> cells;
463 std::vector<double> w;
464 if (replace) {
465 cells = sample(size, nc, false, w, seed);
466 } else {
467 cells = sample(size, nc, true, w, seed);
468 }
469
470 std::vector<double> dcells(cells.begin(), cells.end());
471 std::vector<std::vector<double>> d = extractCell(dcells);
472 return d;
473 }
474
475
sampleRandomRaster(unsigned size,bool replace,unsigned seed)476 SpatRaster SpatRaster::sampleRandomRaster(unsigned size, bool replace, unsigned seed) {
477
478 unsigned nsize;
479 unsigned nr = nrow();
480 unsigned nc = ncol();
481 if (size < ncell()) {
482 double f = sqrt(size / ncell());
483 nr = std::ceil(nrow() * f);
484 nc = std::ceil(ncol() * f);
485 }
486 SpatRaster out = geometry(nlyr(), true);
487 out.source[0].nrow = nr;
488 out.source[0].ncol = nc;
489 if (!source[0].hasValues) return (out);
490
491 nsize = nr * nc;
492 std::vector<std::vector<double>> vv = sampleRandomValues(nsize, replace, seed);
493
494 for (size_t i=0; i<vv.size(); i++) {
495 out.source[0].values.insert(out.source[0].values.end(), vv[i].begin(), vv[i].end());
496 }
497 out.source[0].memory = true;
498 out.source[0].hasValues = true;
499 out.source[0].setRange();
500
501 return out;
502 }
503
test_sample(size_t size,size_t N,bool replace,std::vector<double> w,unsigned seed)504 std::vector<size_t> SpatExtent::test_sample(size_t size, size_t N, bool replace, std::vector<double> w, unsigned seed) {
505 return sample(size, N, replace, w, seed);
506 }
507
508
sampleRandom(size_t size,bool lonlat,unsigned seed)509 std::vector<std::vector<double>> SpatExtent::sampleRandom(size_t size, bool lonlat, unsigned seed){
510 std::vector<std::vector<double>> out(2);
511 if (size == 0) return out;
512 std::default_random_engine gen(seed);
513
514
515 if (lonlat) {
516 double d = (ymax - ymin) / 1000.0;
517 std::vector<double> r = seq(ymin, ymax, d);
518 std::vector<double> w;
519 w.reserve(r.size());
520 for (size_t i=0; i<r.size(); i++) {
521 if (i == 0) {
522 }
523 double ww = std::abs(cos(M_PI * r[i]/180.0));
524 w.push_back(ww );
525
526 }
527
528 std::vector <size_t> x = sample(size, r.size(), true, w, seed);
529 std::vector <double> lat, lon;
530 lat.reserve(size);
531 lon.reserve(size);
532 std::uniform_real_distribution<> U1(-0.5, 0.5);
533
534 double dx = 0.5 * d;
535 for (size_t i=0; i<x.size(); i++) {
536 double v = r[x[i]] + dx * U1(gen);
537 lat.push_back(v);
538 }
539 std::uniform_real_distribution<> U2(xmin, xmax);
540 for (size_t i=0; i<size; i++) {
541 lon.push_back(U2(gen));
542 }
543 out[0] = lon;
544 out[1] = lat;
545
546 } else {
547 std::vector <double> x, y;
548 x.reserve(size);
549 y.reserve(size);
550 std::uniform_real_distribution<> runifx(xmin, xmax);
551 std::uniform_real_distribution<> runify(ymin, ymax);
552 for (size_t i=0; i<size; i++) {
553 x.push_back(runifx(gen));
554 y.push_back(runify(gen));
555 }
556 out[0] = x;
557 out[1] = y;
558 }
559 return out;
560 }
561
562
563
564
sampleRegular(size_t size,bool lonlat)565 std::vector<std::vector<double>> SpatExtent::sampleRegular(size_t size, bool lonlat) {
566 std::vector<std::vector<double>> out(2);
567 if (size == 0) return out;
568
569 double r1 = xmax - xmin;
570 double r2 = ymax - ymin;
571
572 if (lonlat) {
573 double halfy = ymin + (ymax - ymin)/2;
574 double dx = distance_lonlat(xmin, halfy, xmax, halfy);
575 double dy = distance_lonlat(0, ymin, 0, ymax);
576 double ratio = dx/dy;
577 double ny = std::max(1.0, sqrt(size / ratio));
578 double nx = std::max(1.0, size / ny);
579 ny = std::round(ny);
580 nx = std::round(nx);
581 double x_i = r1 / nx;
582 double y_i = r2 / ny;
583
584 std::vector<double> lat, lon, w, xi;
585 lat.reserve(ny);
586 lat.push_back(ymin+0.5*y_i);
587 for (size_t i=1; i<ny; i++) {
588 lat.push_back(lat[i-1] + y_i);
589 }
590
591 w.reserve(lat.size());
592 for (size_t i=0; i<lat.size(); i++) {
593 w.push_back(cos(M_PI * lat[i] / 180.0));
594 }
595
596 double nwsumw = w.size() / accumulate(w.begin(), w.end(), 0.0);
597 xi.reserve(w.size());
598 for (size_t i=0; i<w.size(); i++) {
599 xi.push_back(x_i / (w[i] * nwsumw));
600 }
601 double halfx = xmin + (xmax - xmin)/2;
602 for (size_t i=0; i<lat.size(); i++) {
603 double start = halfx - 0.5*xi[i];
604 std::vector <double> x;
605 if (start < xmin) {
606 x = { halfx };
607 } else {
608 while (start > xmin) {
609 start -= xi[i];
610 }
611 x = seq(start + xi[i], xmax, xi[i]);
612 }
613 if (x.size() <= 1) {
614 x = { halfx };
615 }
616 std::vector <double> y(x.size(), lat[i]);
617
618 out[0].insert(out[0].end(), x.begin(), x.end());
619 out[1].insert(out[1].end(), y.begin(), y.end());
620 }
621
622 } else {
623 double ratio = r1/r2;
624 double ny = std::max(1.0, sqrt(size / ratio));
625 double nx = std::max(1.0, size / ny);
626 ny = std::round(ny);
627 nx = std::round(nx);
628 double x_i = r1 / nx;
629 double y_i = r2 / ny;
630
631 std::vector<double> x, y;
632 x.reserve(nx);
633 y.reserve(ny);
634 x.push_back(xmin+0.5*x_i);
635 for (size_t i=1; i<nx; i++) {
636 x.push_back(x[i-1] + x_i);
637 }
638 y.reserve(ny);
639 y.push_back(ymin+0.5*y_i);
640 for (size_t i=1; i<ny; i++) {
641 y.push_back(y[i-1] + y_i);
642 }
643 rep(x, ny);
644 rep_each(y, nx);
645 out[0] = x;
646 out[1] = y;
647 }
648
649 return out;
650 }
651
652
sampleCells(unsigned size,std::string method,bool replace,unsigned seed)653 std::vector<size_t> SpatRaster::sampleCells(unsigned size, std::string method, bool replace, unsigned seed) {
654
655 std::default_random_engine gen(seed);
656 std::vector<size_t> out;
657 if ((size >= ncell()) & (!replace)) {
658 out.resize(ncell());
659 std::iota(out.begin(), out.end(), 0);
660 if (method == "random") {
661 std::shuffle(out.begin(), out.end(), gen);
662 }
663 return out;
664 }
665
666 if (method == "random") {
667
668 } else if (method == "regular") {
669
670 } else { //method == "stratified"
671
672 } // else "Cluster"
673 return out;
674 }
675
676
sample(unsigned n,std::string method,unsigned seed)677 SpatVector SpatVector::sample(unsigned n, std::string method, unsigned seed) {
678
679 std::string gt = type();
680 SpatVector out;
681 if (gt != "polygons") {
682 out.setError("only implemented for polygons");
683 return out;
684 }
685 if (n == 0) {
686 out.srs = srs;
687 return out;
688 }
689
690 /*
691 if (strata != "") {
692 // should use
693 // SpatVector a = aggregate(strata, false);
694 // but get nasty self-intersection precision probs.
695
696 int i = where_in_vector(strata, get_names());
697 if (i < 0) {
698 out.setError("cannot find field");
699 return out;
700 }
701 SpatDataFrame uv;
702 std::vector<int> idx = df.getIndex(i, uv);
703 for (size_t i=0; i<uv.nrow(); i++) {
704 std::vector<int> g;
705 g.resize(0);
706 for (size_t j=0; j<idx.size(); j++) {
707 if (i == (size_t)idx[j]) {
708 g.push_back(j);
709 }
710 }
711 SpatVector s = subset_rows(g);
712 s = s.sample(n, "random", false, "", seed);
713 for (long &v : s.df.iv[0]) v = v+i;
714 out = out.append(s, true);
715 }
716 return out;
717 }
718 */
719 bool lonlat = is_lonlat();
720 bool random = (method == "random");
721
722 std::vector<double> a = area("m", true, {});
723 if (hasError()) {
724 out.setError(getError());
725 return out;
726 }
727 double suma = accumulate(a.begin(), a.end(), 0.0);
728
729 /*
730 if (by_geom) {
731 std::vector<double> pa;
732 pa.reserve(a.size());
733 for (size_t i=0; i<a.size(); i++) {
734 pa.push_back(a[i] / suma);
735 }
736 std::vector<std::vector<double>> pxy(2);
737
738 std::vector<size_t> nsamp(size());
739 for (size_t i=0; i<size(); i++) {
740 if (pa[i] > 0) {
741 SpatGeom g = getGeom(i);
742 SpatVector ve(g.extent, "");
743 ve.srs = srs;
744 double vea = ve.area()[0];
745 if (random) {
746 double m = vea / a[i];
747 m = std::max(2.0, std::min(m*m, 100.0));
748 size_t ssize = pa[i] * n * m;
749 pxy = g.extent.sampleRandom(ssize, lonlat, seed);
750 } else {
751 size_t ssize = std::round(pa[i] * n * vea / a[i]);
752 pxy = g.extent.sampleRegular(ssize, lonlat);
753 }
754 SpatVector vpnt(pxy[0], pxy[1], points, "");
755 SpatVector vpol(g);
756 vpnt = vpnt.intersect(vpol);
757 if (random) {
758 size_t psize = pa[i] * n;
759 if (vpnt.size() > psize) {
760 std::vector<int> rows(psize);
761 std::iota(rows.begin(), rows.end(), 0);
762 vpnt = vpnt.subset_rows(rows);
763 }
764 }
765 nsamp[i] = vpnt.size();
766 if (out.size() == 0) {
767 out = vpnt;
768 } else {
769 out = out.append(vpnt, true);
770 }
771 }
772 }
773 std::vector<long> id(size());
774 std::iota(id.begin(), id.end(), 1);
775 rep_each_vect(id, nsamp);
776 SpatDataFrame df;
777 df.add_column(id, "pol.id");
778 out.df = df;
779 } else {
780 */
781 std::vector<std::vector<double>> pxy(2);
782
783 SpatVector ve(extent, "");
784 ve.srs = srs;
785 double vea = ve.area("m", true, {})[0];
786 if (random) {
787 double m = vea / suma;
788 m = std::max(10.0, std::min(m*m, 100.0));
789 size_t ssize = n * m;
790 pxy = extent.sampleRandom(ssize, lonlat, seed);
791 } else {
792 size_t ssize = std::round(n * vea / suma);
793 pxy = extent.sampleRegular(ssize, lonlat);
794 }
795 out = SpatVector(pxy[0], pxy[1], points, "");
796 out = intersect(out);
797 if (random) {
798 if (out.size() > n) {
799 std::vector<int> rows(out.size());
800 std::iota(rows.begin(), rows.end(), 0);
801 std::default_random_engine gen(seed);
802 std::shuffle(rows.begin(), rows.end(), gen);
803 rows.resize(n);
804 out = out.subset_rows(rows);
805 }
806 }
807 //std::vector<long> id(out.size(), 1);
808 //SpatDataFrame df;
809 //df.add_column(id, "pol.id");
810 //out.df = df;
811 // }
812 out.srs = srs;
813
814 return out;
815 }
816
817
sample_geom(std::vector<unsigned> n,std::string method,unsigned seed)818 SpatVector SpatVector::sample_geom(std::vector<unsigned> n, std::string method, unsigned seed) {
819
820 SpatVector out;
821 if (n.size() != size()) {
822 out.setError("length of samples does not match number of geoms");
823 return out;
824 }
825 if (n.size() == 0) {
826 out.srs = srs;
827 return out;
828 }
829
830 for (size_t i=0; i<size(); i++) {
831 if (n[i] == 0) {
832 continue;
833 }
834 int j = i;
835 SpatVector v = subset_rows(j);
836 SpatVector p = v.sample(n[i], method, seed+i);
837 out = out.append(p, true);
838 }
839 out.srs = srs;
840 return out;
841 }
842
843 /*
844 std::vector<double> sample(size_t size, size_t N, bool replace, std::vector<double> prob, unsigned seed){
845 // Sample "size" elements from [1, N]
846 std::vector<double> result;
847 std::default_random_engine gen(seed);
848
849 bool weights = false;
850 if (prob.size() == N) {
851 weights = true;
852 // should check for neg numbers
853 double minw = *min_element(prob.begin(),prob.end());
854 double maxw = *max_element(prob.begin(),prob.end()) - minw;
855 for (double& d : prob) d = (d - minw) / maxw;
856 }
857
858 if (replace) {
859 //std::vector<double> samples;
860 result.reserve(size);
861 std::uniform_int_distribution<> distribution(0, N-1);
862 if (weights) {
863 std::uniform_real_distribution<> wdist(0, 1);
864 size_t cnt = 0;
865 while (cnt < size) {
866 double w = wdist(gen);
867 double v = distribution(gen);
868 if (prob[v] >= w) {
869 result.push_back(v);
870 cnt++;
871 }
872 }
873 } else {
874 for (size_t i=0; i<size; i++) {
875 result.push_back( distribution(gen) );
876 }
877 }
878 } else {//without replacement
879
880 size = std::max(size_t(1), std::min(size, N)); // k <= N
881
882 std::uniform_int_distribution<> distribution(1, N);
883 std::unordered_set<unsigned> samples;
884
885 if (weights) {
886 std::uniform_int_distribution<> wdist(0, N-1);
887 size_t cnt = 0;
888 size_t r = 0;
889 while (cnt < size) {
890 double w = wdist(gen)/N;
891 double v = distribution(gen);
892 if (prob[v] >= w) {
893 if (!samples.insert(v).second) {
894 samples.insert(r);
895 cnt++;
896 }
897 }
898 r++;
899 r = r%(N-1);
900 }
901 } else {
902 for (size_t r = N - size; r < N; ++r) {
903 unsigned v = distribution(gen) - 1;
904 if (!samples.insert(v).second) samples.insert(r);
905 }
906 }
907 result = std::vector<double>(samples.begin(), samples.end());
908 std::shuffle(result.begin(), result.end(), gen);
909 }
910 return result;
911 }
912 */
913
914