1 2R version 4.1.1 (2021-08-10) -- "Kick Things" 3Copyright (C) 2021 The R Foundation for Statistical Computing 4Platform: x86_64-pc-linux-gnu (64-bit) 5 6R is free software and comes with ABSOLUTELY NO WARRANTY. 7You are welcome to redistribute it under certain conditions. 8Type 'license()' or 'licence()' for distribution details. 9 10R is a collaborative project with many contributors. 11Type 'contributors()' for more information and 12'citation()' on how to cite R or R packages in publications. 13 14Type 'demo()' for some demos, 'help()' for on-line help, or 15'help.start()' for an HTML browser interface to help. 16Type 'q()' to quit R. 17 18> suppressPackageStartupMessages(library(sf)) 19> 20> library(dplyr) 21 22Attaching package: 'dplyr' 23 24The following objects are masked from 'package:stats': 25 26 filter, lag 27 28The following objects are masked from 'package:base': 29 30 intersect, setdiff, setequal, union 31 32> options(dplyr.summarise.inform=FALSE) 33> read_sf(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>% 34+ st_transform(3857) -> nc 35> nc %>% filter(AREA > .1) %>% plot() 36Warning message: 37plotting the first 10 out of 14 attributes; use max.plot = 14 to plot all 38> 39> # plot 10 smallest counties in grey: 40> nc %>% 41+ select(BIR74, geometry) %>% 42+ plot() 43> 44> nc %>% 45+ select(AREA, geometry) %>% 46+ arrange(AREA) %>% 47+ slice(1:10) %>% 48+ plot(add = TRUE, col = 'grey', main ="") 49> 50> # select: check both when geometry is part of the selection, and when not: 51> nc %>% select(SID74, SID79) %>% names() 52[1] "SID74" "SID79" "geometry" 53> nc %>% select(SID74, SID79, geometry) %>% names() 54[1] "SID74" "SID79" "geometry" 55> nc %>% select(SID74, SID79) %>% class() 56[1] "sf" "tbl_df" "tbl" "data.frame" 57> nc %>% select(SID74, SID79, geometry) %>% class() 58[1] "sf" "tbl_df" "tbl" "data.frame" 59> 60> # group_by: 61> nc$area_cl = cut(nc$AREA, c(0, .1, .12, .15, .25)) 62> nc %>% group_by(area_cl) %>% class() 63[1] "sf" "grouped_df" "tbl_df" "tbl" "data.frame" 64> 65> # mutate: 66> nc2 <- nc %>% mutate(area10 = AREA/10) 67> 68> # transmute: 69> nc %>% transmute(AREA = AREA/10, geometry = geometry) %>% class() 70[1] "sf" "tbl_df" "tbl" "data.frame" 71> nc %>% transmute(AREA = AREA/10) %>% class() 72[1] "sf" "tbl_df" "tbl" "data.frame" 73> 74> # rename: 75> nc2 <- nc %>% rename(area = AREA) 76> 77> # distinct: 78> nc[c(1:100,1:10),] %>% distinct() %>% nrow() 79[1] 100 80> 81> # summarize: 82> nc$area_cl = cut(nc$AREA, c(0, .1, .12, .15, .25)) 83> nc.g <- nc %>% group_by(area_cl) 84> nc.g %>% summarise(mean(AREA)) 85Simple feature collection with 4 features and 2 fields 86Geometry type: MULTIPOLYGON 87Dimension: XY 88Bounding box: xmin: -9386880 ymin: 4012991 xmax: -8399788 ymax: 4382079 89Projected CRS: WGS 84 / Pseudo-Mercator 90# A tibble: 4 × 3 91 area_cl `mean(AREA)` geometry 92 <fct> <dbl> <MULTIPOLYGON [m]> 931 (0,0.1] 0.0760 (((-9349959 4195534, -9351260 4199567, -9354263 4203… 942 (0.1,0.12] 0.112 (((-9355589 4200996, -9354263 4203717, -9351260 4199… 953 (0.12,0.15] 0.134 (((-9302071 4171499, -9311146 4181234, -9311292 4183… 964 (0.15,0.25] 0.190 (((-9173717 4227806, -9181049 4224455, -9186013 4224… 97> nc.g %>% summarize(mean(AREA)) %>% plot(col = 3:6/7) 98> 99> library(tidyr) 100> 101> # time-wide to long table, using tidyr::gather 102> # stack the two SID columns for the July 1, 1974 - June 30, 1978 and July 1, 1979 - June 30, 1984 periods 103> # (see https://cran.r-project.org/web/packages/spdep/vignettes/sids.pdf) 104> nc %>% select(SID74, SID79, geometry) %>% gather("VAR", "SID", -geometry) %>% summary() 105 geometry VAR SID 106 MULTIPOLYGON :200 Length:200 Min. : 0.000 107 epsg:3857 : 0 Class :character 1st Qu.: 2.000 108 +proj=merc...: 0 Mode :character Median : 5.000 109 Mean : 7.515 110 3rd Qu.: 9.000 111 Max. :57.000 112> 113> # spread: 114> nc$row = 1:100 115> nc.g <- nc %>% select(SID74, SID79, row) %>% gather("VAR", "SID", -row, -geometry) 116> nc.g %>% tail() 117Simple feature collection with 6 features and 3 fields 118Geometry type: MULTIPOLYGON 119Dimension: XY 120Bounding box: xmin: -8802506 ymin: 4012991 xmax: -8492268 ymax: 4166167 121Projected CRS: WGS 84 / Pseudo-Mercator 122# A tibble: 6 × 4 123 row geometry VAR SID 124 <int> <MULTIPOLYGON [m]> <chr> <dbl> 1251 95 (((-8588146 4131923, -8589850 4133303, -8589356 4135198, -8… SID79 4 1262 96 (((-8711999 4081959, -8719511 4077863, -8731642 4078864, -8… SID79 5 1273 97 (((-8685774 4073056, -8697387 4077823, -8700120 4077570, -8… SID79 3 1284 98 (((-8755885 4021935, -8802506 4069795, -8798771 4071779, -8… SID79 17 1295 99 (((-8678517 4054264, -8679088 4061405, -8680136 4061550, -8… SID79 9 1306 100 (((-8755885 4021935, -8753548 4025868, -8753052 4030195, -8… SID79 6 131> nc.g %>% spread(VAR, SID) %>% head() 132Simple feature collection with 6 features and 3 fields 133Geometry type: MULTIPOLYGON 134Dimension: XY 135Bounding box: xmin: -9099356 ymin: 4310668 xmax: -8434988 ymax: 4382079 136Projected CRS: WGS 84 / Pseudo-Mercator 137# A tibble: 6 × 4 138 row geometry SID74 SID79 139 <int> <MULTIPOLYGON [m]> <dbl> <dbl> 1401 1 (((-9069486 4332934, -9077066 4338201, -9079419 4338351, -9… 1 0 1412 2 (((-9043562 4351030, -9043652 4352973, -9046117 4356516, -9… 0 3 1423 3 (((-8956335 4334068, -8958566 4335747, -8965300 4336025, -8… 5 6 1434 4 (((-8461241 4344709, -8462173 4347214, -8463902 4346972, -8… 1 2 1445 5 (((-8595797 4333852, -8597683 4330212, -8604808 4329788, -8… 9 3 1456 6 (((-8543185 4332878, -8569416 4332369, -8570981 4333107, -8… 7 5 146> nc %>% select(SID74, SID79, geometry, row) %>% gather("VAR", "SID", -geometry, -row) %>% spread(VAR, SID) %>% head() 147Simple feature collection with 6 features and 3 fields 148Geometry type: MULTIPOLYGON 149Dimension: XY 150Bounding box: xmin: -9099356 ymin: 4310668 xmax: -8434988 ymax: 4382079 151Projected CRS: WGS 84 / Pseudo-Mercator 152# A tibble: 6 × 4 153 geometry row SID74 SID79 154 <MULTIPOLYGON [m]> <int> <dbl> <dbl> 1551 (((-9069486 4332934, -9077066 4338201, -9079419 4338351, -9… 1 1 0 1562 (((-9043562 4351030, -9043652 4352973, -9046117 4356516, -9… 2 0 3 1573 (((-8956335 4334068, -8958566 4335747, -8965300 4336025, -8… 3 5 6 1584 (((-8461241 4344709, -8462173 4347214, -8463902 4346972, -8… 4 1 2 1595 (((-8595797 4333852, -8597683 4330212, -8604808 4329788, -8… 5 9 3 1606 (((-8543185 4332878, -8569416 4332369, -8570981 4333107, -8… 6 7 5 161> 162> # test st_set_crs in pipe: 163> sfc = st_sfc(st_point(c(0,0)), st_point(c(1,1))) 164> x <- sfc %>% st_set_crs(4326) %>% st_transform(3857) 165> x 166Geometry set for 2 features 167Geometry type: POINT 168Dimension: XY 169Bounding box: xmin: 0 ymin: 0 xmax: 111319.5 ymax: 111325.1 170Projected CRS: WGS 84 / Pseudo-Mercator 171POINT (0 0) 172POINT (111319.5 111325.1) 173> 174> read_sf(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>% 175+ st_transform(3857) -> nc 176> nc.merc <- st_transform(nc, 32119) # NC State Plane 177> suppressPackageStartupMessages(library(units)) 178> install_unit("person") 179> person = as_units("person") 180> nc.merc <- nc.merc %>% mutate(area = st_area(nc.merc), dens = BIR74 * person / area) 181> 182> # summary(nc.merc$dens) # requires units 0.4-2 183> nc.merc$area_cl <- cut(nc$AREA, c(0, .1, .12, .15, .25)) 184> nc.grp <- nc.merc %>% group_by(area_cl) 185> 186> out <- nc.grp %>% summarise(A = sum(area), pop = sum(dens * area), 187+ new_dens = sum(dens * area)/sum(area)) 188> 189> # mean densities depend on grouping: 190> nc.merc %>% summarize(mean(dens)) 191Simple feature collection with 1 feature and 1 field 192Geometry type: MULTIPOLYGON 193Dimension: XY 194Bounding box: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9 195Projected CRS: NAD83 / North Carolina 196# A tibble: 1 × 2 197 `mean(dens)` geometry 198 [person/m^2] <MULTIPOLYGON [m]> 1991 0.00000259 (((446521.1 133561, 441668.6 140459.3, 436812.5 146953.9, 433885… 200> out %>% summarise(mean(new_dens)) 201Simple feature collection with 1 feature and 1 field 202Geometry type: MULTIPOLYGON 203Dimension: XY 204Bounding box: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9 205Projected CRS: NAD83 / North Carolina 206# A tibble: 1 × 2 207 `mean(new_dens)` geometry 208 [person/m^2] <MULTIPOLYGON [m]> 2091 0.00000259 (((154399.2 148898.9, 142569.6 149407.8, 123829 150481.4, 12… 210> 211> # total densities don't: 212> nc.merc %>% summarise(sum(area * dens)) 213Simple feature collection with 1 feature and 1 field 214Geometry type: MULTIPOLYGON 215Dimension: XY 216Bounding box: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9 217Projected CRS: NAD83 / North Carolina 218# A tibble: 1 × 2 219 `sum(area * dens)` geometry 220 [person] <MULTIPOLYGON [m]> 2211 329962 (((446521.1 133561, 441668.6 140459.3, 436812.5 146953.9, … 222> out %>% summarise(sum(A * new_dens)) 223Simple feature collection with 1 feature and 1 field 224Geometry type: MULTIPOLYGON 225Dimension: XY 226Bounding box: xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9 227Projected CRS: NAD83 / North Carolina 228# A tibble: 1 × 2 229 `sum(A * new_dens)` geometry 230 [person] <MULTIPOLYGON [m]> 2311 329962 (((154399.2 148898.9, 142569.6 149407.8, 123829 150481.4,… 232> 233> conn = system.file("gpkg/nc.gpkg", package = "sf") 234> 235> library(DBI) 236> library(RSQLite) 237> con = dbConnect(SQLite(), dbname = system.file("gpkg/nc.gpkg", package = "sf")) 238> dbReadTable(con, "nc.gpkg") %>% filter(AREA > 0.2) %>% collect %>% st_sf 239Simple feature collection with 11 features and 15 fields 240Geometry type: MULTIPOLYGON 241Dimension: XY 242Bounding box: xmin: -80.06441 ymin: 33.88199 xmax: -76.49254 ymax: 36.06665 243CRS: NA 244First 10 features: 245 fid AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 2461 37 0.219 2.130 1938 1938 Wake 37183 37183 92 14484 16 2472 47 0.201 1.805 1968 1968 Randolph 37151 37151 76 4456 7 2483 54 0.207 1.851 1989 1989 Johnston 37101 37101 51 3999 6 2494 57 0.203 3.197 2004 2004 Beaufort 37013 37013 7 2692 7 2505 79 0.241 2.214 2083 2083 Sampson 37163 37163 82 3025 4 2516 88 0.204 1.871 2100 2100 Duplin 37061 37061 31 2483 4 2527 94 0.240 2.004 2150 2150 Robeson 37155 37155 78 7889 31 2538 96 0.225 2.107 2162 2162 Bladen 37017 37017 9 1782 8 2549 97 0.214 2.152 2185 2185 Pender 37141 37141 71 1228 4 25510 98 0.240 2.365 2232 2232 Columbus 37047 37047 24 3350 15 256 NWBIR74 BIR79 SID79 NWBIR79 geom 2571 4397 20857 31 6221 MULTIPOLYGON (((-78.92107 3... 2582 384 5711 12 483 MULTIPOLYGON (((-79.76499 3... 2593 1165 4780 13 1349 MULTIPOLYGON (((-78.53874 3... 2604 1131 2909 4 1163 MULTIPOLYGON (((-77.10377 3... 2615 1396 3447 4 1524 MULTIPOLYGON (((-78.11377 3... 2626 1061 2777 7 1227 MULTIPOLYGON (((-77.68983 3... 2637 5904 9087 26 6899 MULTIPOLYGON (((-78.86451 3... 2648 818 2052 5 1023 MULTIPOLYGON (((-78.2615 34... 2659 580 1602 3 763 MULTIPOLYGON (((-78.02592 3... 26610 1431 4144 17 1832 MULTIPOLYGON (((-78.65572 3... 267> 268> # nest: 269> storms.sf = st_as_sf(storms, coords = c("long", "lat"), crs = 4326) 270> x <- storms.sf %>% group_by(name, year) %>% nest 271> 272> nrow(distinct(nc[c(1,1,1,2,2,3:100),])) 273[1] 100 274> 275> # set.seed(1331) 276> nc$gp <- sample(10, 100, replace=TRUE) 277> # Get centroid of each group of polygons; https://github.com/r-spatial/sf/issues/969 278> nc_gp_cent <- nc %>% 279+ group_by(gp) %>% 280+ group_map(st_area) 281> 282> nc %>% st_filter(nc[1,]) %>% nrow 283[1] 4 284> 285> proc.time() 286 user system elapsed 287 2.660 0.056 2.743 288