1
2R version 4.1.1 (2021-08-10) -- "Kick Things"
3Copyright (C) 2021 The R Foundation for Statistical Computing
4Platform: x86_64-pc-linux-gnu (64-bit)
5
6R is free software and comes with ABSOLUTELY NO WARRANTY.
7You are welcome to redistribute it under certain conditions.
8Type 'license()' or 'licence()' for distribution details.
9
10R is a collaborative project with many contributors.
11Type 'contributors()' for more information and
12'citation()' on how to cite R or R packages in publications.
13
14Type 'demo()' for some demos, 'help()' for on-line help, or
15'help.start()' for an HTML browser interface to help.
16Type 'q()' to quit R.
17
18> suppressPackageStartupMessages(library(sf))
19>
20> library(dplyr)
21
22Attaching package: 'dplyr'
23
24The following objects are masked from 'package:stats':
25
26    filter, lag
27
28The following objects are masked from 'package:base':
29
30    intersect, setdiff, setequal, union
31
32> options(dplyr.summarise.inform=FALSE)
33> read_sf(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>%
34+ 	st_transform(3857) -> nc
35> nc %>% filter(AREA > .1) %>% plot()
36Warning message:
37plotting the first 10 out of 14 attributes; use max.plot = 14 to plot all
38>
39> # plot 10 smallest counties in grey:
40> nc %>%
41+   select(BIR74, geometry) %>%
42+   plot()
43>
44> nc %>%
45+   select(AREA, geometry) %>%
46+   arrange(AREA) %>%
47+   slice(1:10) %>%
48+   plot(add = TRUE, col = 'grey', main ="")
49>
50> # select: check both when geometry is part of the selection, and when not:
51> nc %>% select(SID74, SID79) %>% names()
52[1] "SID74"    "SID79"    "geometry"
53> nc %>% select(SID74, SID79, geometry) %>% names()
54[1] "SID74"    "SID79"    "geometry"
55> nc %>% select(SID74, SID79) %>% class()
56[1] "sf"         "tbl_df"     "tbl"        "data.frame"
57> nc %>% select(SID74, SID79, geometry) %>% class()
58[1] "sf"         "tbl_df"     "tbl"        "data.frame"
59>
60> # group_by:
61> nc$area_cl = cut(nc$AREA, c(0, .1, .12, .15, .25))
62> nc %>% group_by(area_cl) %>% class()
63[1] "sf"         "grouped_df" "tbl_df"     "tbl"        "data.frame"
64>
65> # mutate:
66> nc2 <- nc %>% mutate(area10 = AREA/10)
67>
68> # transmute:
69> nc %>% transmute(AREA = AREA/10, geometry = geometry) %>% class()
70[1] "sf"         "tbl_df"     "tbl"        "data.frame"
71> nc %>% transmute(AREA = AREA/10) %>% class()
72[1] "sf"         "tbl_df"     "tbl"        "data.frame"
73>
74> # rename:
75> nc2 <- nc %>% rename(area = AREA)
76>
77> # distinct:
78> nc[c(1:100,1:10),] %>% distinct() %>% nrow()
79[1] 100
80>
81> # summarize:
82> nc$area_cl = cut(nc$AREA, c(0, .1, .12, .15, .25))
83> nc.g <- nc %>% group_by(area_cl)
84> nc.g %>% summarise(mean(AREA))
85Simple feature collection with 4 features and 2 fields
86Geometry type: MULTIPOLYGON
87Dimension:     XY
88Bounding box:  xmin: -9386880 ymin: 4012991 xmax: -8399788 ymax: 4382079
89Projected CRS: WGS 84 / Pseudo-Mercator
90# A tibble: 4 × 3
91  area_cl     `mean(AREA)`                                              geometry
92  <fct>              <dbl>                                    <MULTIPOLYGON [m]>
931 (0,0.1]           0.0760 (((-9349959 4195534, -9351260 4199567, -9354263 4203…
942 (0.1,0.12]        0.112  (((-9355589 4200996, -9354263 4203717, -9351260 4199…
953 (0.12,0.15]       0.134  (((-9302071 4171499, -9311146 4181234, -9311292 4183…
964 (0.15,0.25]       0.190  (((-9173717 4227806, -9181049 4224455, -9186013 4224…
97> nc.g %>% summarize(mean(AREA)) %>% plot(col = 3:6/7)
98>
99> library(tidyr)
100>
101> # time-wide to long table, using tidyr::gather
102> # stack the two SID columns for the July 1, 1974 - June 30, 1978 and July 1, 1979 - June 30, 1984 periods
103> # (see https://cran.r-project.org/web/packages/spdep/vignettes/sids.pdf)
104> nc %>% select(SID74, SID79, geometry) %>% gather("VAR", "SID", -geometry) %>% summary()
105          geometry       VAR                 SID
106 MULTIPOLYGON :200   Length:200         Min.   : 0.000
107 epsg:3857    :  0   Class :character   1st Qu.: 2.000
108 +proj=merc...:  0   Mode  :character   Median : 5.000
109                                        Mean   : 7.515
110                                        3rd Qu.: 9.000
111                                        Max.   :57.000
112>
113> # spread:
114> nc$row = 1:100
115> nc.g <- nc %>% select(SID74, SID79, row) %>% gather("VAR", "SID", -row, -geometry)
116> nc.g %>% tail()
117Simple feature collection with 6 features and 3 fields
118Geometry type: MULTIPOLYGON
119Dimension:     XY
120Bounding box:  xmin: -8802506 ymin: 4012991 xmax: -8492268 ymax: 4166167
121Projected CRS: WGS 84 / Pseudo-Mercator
122# A tibble: 6 × 4
123    row                                                     geometry VAR     SID
124  <int>                                           <MULTIPOLYGON [m]> <chr> <dbl>
1251    95 (((-8588146 4131923, -8589850 4133303, -8589356 4135198, -8… SID79     4
1262    96 (((-8711999 4081959, -8719511 4077863, -8731642 4078864, -8… SID79     5
1273    97 (((-8685774 4073056, -8697387 4077823, -8700120 4077570, -8… SID79     3
1284    98 (((-8755885 4021935, -8802506 4069795, -8798771 4071779, -8… SID79    17
1295    99 (((-8678517 4054264, -8679088 4061405, -8680136 4061550, -8… SID79     9
1306   100 (((-8755885 4021935, -8753548 4025868, -8753052 4030195, -8… SID79     6
131> nc.g %>% spread(VAR, SID) %>% head()
132Simple feature collection with 6 features and 3 fields
133Geometry type: MULTIPOLYGON
134Dimension:     XY
135Bounding box:  xmin: -9099356 ymin: 4310668 xmax: -8434988 ymax: 4382079
136Projected CRS: WGS 84 / Pseudo-Mercator
137# A tibble: 6 × 4
138    row                                                     geometry SID74 SID79
139  <int>                                           <MULTIPOLYGON [m]> <dbl> <dbl>
1401     1 (((-9069486 4332934, -9077066 4338201, -9079419 4338351, -9…     1     0
1412     2 (((-9043562 4351030, -9043652 4352973, -9046117 4356516, -9…     0     3
1423     3 (((-8956335 4334068, -8958566 4335747, -8965300 4336025, -8…     5     6
1434     4 (((-8461241 4344709, -8462173 4347214, -8463902 4346972, -8…     1     2
1445     5 (((-8595797 4333852, -8597683 4330212, -8604808 4329788, -8…     9     3
1456     6 (((-8543185 4332878, -8569416 4332369, -8570981 4333107, -8…     7     5
146> nc %>% select(SID74, SID79, geometry, row) %>% gather("VAR", "SID", -geometry, -row) %>% spread(VAR, SID) %>% head()
147Simple feature collection with 6 features and 3 fields
148Geometry type: MULTIPOLYGON
149Dimension:     XY
150Bounding box:  xmin: -9099356 ymin: 4310668 xmax: -8434988 ymax: 4382079
151Projected CRS: WGS 84 / Pseudo-Mercator
152# A tibble: 6 × 4
153                                                      geometry   row SID74 SID79
154                                            <MULTIPOLYGON [m]> <int> <dbl> <dbl>
1551 (((-9069486 4332934, -9077066 4338201, -9079419 4338351, -9…     1     1     0
1562 (((-9043562 4351030, -9043652 4352973, -9046117 4356516, -9…     2     0     3
1573 (((-8956335 4334068, -8958566 4335747, -8965300 4336025, -8…     3     5     6
1584 (((-8461241 4344709, -8462173 4347214, -8463902 4346972, -8…     4     1     2
1595 (((-8595797 4333852, -8597683 4330212, -8604808 4329788, -8…     5     9     3
1606 (((-8543185 4332878, -8569416 4332369, -8570981 4333107, -8…     6     7     5
161>
162> # test st_set_crs in pipe:
163> sfc = st_sfc(st_point(c(0,0)), st_point(c(1,1)))
164> x <- sfc %>% st_set_crs(4326) %>% st_transform(3857)
165> x
166Geometry set for 2 features
167Geometry type: POINT
168Dimension:     XY
169Bounding box:  xmin: 0 ymin: 0 xmax: 111319.5 ymax: 111325.1
170Projected CRS: WGS 84 / Pseudo-Mercator
171POINT (0 0)
172POINT (111319.5 111325.1)
173>
174> read_sf(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>%
175+ 	st_transform(3857) -> nc
176> nc.merc <- st_transform(nc, 32119) # NC State Plane
177> suppressPackageStartupMessages(library(units))
178> install_unit("person")
179> person = as_units("person")
180> nc.merc <- nc.merc %>% mutate(area = st_area(nc.merc), dens = BIR74 * person / area)
181>
182> # summary(nc.merc$dens) # requires units 0.4-2
183> nc.merc$area_cl <- cut(nc$AREA, c(0, .1, .12, .15, .25))
184> nc.grp <- nc.merc %>% group_by(area_cl)
185>
186> out <- nc.grp %>% summarise(A = sum(area), pop = sum(dens * area),
187+ 	new_dens = sum(dens * area)/sum(area))
188>
189> # mean densities depend on grouping:
190> nc.merc %>% summarize(mean(dens))
191Simple feature collection with 1 feature and 1 field
192Geometry type: MULTIPOLYGON
193Dimension:     XY
194Bounding box:  xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
195Projected CRS: NAD83 / North Carolina
196# A tibble: 1 × 2
197  `mean(dens)`                                                          geometry
198  [person/m^2]                                                <MULTIPOLYGON [m]>
1991   0.00000259 (((446521.1 133561, 441668.6 140459.3, 436812.5 146953.9, 433885…
200> out %>% summarise(mean(new_dens))
201Simple feature collection with 1 feature and 1 field
202Geometry type: MULTIPOLYGON
203Dimension:     XY
204Bounding box:  xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
205Projected CRS: NAD83 / North Carolina
206# A tibble: 1 × 2
207  `mean(new_dens)`                                                      geometry
208      [person/m^2]                                            <MULTIPOLYGON [m]>
2091       0.00000259 (((154399.2 148898.9, 142569.6 149407.8, 123829 150481.4, 12…
210>
211> # total densities don't:
212> nc.merc %>% summarise(sum(area * dens))
213Simple feature collection with 1 feature and 1 field
214Geometry type: MULTIPOLYGON
215Dimension:     XY
216Bounding box:  xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
217Projected CRS: NAD83 / North Carolina
218# A tibble: 1 × 2
219  `sum(area * dens)`                                                    geometry
220            [person]                                          <MULTIPOLYGON [m]>
2211             329962 (((446521.1 133561, 441668.6 140459.3, 436812.5 146953.9, …
222> out %>% summarise(sum(A * new_dens))
223Simple feature collection with 1 feature and 1 field
224Geometry type: MULTIPOLYGON
225Dimension:     XY
226Bounding box:  xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
227Projected CRS: NAD83 / North Carolina
228# A tibble: 1 × 2
229  `sum(A * new_dens)`                                                   geometry
230             [person]                                         <MULTIPOLYGON [m]>
2311              329962 (((154399.2 148898.9, 142569.6 149407.8, 123829 150481.4,…
232>
233> conn = system.file("gpkg/nc.gpkg", package = "sf")
234>
235> library(DBI)
236> library(RSQLite)
237> con = dbConnect(SQLite(), dbname = system.file("gpkg/nc.gpkg", package = "sf"))
238> dbReadTable(con, "nc.gpkg") %>% filter(AREA > 0.2) %>% collect %>% st_sf
239Simple feature collection with 11 features and 15 fields
240Geometry type: MULTIPOLYGON
241Dimension:     XY
242Bounding box:  xmin: -80.06441 ymin: 33.88199 xmax: -76.49254 ymax: 36.06665
243CRS:           NA
244First 10 features:
245   fid  AREA PERIMETER CNTY_ CNTY_ID     NAME  FIPS FIPSNO CRESS_ID BIR74 SID74
2461   37 0.219     2.130  1938    1938     Wake 37183  37183       92 14484    16
2472   47 0.201     1.805  1968    1968 Randolph 37151  37151       76  4456     7
2483   54 0.207     1.851  1989    1989 Johnston 37101  37101       51  3999     6
2494   57 0.203     3.197  2004    2004 Beaufort 37013  37013        7  2692     7
2505   79 0.241     2.214  2083    2083  Sampson 37163  37163       82  3025     4
2516   88 0.204     1.871  2100    2100   Duplin 37061  37061       31  2483     4
2527   94 0.240     2.004  2150    2150  Robeson 37155  37155       78  7889    31
2538   96 0.225     2.107  2162    2162   Bladen 37017  37017        9  1782     8
2549   97 0.214     2.152  2185    2185   Pender 37141  37141       71  1228     4
25510  98 0.240     2.365  2232    2232 Columbus 37047  37047       24  3350    15
256   NWBIR74 BIR79 SID79 NWBIR79                           geom
2571     4397 20857    31    6221 MULTIPOLYGON (((-78.92107 3...
2582      384  5711    12     483 MULTIPOLYGON (((-79.76499 3...
2593     1165  4780    13    1349 MULTIPOLYGON (((-78.53874 3...
2604     1131  2909     4    1163 MULTIPOLYGON (((-77.10377 3...
2615     1396  3447     4    1524 MULTIPOLYGON (((-78.11377 3...
2626     1061  2777     7    1227 MULTIPOLYGON (((-77.68983 3...
2637     5904  9087    26    6899 MULTIPOLYGON (((-78.86451 3...
2648      818  2052     5    1023 MULTIPOLYGON (((-78.2615 34...
2659      580  1602     3     763 MULTIPOLYGON (((-78.02592 3...
26610    1431  4144    17    1832 MULTIPOLYGON (((-78.65572 3...
267>
268> # nest:
269> storms.sf = st_as_sf(storms, coords = c("long", "lat"), crs = 4326)
270> x <- storms.sf %>% group_by(name, year) %>% nest
271>
272> nrow(distinct(nc[c(1,1,1,2,2,3:100),]))
273[1] 100
274>
275> # set.seed(1331)
276> nc$gp <- sample(10, 100, replace=TRUE)
277> # Get centroid of each group of polygons; https://github.com/r-spatial/sf/issues/969
278> nc_gp_cent <- nc %>%
279+                 group_by(gp) %>%
280+                 group_map(st_area)
281>
282> nc %>% st_filter(nc[1,]) %>% nrow
283[1] 4
284>
285> proc.time()
286   user  system elapsed
287  2.660   0.056   2.743
288