1# Functions for smoothing or dis-aggregating data over map regions.
2
3# m is a map object
4# z is a named vector
5# res is resolution of sampling grid
6# span is kernel parameter (larger = smoother)
7#   span = Inf is a special case which invokes cubic spline kernel.
8#   span is scaled by the map size, and is independent of res.
9# result is a frame
10smooth.map <- function(m, z, res = 50, span = 1/10, averages = FALSE,
11                       type = c("smooth", "interp"), merge = FALSE) {
12  #if(is.data.frame(z)) z = as.named.vector(z)
13  if(averages) {
14    # turn averages into sums
15    z = z * area.map(m, names(z), sqmi=FALSE)
16  }
17  # sampling grid
18  xlim <- range(m$x, na.rm = TRUE)
19  ylim <- range(m$y, na.rm = TRUE)
20  midpoints <- function(start, end, n) {
21    inc <- (end - start)/n
22    seq(start + inc/2, end - inc/2, length.out = n)
23  }
24  # 2*res is an assumption about aspect ratio (usually true)
25  if(length(res) == 1) res = c(2*res, res)
26  xs <- midpoints(xlim[1], xlim[2], res[1])
27  ys <- midpoints(ylim[1], ylim[2], res[2])
28  x <- expand.grid(x = xs, y = ys)
29  if(FALSE) {
30    # add centroids to the sample points
31    xc = apply.polygon(m[c("x", "y")], centroid.polygon)
32    # convert m into a matrix
33    xc <- t(array(unlist(xc), c(2, length(xc))))
34    xc = data.frame(x = xc[, 1], y = xc[, 2])
35    x = rbind(x, xc)
36  }
37  radius = sqrt(diff(xlim)^2 + diff(ylim)^2)/2
38  lambda = 1/(span*radius)^2
39  #cat("lambda = ", lambda, "\n")
40  cell.area = diff(xs[1:2])*diff(ys[1:2])
41
42  r <- factor(map.where(m, x))
43  if(merge) {
44    # merge regions
45    # merge[r] is the parent of region r
46    # regions with merge[r] = NA are considered absent from the map
47    # (no sample points will be placed there)
48    # this can be slow on complex maps
49    merge <- names(z)
50    merge <- merge[match.map(m, merge)]
51    names(merge) <- m$names
52    levels(r) <- merge[levels(r)]
53  }
54  # remove points not on the map
55  i <- !is.na(r)
56  x <- x[i, ]
57  r <- r[i]
58  xo = x
59  if(TRUE) {
60    # kludge - drop regions with no samples
61    n = table(r)
62    bad = (n == 0)
63    newlevels = levels(r)
64    newlevels[bad] = NA
65    levels(r) = newlevels
66  }
67  # put z in canonical order, and drop values which are not in the map
68  z = z[levels(r)]
69  # remove regions not named in z, or where z is NA
70  bad = is.na(z)
71  z = z[!bad]
72  newlevels = levels(r)
73  newlevels[bad] = NA
74  levels(r) = newlevels
75  i <- !is.na(r)
76  x <- x[i, ]
77  r <- r[i]
78  # do all regions have sample points?
79  n = table(r)
80  if(any(n == 0)) stop(paste(paste(names(n)[n == 0], collapse = ", "), "have no sample points"))
81  type <- match.arg(type)
82  if(FALSE) {
83    # code for these is in 315/util.r
84    # most time is spent here
85    # w <- switch(type,
86    #             mass = gp.smoother(x, x, r, lambda),
87    #             smooth = kr.smoother(x, x, r, lambda))
88    # z = drop(z %*% w)
89    # cbind(x, z = z)
90  } else {
91    if(type == "smooth") {
92      z = kernel.smooth(x, z, xo, lambda, r)
93    } else {
94      z = gp.smooth(x, z, xo, lambda, r)
95    }
96    z = z/cell.area
97    cbind(xo, z = z)
98  }
99}
100
101gp.smooth <- function(x, z, xo, lambda, r) {
102  # predict a function measured at locations x to new locations xo
103  krr = kernel.region.region(x, r, lambda)
104  white.z = solve(krr, z)
105  kernel.smooth(x, white.z, xo, lambda, r, normalize = FALSE)
106}
107
108kernel.smooth <- function(x, z, xo, lambda, region = NULL, normalize = TRUE) {
109  # predict a function measured at locations x to new locations xo
110  if(!is.matrix(x)) dim(x) <- c(length(x), 1)
111  if(!is.matrix(xo)) dim(xo) <- c(length(xo), 1)
112  n = nrow(x)
113  if(is.null(region)) region = 1:n
114  if(length(region) < n) stop("region must have same length as x")
115  region = as.integer(region)
116  if(any(is.na(region))) stop("region has NAs")
117  if(max(region) > length(z)) stop("not enough measurements for the number of regions")
118  no = nrow(xo)
119  if(normalize) {
120    # divide by region sizes
121    z = as.double(z/as.numeric(table(region)))
122  }
123  .C(C_kernel_smooth,
124     as.integer(n), as.integer(ncol(x)),
125     as.double(t(x)), z, as.integer(region),
126     as.integer(no), as.double(t(xo)), zo = double(no),
127     as.double(lambda), as.integer(normalize))$zo
128}
129
130kernel.region.region <- function(x, region, lambda) {
131  if(!is.matrix(x)) dim(x) <- c(length(x), 1)
132  region = as.integer(region)
133  nr = max(region)
134  krr = .C(C_kernel_region_region,
135    as.integer(nrow(x)), as.integer(ncol(x)),
136    as.double(t(x)),
137    region, as.double(lambda), as.integer(nr), krr = double(nr*nr))$krr
138  dim(krr) = c(nr, nr)
139  krr
140}
141kernel.region.x <- function(x, region, z, lambda) {
142  if(!is.matrix(x)) dim(x) <- c(length(x), 1)
143  if(!is.matrix(z)) dim(z) <- c(length(z), 1)
144  region = as.integer(region)
145  nr = max(region)
146  no = nrow(z)
147  krx = .C(C_kernel_region_x,
148    as.integer(nrow(x)), as.integer(ncol(x)),
149    as.double(t(x)), region, as.integer(no), as.double(t(z)),
150    as.double(lambda), as.integer(nr), krx = double(nr*no))$krx
151  dim(krx) = c(nr, no)
152  krx
153}
154