1# various test of subsetting ("indexing") a pdata.frame and a pseries (the latter currently commented),
2# e.g., that subsetting by rownames preserves the index
3#  (pre rev. 187/189 all entries were set to NA)
4#  (pre rev. 251 subsetting a pdata.frame added extra information due to coercing rules of "[.data.frame")
5#  (pre rev. 668 subsetting a pdata.frame with [.pdata.frame such that a single column (pseries) is returned was lacking names)
6
7library(plm)
8data("Grunfeld", package = "plm")
9
10pGrunfeld <- pdata.frame(Grunfeld)
11
12# subsetting with [] with rownames - works
13attr(pGrunfeld[c("1-1935"), ], which = "index")
14attr(pGrunfeld[c("1-1935", "1-1936"), ], which = "index")
15
16if (anyNA(attr(pGrunfeld[c("1-1935"), ], which = "index"))) stop("FAIL: NA in index")
17if (anyNA(attr(pGrunfeld[c("1-1935", "1-1936"), ], which = "index"))) stop("FAIL: NA in index")
18
19
20# subsetting with [] by line number works (indexes preserved)
21if (!all(attr(pGrunfeld[c(1), ], which = "index") == c(1, 1935))) stop("wrong index!")
22if (!all(attr(pGrunfeld[c(1,2), ], which = "index") == data.frame(firm = c(1,1), year = c(1935, 1936)))) stop("wrong index!")
23
24if (anyNA(attr(pGrunfeld[c(1), ], which = "index"))) stop("FAIL: NA in index")
25if (anyNA(attr(pGrunfeld[c(1,2), ], which = "index"))) stop("FAIL: NA in index")
26
27# subsetting with [[]] works (indexes preserved)
28attr(pGrunfeld[["inv"]], which = "index")
29attr(pGrunfeld[[3]], which = "index")
30
31if (anyNA(attr(pGrunfeld[["inv"]], which = "index"))) stop("FAIL: NA in index")
32if (anyNA(attr(pGrunfeld[[3]], which = "index"))) stop("FAIL: NA in index")
33
34
35# check that extracting a single column (which becomes a pseries) yield the same
36# result for the three extraction methods $.pdata.freme, [[.pdata.frame, and [.pdata.frame
37extr1 <- pGrunfeld$inv
38extr2 <- pGrunfeld[["inv"]]
39extr3 <- pGrunfeld[ , "inv"]
40if (!isTRUE(all.equal(extr1, extr2))) stop("extraction of single column (pseries) does not yield same results for $.pdata.frame and [[.pdata.frame")
41if (!isTRUE(all.equal(extr1, extr3))) stop("extraction of single column (pseries) does not yield same results for $.pdata.frame and [.pdata.frame")
42
43# check that row names are kept and subsetted by [.pdata.frame when a single column (pseries) is returned
44if (!isTRUE(all.equal(names(pGrunfeld[1:5 , "inv"]), row.names(pGrunfeld)[1:5]))) stop("row names not correctly subsetted by [.pdata.frame")
45
46
47############ subsetting used to change the pdata.frame
48########## since rev.252 this is fully fixed (rev. 251 already fixed large parts of this),
49########## pre rev 251 a lot of unnecessary information was added to the pdata.frame by subsetting
50
51# this should yield a structurally identical pdata.frame as all rows are extracted:
52Grunfeld2 <- Grunfeld[1:nrow(Grunfeld), ]
53pGrunfeld2 <- pGrunfeld[1:nrow(pGrunfeld), ]
54
55identical(Grunfeld, Grunfeld2)    # TRUE for data.frame
56identical(pGrunfeld, pGrunfeld2)  # TRUE for pdata.frame (was FALSE pre rev. 252)
57if (!identical(pGrunfeld, pGrunfeld2))
58  stop("pdata.frame not identical after \"subsetting\" with all rows (which should actually not do any subsetting))")
59
60### compare object sizes
61# object.size(pGrunfeld)  # 37392 bytes
62# object.size(pGrunfeld2) # 37392 bytes since rev. 252 # (was: 83072 bytes in pre rev.251, considerably larger!)
63                                                       # (was: 26200 bytes in rev. 251)
64# if (!object.size(pGrunfeld) == object.size(pGrunfeld2))
65#   print("pdata.frame not same object size after \"subsetting\" with all rows (which should actually not do any subsetting))")
66
67# this is likely to be unnecessarily pedantic, because by default attrib.as.set is TRUE
68# and from ?attributes "Attributes are not stored internally as a list and should be
69# thought of as a set and not a vector."
70identical(Grunfeld, Grunfeld2,   attrib.as.set = FALSE)  # TRUE for data.frame
71identical(pGrunfeld, pGrunfeld2, attrib.as.set = FALSE)  # TRUE for pdata.frame [but was false prior to rev. 1271]
72
73# display differences (if any) [with rev. 252 there should be no differences left]
74all.equal(pGrunfeld, pGrunfeld2)
75all.equal(pGrunfeld, pGrunfeld2, check.attributes = FALSE)
76# compare::compare(pGrunfeld, pGrunfeld2, allowAll = TRUE)
77
78
79# Unused levels from the index attribute of a pdata.frame shall be dropped
80# (NB: unused levels are not dropped from the variables of the pdata.frame as this is standard R behaviour)
81pGrunfeld_sub_id <- pGrunfeld[-c(1:20), ] # drop first individual (1st ind. is in first 20 rows)
82if (!isTRUE(all.equal(levels(attr(pGrunfeld_sub_id, "index")[[1]]), levels(factor(2:10)))))
83  stop("unused levels from index (individual) not dropped")
84
85pGrunfeld_sub_year <- pGrunfeld[!pGrunfeld$year %in% "1936", ] # drop year 1936
86if (!isTRUE(all.equal(levels(attr(pGrunfeld_sub_year, "index")[[2]]), levels(factor(c(1935, 1937:1954))))))
87  stop("unused levels from index (time) not dropped")
88
89
90
91
92
93
94
95
96
97#### test estimation by plm on a subsetted pdata.frame (failed pre rev. 251)
98pGrunfeld_sub <- pGrunfeld[c(23:99), ]
99plm(inv ~ value + capital, data = pGrunfeld[c(23:99), ]) # failed pre rev.251
100
101  # classes of index of pdata.frame and subsetted pdata.frame are the same 'pindex' and 'data.frame')
102  class(attr(pGrunfeld, which="index"))
103  class(attr(pGrunfeld$inv, which="index"))
104  if (!all(class(attr(pGrunfeld, which="index")) == class(attr(pGrunfeld$inv, which="index")))) stop("classes differ!")
105
106  # classes of index of columns of pdata.frame and subsetted pdata.frame must be the same 'pindex' and 'data.frame')
107  class(attr(pGrunfeld$inv, which="index"))
108  class(attr(pGrunfeld_sub$inv, which="index"))
109  if (!all(class(attr(pGrunfeld$inv, which="index")) == class(attr(pGrunfeld_sub$inv, which="index")))) stop("classes differ!")
110
111
112############ further testing subsetting of pdata.frame and its index
113# up to rev.254 subsetting by [i] (with missing j) did not mimic data.frame behavior in case of missing j (j as in [i, j])
114# fixed in rev.255
115data("Grunfeld", package = "plm")
116X <- Grunfeld
117pX <- pdata.frame(X)
118
119###### test dimensions of subsetted pdata.frame
120if (!isTRUE(all.equal(dim(X[]), dim(pX[])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
121if (!isTRUE(all.equal(dim(X[ , ]), dim(pX[ ,])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
122if (!isTRUE(all.equal(dim(X[ , , ]), dim(pX[ , , ])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
123if (!isTRUE(all.equal(dim(X[ , , drop = TRUE]),  dim(pX[ , , drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
124if (!isTRUE(all.equal(dim(X[ , , drop = FALSE]), dim(pX[ , , drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
125
126
127if (!isTRUE(all.equal(dim(X[1:10, 2:4]),               dim(pX[1:10, 2:4])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
128if (!isTRUE(all.equal(dim(X[1:10, 2:4, drop = TRUE]),  dim(pX[1:10, 2:4, drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
129if (!isTRUE(all.equal(dim(X[1:10, 2:4, drop = FALSE]), dim(pX[1:10, 2:4, drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
130
131if (!isTRUE(all.equal(dim(X[1:10, , ]),              dim(pX[1:10, , ])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
132if (!isTRUE(all.equal(dim(X[1:10, , drop = TRUE]),   dim(pX[1:10, , drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
133if (!isTRUE(all.equal(dim(X[1:10, , drop = FALSE]),  dim(pX[1:10, , drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
134
135
136if (!isTRUE(all.equal(dim(X[1:10, ]),    dim(pX[1:10, ])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
137if (!isTRUE(all.equal(dim(X[1, ]),        dim(pX[1, ])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
138
139if (!isTRUE(all.equal(dim(X[1]),                 dim(pX[1])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
140if (!isTRUE(all.equal(dim(X[1, drop = TRUE]),    dim(pX[1, drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
141if (!isTRUE(all.equal(dim(X[1, drop = FALSE]),   dim(pX[1, drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
142
143if (!isTRUE(all.equal(dim(X[1:2]),               dim(pX[1:2])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
144if (!isTRUE(all.equal(dim(X[1:2, drop = TRUE]),  dim(pX[1:2, drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
145if (!isTRUE(all.equal(dim(X[1:2, drop = FALSE]), dim(pX[1:2, drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
146
147if (!isTRUE(all.equal(dim(X[ , 2:4]), dim(pX[ , 2:4])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
148if (!isTRUE(all.equal(dim(X[ , 2:4, drop = TRUE]),  dim(pX[ , 2:4, drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
149if (!isTRUE(all.equal(dim(X[ , 2:4 ,drop = FALSE]), dim(pX[ , 2:4, drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
150
151if (!isTRUE(all.equal(dim(X[ , 3]),               dim(pX[ , 3])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
152if (!isTRUE(all.equal(dim(X[ , 3, drop = TRUE]),  dim(pX[ , 3, drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
153if (!isTRUE(all.equal(dim(X[ , 3, drop = FALSE]), dim(pX[ , 3, drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
154
155if (!isTRUE(all.equal(dim(X[1, , ]),             dim(pX[1, , ])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
156if (!isTRUE(all.equal(dim(X[1, , drop = TRUE]),  dim(pX[1, , drop = TRUE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
157if (!isTRUE(all.equal(dim(X[1, , drop = FALSE]), dim(pX[1, , drop = FALSE])))) stop("dimensions of data.frame and pdata.frame not equal after subsetting")
158
159
160###### test dimensions of index of subsetted pdata.frame
161if (!all(c(dim(pX[1:10, 2:4])[1], 2L) == dim(attr(pX[1:10, 2:4], "index")))) stop("index has wrong dimension after subsetting")
162if (!all(c(dim(pX[1:10,    ])[1], 2L) == dim(attr(pX[1:10,    ], "index")))) stop("index has wrong dimension after subsetting")
163if (!all(c(dim(pX[    , 2:4])[1], 2L) == dim(attr(pX[    , 2:4], "index")))) stop("index has wrong dimension after subsetting")
164
165# NB: this is class c("pseries", "numeric), need length here
166if (!all(c(length(pX[ , 3]), 2L) == dim(attr(pX[ , 3], "index")))) stop("index has wrong dimension after subsetting")
167
168# NB: this is class c("pseries", "numeric), need length here
169if (!all(c(length(pX[ , 3, drop = TRUE]), 2L) == dim(attr(pX[ , 3, drop = TRUE], "index")))) stop("index has wrong dimension after subsetting")
170
171# need dim again here, because drop = FALSE
172if (!all(c(dim(pX[ , 3, drop = FALSE])[1], 2L) == dim(attr(pX[ , 3, drop = FALSE], "index")))) stop("index has wrong dimension after subsetting")
173
174# NB: this is a list! has no index anymore
175length(pX[1, , drop = TRUE])
176# NB: this a a pdata.frame (drop = FALSE)
177if (!all(c(dim(pX[1, , drop = FALSE])[1], 2L) == dim(attr(pX[1, , drop = FALSE], "index")))) stop("index has wrong dimension after subsetting")
178
179
180# case of [i]-indexing with missing j: index must be have full rows
181# dim of pdata.frame: 25, 3
182if (!all(c(dim(pX[2:4])[1], 2L)               == dim(attr(pX[2:4],               "index")))) stop("index has wrong dimension after subsetting")
183if (!all(c(dim(pX[2:4, drop = TRUE])[1],  2L) == dim(attr(pX[2:4, drop = TRUE],  "index")))) stop("index has wrong dimension after subsetting")
184if (!all(c(dim(pX[2:4, drop = FALSE])[1], 2L) == dim(attr(pX[2:4, drop = FALSE], "index")))) stop("index has wrong dimension after subsetting")
185
186if (!all(c(dim(pX[1])[1], 2L)               == dim(attr(pX[1],               "index")))) stop("index has wrong dimension after subsetting")
187if (!all(c(dim(pX[1, drop = TRUE])[1],  2L) == dim(attr(pX[1, drop = TRUE],  "index")))) stop("index has wrong dimension after subsetting")
188if (!all(c(dim(pX[1, drop = FALSE])[1], 2L) == dim(attr(pX[1, drop = FALSE], "index")))) stop("index has wrong dimension after subsetting")
189
190
191####### test return values (named) numeric(0) etc and especially NULL
192
193## compare pdata.frame() to data.frame() in case of subsetting with non-existent return values
194# firm 31 is non-existent
195# valueNonExistent is non-existent
196
197pGrunfeld[pGrunfeld$firm == "31"]
198
199Grunfeld[Grunfeld$firm == "31"]
200
201
202pGrunfeld[pGrunfeld$firm == "31", "value"]
203
204Grunfeld[Grunfeld$firm == "31", "value"]
205
206#### since R 3.4.0 the following two cases gave a warning which was pacified in rev. 626
207# Warning in structure(mydata, index = index, class = base::union("pseries",  :
208#                                                                   Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
209#                                                                 Consider 'structure(list(), *)' instead.
210
211pGrunfeld[pGrunfeld$firm == "31", "valueNonExistent"]
212
213Grunfeld[Grunfeld$firm == "31", "valueNonExistent"]
214
215
216# with existent firm 19
217pGrunfeld[pGrunfeld$firm == "19", "valueNonExistent"]
218
219Grunfeld[Grunfeld$firm == "19", "valueNonExistent"]
220
221
222
223