test_that("corner cases", {

  # Empty pattern: each element is expected to produce mrec("", 1, 0).
  input <- c("foo", "bar")
  expected <- asdf(
    .text = input,
    .match = reclist(mrec("", 1, 0), mrec("", 1, 0))
  )
  expect_equal(as.data.frame(re_exec_val(input, "")), expected)

  # Same expectation when one of the input elements is itself empty.
  input <- c("foo", "", "bar")
  expected <- asdf(
    .text = input,
    .match = reclist(mrec("", 1, 0), mrec("", 1, 0), mrec("", 1, 0))
  )
  expect_equal(as.data.frame(re_exec_val(input, "")), expected)

  # Zero-length input: the result has no rows, whatever the pattern is.
  input <- character()
  no_rows <- asdf(.text = input, .match = reclist())

  actual <- re_exec_val(input, "")
  expect_equal(as.data.frame(actual), no_rows)

  actual <- re_exec_val(input, "foo")
  expect_equal(as.data.frame(actual), no_rows)

  # Zero-length input with two unnamed groups: one empty column per group.
  actual <- re_exec_val(input, "foo (g1) (g2)")
  expect_equal(
    as.data.frame(actual),
    asdf(reclist(), reclist(), .text = input, .match = reclist())
  )

  # Zero-length input with one unnamed and one named group.
  actual <- re_exec_val(input, "foo (g1) (?<name>g2)")
  expect_equal(
    as.data.frame(actual),
    asdf(reclist(), name = reclist(), .text = input, .match = reclist())
  )

  # A pattern that does not match is expected to give an all-NA record.
  input <- "not"
  expected <- asdf(.text = input, .match = reclist(mrec(NA, NA, NA)))
  expect_equal(as.data.frame(re_exec_val(input, "foo")), expected)
})
42
test_that("not so corner cases", {

  dates <- c(
    "2016-04-20", "1977-08-08", "not a date", "2016",
    "76-03-02", "2012-06-30", "2015-01-21 19:58"
  )

  # The whole-match column is the same for the unnamed and the named pattern:
  # elements 3 to 5 are expected not to match at all.
  whole_match <- reclist(
    mrec("2016-04-20", 1, 10),
    mrec("1977-08-08", 1, 10),
    narec(),
    narec(),
    narec(),
    mrec("2012-06-30", 1, 10),
    mrec("2015-01-21", 1, 10)
  )

  # Unnamed capture groups.
  iso_unnamed <- "([0-9]{4})-([0-1][0-9])-([0-3][0-9])"
  want_unnamed <- asdf(
    reclist(
      mrec("2016", 1, 4), mrec("1977", 1, 4), narec(), narec(), narec(),
      mrec("2012", 1, 4), mrec("2015", 1, 4)
    ),
    reclist(
      mrec("04", 6, 7), mrec("08", 6, 7), narec(), narec(), narec(),
      mrec("06", 6, 7), mrec("01", 6, 7)
    ),
    reclist(
      mrec("20", 9, 10), mrec("08", 9, 10), narec(), narec(), narec(),
      mrec("30", 9, 10), mrec("21", 9, 10)
    ),
    .text = dates,
    .match = whole_match
  )
  got_unnamed <- re_exec_val(text = dates, pattern = iso_unnamed)
  expect_equal(as.data.frame(got_unnamed), want_unnamed)

  # Named capture groups: same records, but the columns carry the group names.
  iso_named <- "(?<year>[0-9]{4})-(?<month>[0-1][0-9])-(?<day>[0-3][0-9])"
  years <- reclist(
    mrec("2016", 1, 4), mrec("1977", 1, 4), narec(), narec(), narec(),
    mrec("2012", 1, 4), mrec("2015", 1, 4)
  )
  months <- reclist(
    mrec("04", 6, 7), mrec("08", 6, 7), narec(), narec(), narec(),
    mrec("06", 6, 7), mrec("01", 6, 7)
  )
  days <- reclist(
    mrec("20", 9, 10), mrec("08", 9, 10), narec(), narec(), narec(),
    mrec("30", 9, 10), mrec("21", 9, 10)
  )
  want_named <- asdf(
    year = years,
    month = months,
    day = days,
    .text = dates,
    .match = whole_match
  )
  got_named <- re_exec_val(text = dates, pattern = iso_named)
  expect_equal(as.data.frame(got_named), want_named)
})
100
101
test_that("UTF8", {

  # Mark both the subject and the pattern as UTF-8 before matching.
  needle <- "Gábor"
  haystack <- "Gábor Csárdi"
  Encoding(needle) <- "UTF-8"
  Encoding(haystack) <- "UTF-8"

  # The expected match is the pattern itself, recorded as mrec(pat, 1, 5).
  expected <- asdf(.text = haystack, .match = reclist(mrec(needle, 1, 5)))
  actual <- re_exec_val(haystack, needle)
  expect_equal(as.data.frame(actual), expected)

})
114
115
test_that("text is scalar & capture groups", {

  # Both cases use the same scalar input and expect the same whole match.
  input <- "foo bar"
  whole_match <- reclist(mrec("foo bar", 1, 7))

  # Two unnamed groups.
  actual <- re_exec_val(input, "(\\w+) (\\w+)")
  expect_equal(
    as.data.frame(actual),
    asdf(
      reclist(mrec("foo", 1, 3)),
      reclist(mrec("bar", 5, 7)),
      .text = input,
      .match = whole_match
    )
  )

  # Two named groups: the columns are expected to be called g1 and g2.
  actual <- re_exec_val(input, "(?<g1>\\w+) (?<g2>\\w+)")
  first_word <- reclist(mrec("foo", 1, 3))
  second_word <- reclist(mrec("bar", 5, 7))
  expect_equal(
    as.data.frame(actual),
    asdf(
      g1 = first_word,
      g2 = second_word,
      .text = input,
      .match = whole_match
    )
  )

})
141
test_that("perl argument", {
  # Using perl = TRUE used to cause an error. That is not important in this
  # case, but it must be supported if we want this to be a drop-in replacement
  # for other functions (e.g. re-implementing `strsplit` with a rematch2
  # backend).

  input <- "foo bar"
  res <- re_exec_val(input, "\\w+", perl = TRUE)
  expect_equal(
    as.data.frame(res),
    asdf(
      .text = input,
      .match = reclist(mrec("foo", 1, 3))
    )
  )

  # Actually check that the capture group doesn't show up with perl = FALSE
  # (2 columns expected), while it does with perl = TRUE (3 columns expected).
  # The two counts are asserted separately so that a failure reports which
  # mode is wrong and what the actual column count was.
  res_tre <- re_exec_val(input, "\\w+ (\\w+)", perl = FALSE)
  res_perl <- re_exec_val(input, "\\w+ (\\w+)", perl = TRUE)
  expect_equal(ncol(as.data.frame(res_perl)), 3L)
  expect_equal(ncol(as.data.frame(res_tre)), 2L)
})
161