# Tests for `re_exec_val()`.
#
# Every expectation converts the result with `as.data.frame()` and compares it
# with a frame assembled from the `asdf()`, `reclist()`, `mrec()` and `narec()`
# helpers, which are defined outside this file.
# NOTE(review): `mrec(x, s, e)` looks like (matched text, start, end) and
# `narec()` like the "no match" record -- confirm against the helpers.

test_that("corner cases", {

  # Empty pattern against non-empty strings.
  .text <- c("foo", "bar")
  res <- re_exec_val(.text, "")
  expect_equal(
    as.data.frame(res),
    asdf(.text = .text, .match = reclist(mrec("", 1, 0), mrec("", 1, 0)))
  )

  # Empty pattern, with an empty string among the inputs.
  .text <- c("foo", "", "bar")
  res <- re_exec_val(.text, "")
  expect_equal(
    as.data.frame(res),
    asdf(
      .text = .text,
      .match = reclist(mrec("", 1, 0), mrec("", 1, 0), mrec("", 1, 0))
    )
  )

  # Zero-length input, empty pattern.
  .text <- character()
  res <- re_exec_val(.text, "")
  expect_equal(as.data.frame(res), asdf(.text = .text, .match = reclist()))

  # Zero-length input, pattern without groups.
  .text <- character()
  res <- re_exec_val(.text, "foo")
  expect_equal(as.data.frame(res), asdf(.text = .text, .match = reclist()))

  # Zero-length input, two unnamed groups: one empty column per group.
  .text <- character()
  res <- re_exec_val(.text, "foo (g1) (g2)")
  expect_equal(
    as.data.frame(res),
    asdf(reclist(), reclist(), .text = .text, .match = reclist())
  )

  # Zero-length input, one unnamed and one named group.
  .text <- character()
  res <- re_exec_val(.text, "foo (g1) (?<name>g2)")
  expect_equal(
    as.data.frame(res),
    asdf(reclist(), name = reclist(), .text = .text, .match = reclist())
  )

  # A string that does not match yields an all-NA match record.
  .text <- "not"
  res <- re_exec_val(.text, "foo")
  expect_equal(
    as.data.frame(res),
    asdf(.text = .text, .match = reclist(mrec(NA, NA, NA)))
  )
})

test_that("not so corner cases", {

  # Inputs 3-5 are expected not to match; input 7 has trailing text after the
  # date, and only the date part is expected in `.match`.
  dates <- c("2016-04-20", "1977-08-08", "not a date", "2016",
             "76-03-02", "2012-06-30", "2015-01-21 19:58")

  # Unnamed capture groups.
  isodate <- "([0-9]{4})-([0-1][0-9])-([0-3][0-9])"
  expected <- asdf(
    reclist(
      mrec("2016", 1, 4), mrec("1977", 1, 4), narec(), narec(), narec(),
      mrec("2012", 1, 4), mrec("2015", 1, 4)
    ),
    reclist(
      mrec("04", 6, 7), mrec("08", 6, 7), narec(), narec(), narec(),
      mrec("06", 6, 7), mrec("01", 6, 7)
    ),
    reclist(
      mrec("20", 9, 10), mrec("08", 9, 10), narec(), narec(), narec(),
      mrec("30", 9, 10), mrec("21", 9, 10)
    ),
    .text = dates,
    .match = reclist(
      mrec("2016-04-20", 1, 10), mrec("1977-08-08", 1, 10), narec(),
      narec(), narec(), mrec("2012-06-30", 1, 10),
      mrec("2015-01-21", 1, 10)
    )
  )
  expect_equal(
    as.data.frame(re_exec_val(text = dates, pattern = isodate)),
    expected
  )

  # Same pattern with named groups: the columns carry the group names.
  isodaten <- "(?<year>[0-9]{4})-(?<month>[0-1][0-9])-(?<day>[0-3][0-9])"
  expected <- asdf(
    year = reclist(
      mrec("2016", 1, 4), mrec("1977", 1, 4), narec(), narec(), narec(),
      mrec("2012", 1, 4), mrec("2015", 1, 4)
    ),
    month = reclist(
      mrec("04", 6, 7), mrec("08", 6, 7), narec(), narec(), narec(),
      mrec("06", 6, 7), mrec("01", 6, 7)
    ),
    day = reclist(
      mrec("20", 9, 10), mrec("08", 9, 10), narec(), narec(), narec(),
      mrec("30", 9, 10), mrec("21", 9, 10)
    ),
    .text = dates,
    .match = reclist(
      mrec("2016-04-20", 1, 10), mrec("1977-08-08", 1, 10), narec(),
      narec(), narec(), mrec("2012-06-30", 1, 10),
      mrec("2015-01-21", 1, 10)
    )
  )

  expect_equal(
    as.data.frame(re_exec_val(text = dates, pattern = isodaten)),
    expected
  )
})


test_that("UTF8", {

  # Both subject and pattern are explicitly marked as UTF-8; the expected end
  # position (5) equals the number of characters in the pattern.
  str <- "Gábor Csárdi"
  pat <- "Gábor"
  Encoding(str) <- Encoding(pat) <- "UTF-8"
  res <- re_exec_val(str, pat)
  expect_equal(
    as.data.frame(res),
    asdf(.text = str, .match = reclist(mrec(pat, 1, 5)))
  )

})


test_that("text is scalar & capture groups", {

  # Unnamed groups on a length-one input.
  .text <- "foo bar"
  res <- re_exec_val(.text, "(\\w+) (\\w+)")
  expect_equal(
    as.data.frame(res),
    asdf(
      reclist(mrec("foo", 1, 3)),
      reclist(mrec("bar", 5, 7)),
      .text = .text,
      .match = reclist(mrec("foo bar", 1, 7))
    )
  )

  # Named groups on a length-one input.
  .text <- "foo bar"
  res <- re_exec_val(.text, "(?<g1>\\w+) (?<g2>\\w+)")
  expect_equal(
    as.data.frame(res),
    asdf(
      g1 = reclist(mrec("foo", 1, 3)),
      g2 = reclist(mrec("bar", 5, 7)),
      .text = .text,
      .match = reclist(mrec("foo bar", 1, 7))
    )
  )

})

test_that("perl argument", {
  # using perl=TRUE used to cause an error; not important in this case, but must
  # be supported if we want this to be a drop in replacement for other functions
  # (e.g. re-implementing `strsplit` with a rematch2 backend)

  .text <- "foo bar"
  res <- re_exec_val(.text, "\\w+", perl = TRUE)
  expect_equal(
    as.data.frame(res),
    asdf(
      .text = .text,
      .match = reclist(mrec("foo", 1, 3))
    )
  )

  # Actually check that the capture group does not show up with perl = FALSE:
  # three columns are expected with perl = TRUE, two without. The counts are
  # asserted separately so a failure reports which one was off and its value.
  res_tre <- re_exec_val(.text, "\\w+ (\\w+)", perl = FALSE)
  res_perl <- re_exec_val(.text, "\\w+ (\\w+)", perl = TRUE)
  expect_equal(ncol(as.data.frame(res_perl)), 3L)
  expect_equal(ncol(res_tre), 2L)
})