" Tests for regexp in utf8 encoding
"
" Most tests run the same checks against each regexp engine by setting
" 'regexpengine' (0 = automatic, 1 = backtracking, 2 = NFA) and restoring
" it afterwards.
"
" NOTE(review): several expected-value strings below contain non-ASCII
" Latin-1 characters, possibly including invisible ones (no-break space,
" soft hyphen).  Verify these literals against a known-good copy if a
" class test fails unexpectedly.

" Check equivalence classes ([[=x=]]): every character of a group must match
" the class of each group member, and must not match any other group.
func s:equivalence_test()
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňŉǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  " Each whitespace-separated word is one equivalence group.
  let groups = split(str)
  for group1 in groups
    for c in split(group1, '\zs')
      " next statement confirms that equivalence class matches every
      " character in group
      call assert_match('^[[=' .. c .. '=]]*$', group1)
      for group2 in groups
        if group2 != group1
          " next statement confirms that equivalence class doesn't match
          " character in any other group
          call assert_equal(-1, match(group2, '[[=' .. c .. '=]]'), c)
        endif
      endfor
    endfor
  endfor
endfunc

" Equivalence classes with 'regexpengine' set to 1.
func Test_equivalence_re1()
  set re=1
  call s:equivalence_test()
  set re=0
endfunc

" Equivalence classes with 'regexpengine' set to 2.
func Test_equivalence_re2()
  set re=2
  call s:equivalence_test()
  set re=0
endfunc

" Collect, for every character 1..255, which [[:class:]] items and which of
" \i, \k and \f match it, then compare the collected strings with the
" expected sets.
func s:classes_test()
  if has('win32')
    set iskeyword=@,48-57,_,192-255
  endif
  set isprint=@,161-255
  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

  let alnumchars = ''
  let alphachars = ''
  let backspacechar = ''
  let blankchars = ''
  let cntrlchars = ''
  let digitchars = ''
  let escapechar = ''
  let graphchars = ''
  let lowerchars = ''
  let printchars = ''
  let punctchars = ''
  let returnchar = ''
  let spacechars = ''
  let tabchar = ''
  let upperchars = ''
  let xdigitchars = ''
  let identchars = ''
  let identchars1 = ''
  let kwordchars = ''
  let kwordchars1 = ''
  let fnamechars = ''
  let fnamechars1 = ''
  let i = 1
  while i <= 255
    let c = nr2char(i)
    if c =~ '[[:alpha:]]'
      let alphachars .= c
    endif
    if c =~ '[[:alnum:]]'
      let alnumchars .= c
    endif
    if c =~ '[[:backspace:]]'
      let backspacechar .= c
    endif
    if c =~ '[[:blank:]]'
      let blankchars .= c
    endif
    if c =~ '[[:cntrl:]]'
      let cntrlchars .= c
    endif
    if c =~ '[[:digit:]]'
      let digitchars .= c
    endif
    if c =~ '[[:escape:]]'
      let escapechar .= c
    endif
    if c =~ '[[:graph:]]'
      let graphchars .= c
    endif
    if c =~ '[[:lower:]]'
      let lowerchars .= c
    endif
    if c =~ '[[:print:]]'
      let printchars .= c
    endif
    if c =~ '[[:punct:]]'
      let punctchars .= c
    endif
    if c =~ '[[:return:]]'
      let returnchar .= c
    endif
    if c =~ '[[:space:]]'
      let spacechars .= c
    endif
    if c =~ '[[:tab:]]'
      let tabchar .= c
    endif
    if c =~ '[[:upper:]]'
      let upperchars .= c
    endif
    if c =~ '[[:xdigit:]]'
      let xdigitchars .= c
    endif
    if c =~ '[[:ident:]]'
      let identchars .= c
    endif
    if c =~ '\i'
      let identchars1 .= c
    endif
    if c =~ '[[:keyword:]]'
      let kwordchars .= c
    endif
    if c =~ '\k'
      let kwordchars1 .= c
    endif
    if c =~ '[[:fname:]]'
      let fnamechars .= c
    endif
    if c =~ '\f'
      let fnamechars1 .= c
    endif
    let i += 1
  endwhile

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
  call assert_equal("\b", backspacechar)
  call assert_equal("\t ", blankchars)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
  call assert_equal("0123456789", digitchars)
  call assert_equal("\<Esc>", escapechar)
  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
  call assert_equal("\r", returnchar)
  call assert_equal("\t\n\x0b\f\r ", spacechars)
  call assert_equal("\t", tabchar)
  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)

  " The expected identifier/keyword sets differ per platform.
  if has('win32')
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz ¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  elseif has('ebcdic')
    let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  else
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  endif

  " The expected file name character set differs per platform as well.
  if has('win32')
    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  elseif has('amiga')
    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  elseif has('vms')
    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  elseif has('ebcdic')
    let fnamechars_ok = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  else
    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  endif

  call assert_equal(identchars_ok, identchars)
  call assert_equal(kwordchars_ok, kwordchars)
  call assert_equal(fnamechars_ok, fnamechars)

  " The bracket class form and the backslash form must agree.
  call assert_equal(identchars1, identchars)
  call assert_equal(kwordchars1, kwordchars)
  call assert_equal(fnamechars1, fnamechars)
endfunc

" Character classes with 'regexpengine' set to 1.
func Test_classes_re1()
  set re=1
  call s:classes_test()
  set re=0
endfunc

" Character classes with 'regexpengine' set to 2.
func Test_classes_re2()
  set re=2
  call s:classes_test()
  set re=0
endfunc

" A reversed range in a collection must fail with E944 for every engine
" setting.
func Test_reversed_range()
  for re in range(0, 2)
    exe 'set re=' . re
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', re)
  endfor
  set re=0
endfunc

" A very large range fails with E945 when re=1 and is matched normally when
" re=2.
func Test_large_class()
  set re=1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')
  set re=2
  call assert_equal(0, 'abc def' =~# '[\u3000-\u4000]')
  call assert_equal(1, "\u3042" =~# '[\u3000-\u4000]')
  set re=0
endfunc

" An over-long \%[] item must fail with E339 when re=1.
func Test_optmatch_toolong()
  set re=1
  " Can only handle about 8000 characters.
  let pat = '\\%[' .. repeat('x', 9000) .. ']'
  call assert_fails('call match("abc def", "' .. pat .. '")', 'E339:')
  set re=0
endfunc

" Test for regexp patterns with multi-byte support, using utf-8.
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests. These will fail unless vim is compiled
  " with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])  " equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  call add(tl, [2, '\f\+', '&*fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    let matchidx = 3
    for engine in [0, 1, 2]
      " Skip the engine that this entry is not meant for.
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", caused an exception: \"' . v:exception . '\"')
        " "l" is unset or stale from the previous iteration here, so the
        " checks below would be meaningless.
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif
      if len(l) > 0
        " check all the nine submatches
        for i in range(1, 9)
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat .
                  \ '\", text: \"' . text . '\", submatch ' . i .
                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor
  set regexpengine&
endfunc

" check that 'ambiwidth' does not change the meaning of \p
func Test_ambiwidth()
  set regexpengine=1 ambiwidth=single
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=1 ambiwidth=double
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=2 ambiwidth=single
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=2 ambiwidth=double
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine& ambiwidth&
endfunc

" Case-insensitive matching involving U+0130 (capital I with dot above);
" run once per engine by Test_regexp_ignore_case().
func Run_regexp_ignore_case()
  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))

  call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
  call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
  call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
endfunc

func Test_regexp_ignore_case()
  set regexpengine=1
  call Run_regexp_ignore_case()
  set regexpengine=2
  call Run_regexp_ignore_case()
  set regexpengine&
endfunc

" Tests for regexp with multi-byte encoding and various magic settings
func Run_regexp_multibyte_magic()
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 หม่x อมx
    a อมx หม่x
    b ちカヨは
    c x ¬€x
    d 天使x
    e ?y
    f ?z
    g a啷bb
    j 0123❤x
    k combinations
    l äö üᾱ̆́
  END
  " Lines "e" and "f" each need one character above U+10FFFF, which cannot
  " be stored reliably in a source file; build them from escapes instead.
  " The code points are the ones the \%U and [\U] searches below look for.
  let text[13] = "e \U12345678y"
  let text[14] = "f \U1234abcdz"

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  close!
endfunc

func Test_regexp_multibyte_magic()
  set regexpengine=1
  call Run_regexp_multibyte_magic()
  set regexpengine=2
  call Run_regexp_multibyte_magic()
  set regexpengine&
endfunc

" Test for 7.3.192
" command ":s/ \?/ /g" splits multi-byte characters into bytes
func Test_split_multibyte_to_bytes()
  new
  call setline(1, 'l äö üᾱ̆́')
  s/ \?/ /g
  call assert_equal(' l ä ö ü ᾱ̆́', getline(1))
  close!
endfunc

" Test for matchstr() with multibyte characters
func Test_matchstr_multibyte()
  new
  call assert_equal('ב', matchstr("אבגד", ".", 0, 2))
  call assert_equal('בג', matchstr("אבגד", "..", 0, 2))
  call assert_equal('א', matchstr("אבגד", ".", 0, 0))
  call assert_equal('ג', matchstr("אבגד", ".", 4, -1))
  close!
endfunc

" Test for 7.4.636
" A search with end offset gets stuck at end of file.
func Test_search_with_end_offset()
  new
  call setline(1, ['', 'dog(a', 'cat('])
  exe "normal /(/e+\<CR>"
  normal n"ayn
  call assert_equal("a\ncat(", @a)
  close!
endfunc

" Check that "^" matches even when the line starts with a combining char
func Test_match_start_of_line_combining()
  new
  call setline(1, ['', "\u05ae", ''])
  exe "normal gg/^\<CR>"
  call assert_equal(2, getcurpos()[1])
  bwipe!
endfunc

" Check that [[:upper:]] matches for automatic engine
func Test_match_char_class_upper()
  new
  " Remember the current engine so it can be restored at the end.
  let _engine=&regexpengine

  " Test 1: [[:upper:]]\{2,\}
  set regexpengine=0
  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)
  let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 1')
  set regexpengine=1
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 1')
  set regexpengine=2
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 1')

  " Test 2: [[:upper:]].\+
  let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
  set regexpengine=0
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 2')
  set regexpengine=1
  exe search_cmd
  call assert_equal(1, searchcount().total, 'TEST 2')
  set regexpengine=2
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 2')

  " Test 3: [[:lower:]]\+
  let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
  set regexpengine=0
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 3 lower')
  set regexpengine=1
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 3 lower')
  set regexpengine=2
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 3 lower')

  " clean up
  let &regexpengine=_engine
  bwipe!
endfunc

" Write a small script containing a byte that is not valid UTF-8 and source
" it.  There are no assertions; the test only requires that sourcing
" completes.
func Test_match_invalid_byte()
  call writefile(0z630a.765d30aa0a.2e0a.790a.4030, 'Xinvalid')
  new
  source Xinvalid
  bwipe!
  call delete('Xinvalid')
endfunc

" vim: shiftwidth=2 sts=2 expandtab