1" Tests for regexp in utf8 encoding
2
func s:equivalence_test()
  " Each space separated word of "str" lists the characters that belong to
  " one equivalence class.
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ  VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňʼnǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  let groups = split(str)
  for chars in groups
    for ch in split(chars, '\zs')
      let class = '[[=' .. ch .. '=]]'
      " The equivalence class of any member must match every character of
      " its own group.
      call assert_match('^' .. class .. '*$', chars)
      for other in groups
        if other == chars
          continue
        endif
        " The equivalence class must not match any character of another
        " group.
        call assert_equal(-1, match(other, class), ch)
      endfor
    endfor
  endfor
endfunc
21
func Test_equivalence_re1()
  " Run the shared equivalence class checks with 'regexpengine' set to 1,
  " then put the option back to 0.
  let &regexpengine = 1
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
27
func Test_equivalence_re2()
  " Run the shared equivalence class checks with 'regexpengine' set to 2,
  " then put the option back to 0.
  let &regexpengine = 2
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
33
func s:classes_test()
  " Collect, for every character class, the characters 1..255 that match it
  " and compare the result with the expected set.  Uses whatever regexp
  " engine the caller selected.

  " Remember the options changed below so that they can be restored at the
  " end and do not leak into tests that run afterwards.
  let save_iskeyword = &iskeyword
  let save_isprint = &isprint

  if has('win32')
    set iskeyword=@,48-57,_,192-255
  endif
  set isprint=@,161-255
  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

  " Result name -> pattern used to probe a single character.  The names
  " ending in "1" are the backslash items that must give the same result as
  " the bracket class of the same name.
  let probes = {
        \ 'alpha': '[[:alpha:]]',
        \ 'alnum': '[[:alnum:]]',
        \ 'backspace': '[[:backspace:]]',
        \ 'blank': '[[:blank:]]',
        \ 'cntrl': '[[:cntrl:]]',
        \ 'digit': '[[:digit:]]',
        \ 'escape': '[[:escape:]]',
        \ 'graph': '[[:graph:]]',
        \ 'lower': '[[:lower:]]',
        \ 'print': '[[:print:]]',
        \ 'punct': '[[:punct:]]',
        \ 'return': '[[:return:]]',
        \ 'space': '[[:space:]]',
        \ 'tab': '[[:tab:]]',
        \ 'upper': '[[:upper:]]',
        \ 'xdigit': '[[:xdigit:]]',
        \ 'ident': '[[:ident:]]',
        \ 'ident1': '\i',
        \ 'keyword': '[[:keyword:]]',
        \ 'keyword1': '\k',
        \ 'fname': '[[:fname:]]',
        \ 'fname1': '\f'}

  let found = {}
  for name in keys(probes)
    let found[name] = ''
  endfor

  " Characters are visited in ascending order, thus every result string is
  " sorted by character number.
  for nr in range(1, 255)
    let c = nr2char(nr)
    for [name, pat] in items(probes)
      if c =~ pat
        let found[name] .= c
      endif
    endfor
  endfor

  " Characters 128-160 (C1 controls and the no-break space) and 173 (soft
  " hyphen) are invisible and easily damaged when the file is re-encoded,
  " therefore the expected values generate them instead of embedding them.
  let c1_and_nbsp = join(map(range(128, 160), 'nr2char(v:val)'), '')
  let latin1_high = join(map(range(160, 255), 'nr2char(v:val)'), '')

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found.alpha)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found.alnum)
  call assert_equal("\b", found.backspace)
  call assert_equal("\t ", found.blank)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", found.cntrl)
  call assert_equal("0123456789", found.digit)
  call assert_equal("\<Esc>", found.escape)
  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', found.graph)
  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', found.lower)
  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~' .. latin1_high, found.print)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', found.punct)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', found.upper)
  call assert_equal("\r", found.return)
  call assert_equal("\t\n\x0b\f\r ", found.space)
  call assert_equal("\t", found.tab)
  call assert_equal('0123456789ABCDEFabcdef', found.xdigit)

  " The identifier and keyword characters depend on the platform.
  if has('win32')
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
          \ .. c1_and_nbsp
          \ .. '¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  elseif has('ebcdic')
    let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
          \ .. "\u0080\u008c\u008e\u009c\u009e"
          \ .. '¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = identchars_ok
  else
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  endif

  " Only the ASCII part of the file name characters differs per platform;
  " characters 160-255 are expected everywhere.
  if has('win32')
    let fname_ascii = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~'
  elseif has('amiga')
    let fname_ascii = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~'
  elseif has('vms')
    let fname_ascii = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~'
  elseif has('ebcdic')
    let fname_ascii = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~'
  else
    let fname_ascii = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~'
  endif
  let fnamechars_ok = fname_ascii .. latin1_high

  call assert_equal(identchars_ok, found.ident)
  call assert_equal(kwordchars_ok, found.keyword)
  call assert_equal(fnamechars_ok, found.fname)

  " The backslash items must agree with the bracket classes.
  call assert_equal(found.ident1, found.ident)
  call assert_equal(found.keyword1, found.keyword)
  call assert_equal(found.fname1, found.fname)

  let &iskeyword = save_iskeyword
  let &isprint = save_isprint
endfunc
183
func Test_classes_re1()
  " Run the shared character class checks with 'regexpengine' set to 1,
  " then put the option back to 0.
  let &regexpengine = 1
  call s:classes_test()
  let &regexpengine = 0
endfunc
189
func Test_classes_re2()
  " Run the shared character class checks with 'regexpengine' set to 2,
  " then put the option back to 0.
  let &regexpengine = 2
  call s:classes_test()
  let &regexpengine = 0
endfunc
195
func Test_reversed_range()
  " A collection range whose end comes before its start must fail with E944
  " for every value of 'regexpengine'.  The engine number is passed as the
  " message so a failure shows which setting was active.
  for engine in [0, 1, 2]
    let &regexpengine = engine
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', engine)
  endfor
  let &regexpengine = 0
endfunc
203
func Test_large_class()
  " With 'regexpengine' 1 the range below is expected to fail with E945,
  " with 'regexpengine' 2 it is expected to work.
  let big_range = '[\u3000-\u4000]'

  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  let &regexpengine = 2
  call assert_equal(0, 'abc def' =~# big_range)
  call assert_equal(1, "\u3042" =~# big_range)

  let &regexpengine = 0
endfunc
212
func Test_optmatch_toolong()
  " A \%[] item with 9000 characters is expected to fail with E339 when
  " 'regexpengine' is 1; it can only handle about 8000 characters.
  let &regexpengine = 1
  let pat = '\\%[' .. repeat('x', 9000) .. ']'
  let cmd = 'call match("abc def", "' .. pat .. '")'
  call assert_fails(cmd, 'E339:')
  let &regexpengine = 0
endfunc
220
221" Test for regexp patterns with multi-byte support, using utf-8.
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests. These will fail unless vim is compiled
  " with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  " equivalence classes
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  " The character before "fname" is the control character 0x9f.  It is
  " written as an escape because it is invisible and gets lost when the file
  " is re-encoded.
  call add(tl, [2, '\f\+', "&*\u009ffname ", 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  " Index in a test item of the expected match; submatches follow it.
  let matchidx = 3
  for t in tl
    let [re, pat, text] = t[0:2]
    for engine in [0, 1, 2]
      " Skip the engine that the first item of the test excludes.
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      " Every failure report starts with the same description.
      let where = 'Error ' . engine . ': pat: \"' . pat .
            \ '\", text: \"' . text
      try
        let l = matchlist(text, pat)
      catch
        call assert_report(where .
              \ '\", caused an exception: \"' . v:exception . '\"')
        " There is no result for this engine.  Without skipping the checks
        " below they would use the result of a previous match, or fail on
        " an undefined variable.
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report(where .
              \ '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report(where . '\", match: \"' . l[0] .
              \ '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report(where . '\", match: \"' . l[0] .
              \ '\", expected: \"' . t[matchidx] . '\"')
      endif
      if len(l) > 0
        " check all the nine submatches; the ones not listed in the test
        " item must be empty
        for i in range(1, 9)
          let e = len(t) <= matchidx + i ? '' : t[matchidx + i]
          if l[i] != e
            call assert_report(where . '\", submatch ' . i .
                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
      endif
    endfor
  endfor
  set regexpengine&
endfunc
347
348" check that 'ambiwidth' does not change the meaning of \p
func Test_ambiwidth()
  " \p must match U+00EC at index 0 for both regexp engines, whatever the
  " value of 'ambiwidth' is.
  for engine in [1, 2]
    for width in ['single', 'double']
      let &regexpengine = engine
      let &ambiwidth = width
      call assert_equal(0, match("\u00EC", '\p'))
    endfor
  endfor
  set regexpengine& ambiwidth&
endfunc
360
func Run_regexp_ignore_case()
  " Checks how "i", "I" and "İ" (U+0130) are matched, with and without \c.
  " Uses the regexp engine selected by the caller.
  let text = 'iIİ'

  " Replacing each character by itself must leave the text unchanged.
  call assert_equal(text, substitute(text, '\([iIİ]\)', '\1', 'g'))

  " [expected result, pattern]; every match is replaced by "x".
  let cases = [
        \ ['iIx', '\c\([İ]\)'],
        \ ['xxİ', '\(i\c\)'],
        \ ['iIx', '\(İ\c\)'],
        \ ['iIx', '\c\(\%u0130\)'],
        \ ['iIx', '\c\([\u0130]\)'],
        \ ['iIx', '\c\([\u012f-\u0131]\)']]
  for [expected, pat] in cases
    call assert_equal(expected, substitute(text, pat, 'x', 'g'))
  endfor
endfunc
371
func Test_regexp_ignore_case()
  " Run the ignore-case checks once with each regexp engine, then reset
  " 'regexpengine' to its default.
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_ignore_case()
  endfor
  set regexpengine&
endfunc
379
380" Tests for regexp with multi-byte encoding and various magic settings
func Run_regexp_multibyte_magic()
  " Fills a new buffer with one line per check.  Each check searches for a
  " pattern, deletes a character with "x" and compares the resulting line.
  " Uses the regexp engine selected by the caller.
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 หม่x อมx
    a อมx หม่x
    b ちカヨは
    c x ¬€x
    d 天使x
  END
  " Lines "e" and "f" contain the characters 0x12345678 and 0x1234abcd,
  " which the \%U and [\U] checks below search for.  They are built from
  " escapes because these characters are not valid in a UTF-8 text file and
  " get replaced when the file is re-encoded.
  call extend(text, ["e \U12345678y", "f \U1234abcdz"])
  let tail =<< trim END
    g a啷bb
    j 0123❤x
    k combinations
    l äö üᾱ̆́
  END
  call extend(text, tail)

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k Run a substitute command that was stored in register w
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  close!
endfunc
464
func Test_regexp_multibyte_magic()
  " Run the multi-byte magic checks once with each regexp engine, then reset
  " 'regexpengine' to its default.
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_multibyte_magic()
  endfor
  set regexpengine&
endfunc
472
473" Test for 7.3.192
474" command ":s/ \?/ /g" splits multi-byte characters into bytes
func Test_split_multibyte_to_bytes()
  " Putting a space at every position where " \?" matches must keep each
  " multi-byte character, including its composing characters, in one piece.
  new
  let line = 'l äö üᾱ̆́'
  call setline(1, line)
  substitute/ \?/ /g
  call assert_equal(' l ä ö ü ᾱ̆́', getline(1))
  close!
endfunc
482
483" Test for matchstr() with multibyte characters
func Test_matchstr_multibyte()
  " Checks the start and count arguments of matchstr() on a text of four
  " multi-byte characters.
  new
  let text = "אבגד"
  " second match
  call assert_equal('ב', matchstr(text, ".", 0, 2))
  call assert_equal('בג', matchstr(text, "..", 0, 2))
  " count zero
  call assert_equal('א', matchstr(text, ".", 0, 0))
  " start at byte index 4, negative count
  call assert_equal('ג', matchstr(text, ".", 4, -1))
  close!
endfunc
492
493" Test for 7.4.636
494" A search with end offset gets stuck at end of file.
func Test_search_with_end_offset()
  " Search for "(" with the "e+" offset, repeat it with "n" and then yank up
  " to the next match into register a.
  new
  let lines = ['', 'dog(a', 'cat(']
  call setline(1, lines)
  execute "normal /(/e+\<CR>"
  execute 'normal n"ayn'
  call assert_equal("a\ncat(", getreg('a'))
  close!
endfunc
503
504" Check that "^" matches even when the line starts with a combining char
func Test_match_start_of_line_combining()
  " Line 2 consists of the single character U+05AE.  Searching for "^" from
  " the first line must stop on that line.
  new
  let lines = ['', "\u05ae", '']
  call setline(1, lines)
  execute "normal gg/^\<CR>"
  call assert_equal(2, line('.'))
  bwipe!
endfunc
512
513" Check that [[:upper:]] matches for automatic engine
func Test_match_char_class_upper()
  " Searches a Cyrillic and a Latin line for [[:upper:]] and [[:lower:]]
  " patterns and checks the number of matches reported by searchcount()
  " for each value of 'regexpengine'.
  new
  let save_engine = &regexpengine

  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)

  " [message, pattern, expected totals for 'regexpengine' 0, 1 and 2]
  let cases = [
        \ ['TEST 1', '\<[[:upper:]]\{2,\}\>', [4, 2, 4]],
        \ ['TEST 2', '\<[[:upper:]].\+\>', [2, 1, 2]],
        \ ['TEST 3 lower', '\<[[:lower:]]\+\>', [4, 2, 4]]]
  for [label, pat, totals] in cases
    let search_cmd = 'norm /' .. pat .. "\<CR>"
    for engine in [0, 1, 2]
      let &regexpengine = engine
      exe search_cmd
      call assert_equal(totals[engine], searchcount().total, label)
    endfor
  endfor

  " clean up
  let &regexpengine = save_engine
  bwipe!
endfunc
560
func Test_match_invalid_byte()
  " Write a small script that contains the byte 0xaa, source it in a new
  " buffer, then remove the buffer and the file again.
  let fname = 'Xinvalid'
  call writefile(0z630a.765d30aa0a.2e0a.790a.4030, fname)
  new
  execute 'source ' .. fname
  bwipe!
  call delete(fname)
endfunc
568
569" vim: shiftwidth=2 sts=2 expandtab
570