1// Copyright (c) 2014 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package regexp 16 17import ( 18 "reflect" 19 "regexp" 20 "testing" 21) 22 23func TestRegexpCharFilter(t *testing.T) { 24 25 htmlTagPattern := `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>` 26 htmlRegex := regexp.MustCompile(htmlTagPattern) 27 28 tests := []struct { 29 input []byte 30 output []byte 31 }{ 32 { 33 input: []byte(`<html>test</html>`), 34 output: []byte(` test `), 35 }, 36 } 37 38 for _, test := range tests { 39 filter := New(htmlRegex, []byte{' '}) 40 output := filter.Filter(test.input) 41 if !reflect.DeepEqual(output, test.output) { 42 t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input)) 43 } 44 } 45} 46 47func TestZeroWidthNonJoinerCharFilter(t *testing.T) { 48 49 zeroWidthNonJoinerPattern := `\x{200C}` 50 zeroWidthNonJoinerRegex := regexp.MustCompile(zeroWidthNonJoinerPattern) 51 52 tests := []struct { 53 input []byte 54 output []byte 55 }{ 56 { 57 input: []byte("water\u200Cunder\u200Cthe\u200Cbridge"), 58 output: []byte("water under the bridge"), 59 }, 60 } 61 62 for _, test := range tests { 63 filter := New(zeroWidthNonJoinerRegex, []byte{' '}) 64 output := filter.Filter(test.input) 65 if !reflect.DeepEqual(output, test.output) { 66 t.Errorf("Expected:\n`%s`\ngot:\n`%s`\nfor:\n`%s`\n", string(test.output), string(output), string(test.input)) 67 } 68 } 69} 70