1 // Mozilla has modified this file - see https://hg.mozilla.org/ for details. 2 /* 3 * Licensed to the Apache Software Foundation (ASF) under one or more 4 * contributor license agreements. See the NOTICE file distributed with 5 * this work for additional information regarding copyright ownership. 6 * The ASF licenses this file to You under the Apache License, Version 2.0 7 * (the "License"); you may not use this file except in compliance with 8 * the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.mozilla.apache.commons.codec.language; 20 21 import org.mozilla.apache.commons.codec.EncoderException; 22 import org.mozilla.apache.commons.codec.StringEncoder; 23 24 /** 25 * Utility methods for {@link Soundex} and {@link RefinedSoundex} classes. 26 * 27 * @author Apache Software Foundation 28 * @version $Id: SoundexUtils.java 658834 2008-05-21 19:57:51Z niallp $ 29 * @since 1.3 30 */ 31 final class SoundexUtils { 32 33 /** 34 * Cleans up the input string before Soundex processing by only returning 35 * upper case letters. 36 * 37 * @param str 38 * The String to clean. 39 * @return A clean String. 40 */ clean(String str)41 static String clean(String str) { 42 if (str == null || str.length() == 0) { 43 return str; 44 } 45 int len = str.length(); 46 char[] chars = new char[len]; 47 int count = 0; 48 for (int i = 0; i < len; i++) { 49 if (Character.isLetter(str.charAt(i))) { 50 chars[count++] = str.charAt(i); 51 } 52 } 53 if (count == len) { 54 return str.toUpperCase(java.util.Locale.ENGLISH); 55 } 56 return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH); 57 } 58 59 /** 60 * Encodes the Strings and returns the number of characters in the two 61 * encoded Strings that are the same. 62 * <ul> 63 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates 64 * little or no similarity, and 4 indicates strong similarity or identical 65 * values.</li> 66 * <li>For refined Soundex, the return value can be greater than 4.</li> 67 * </ul> 68 * 69 * @param encoder 70 * The encoder to use to encode the Strings. 71 * @param s1 72 * A String that will be encoded and compared. 73 * @param s2 74 * A String that will be encoded and compared. 75 * @return The number of characters in the two Soundex encoded Strings that 76 * are the same. 77 * 78 * @see #differenceEncoded(String,String) 79 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 80 * MS T-SQL DIFFERENCE</a> 81 * 82 * @throws EncoderException 83 * if an error occurs encoding one of the strings 84 */ difference(StringEncoder encoder, String s1, String s2)85 static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException { 86 return differenceEncoded(encoder.encode(s1), encoder.encode(s2)); 87 } 88 89 /** 90 * Returns the number of characters in the two Soundex encoded Strings that 91 * are the same. 92 * <ul> 93 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates 94 * little or no similarity, and 4 indicates strong similarity or identical 95 * values.</li> 96 * <li>For refined Soundex, the return value can be greater than 4.</li> 97 * </ul> 98 * 99 * @param es1 100 * An encoded String. 101 * @param es2 102 * An encoded String. 103 * @return The number of characters in the two Soundex encoded Strings that 104 * are the same. 105 * 106 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 107 * MS T-SQL DIFFERENCE</a> 108 */ differenceEncoded(String es1, String es2)109 static int differenceEncoded(String es1, String es2) { 110 111 if (es1 == null || es2 == null) { 112 return 0; 113 } 114 int lengthToMatch = Math.min(es1.length(), es2.length()); 115 int diff = 0; 116 for (int i = 0; i < lengthToMatch; i++) { 117 if (es1.charAt(i) == es2.charAt(i)) { 118 diff++; 119 } 120 } 121 return diff; 122 } 123 124 } 125