1module Utils.Match exposing (consecutiveChars, jaroWinkler) 2 3import Char 4import Utils.List exposing (zip) 5 6 7{-| 8 9 Adapted from https://blog.art-of-coding.eu/comparing-strings-with-metrics-in-haskell/ 10 11-} 12jaro : String -> String -> Float 13jaro s1 s2 = 14 if s1 == s2 then 15 1.0 16 17 else 18 let 19 l1 = 20 String.length s1 21 22 l2 = 23 String.length s2 24 25 z2 = 26 zip (List.range 1 l2) (String.toList s2) 27 |> List.map (Tuple.mapSecond Char.toCode) 28 29 searchLength = 30 -- A character must be within searchLength spaces of the 31 -- character we are matching against in order to be considered 32 -- a match. 33 -- (//) is integer division, which removes the need to floor 34 -- the result. 35 (max l1 l2 // 2) - 1 36 37 m = 38 zip (List.range 1 l1) (String.toList s1) 39 |> List.map (Tuple.mapSecond Char.toCode) 40 |> List.concatMap (charMatch searchLength z2) 41 42 ml = 43 List.length m 44 45 t = 46 m 47 |> List.map (transposition z2 >> toFloat >> (*) 0.5) 48 |> List.sum 49 50 ml1 = 51 toFloat ml / toFloat l1 52 53 ml2 = 54 toFloat ml / toFloat l2 55 56 mtm = 57 (toFloat ml - t) / toFloat ml 58 in 59 if ml == 0 then 60 0 61 62 else 63 (1 / 3) * (ml1 + ml2 + mtm) 64 65 66winkler : String -> String -> Float -> Float 67winkler s1 s2 jaro_ = 68 if s1 == "" || s2 == "" then 69 0.0 70 71 else if s1 == s2 then 72 1.0 73 74 else 75 let 76 l = 77 consecutiveChars s1 s2 78 |> String.length 79 |> toFloat 80 81 p = 82 0.25 83 in 84 jaro_ + ((l * p) * (1.0 - jaro_)) 85 86 87jaroWinkler : String -> String -> Float 88jaroWinkler s1 s2 = 89 if s1 == "" || s2 == "" then 90 0.0 91 92 else if s1 == s2 then 93 1.0 94 95 else 96 jaro s1 s2 97 |> winkler s1 s2 98 99 100consecutiveChars : String -> String -> String 101consecutiveChars s1 s2 = 102 if s1 == "" || s2 == "" then 103 "" 104 105 else if s1 == s2 then 106 s1 107 108 else 109 cp (String.toList s1) (String.toList s2) [] 110 |> String.fromList 111 112 113cp : List Char -> List Char -> List Char -> List Char 114cp l1 l2 acc = 115 case ( l1, l2 ) of 116 ( x :: xs, y :: ys ) -> 117 if x == y then 118 cp xs ys (acc ++ [ x ]) 119 120 else if List.length acc > 0 then 121 -- If we have already found matches, we bail. We only want 122 -- consecutive matches. 123 acc 124 125 else 126 -- Go through every character in l1 until it matches the first 127 -- character in l2, and then start counting from there. 128 cp l1 ys acc 129 130 _ -> 131 acc 132 133 134charMatch : Int -> List ( Int, Int ) -> ( Int, Int ) -> List ( Int, Int ) 135charMatch matchRange list ( p, q ) = 136 list 137 |> List.drop (p - matchRange - 1) 138 |> List.take (p + matchRange) 139 |> List.filter (Tuple.second >> (==) q) 140 141 142transposition : List ( Int, Int ) -> ( Int, Int ) -> Int 143transposition list ( p, q ) = 144 list 145 |> List.filter 146 (\( x, y ) -> 147 p /= x && q == y 148 ) 149 |> List.length 150