1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE ScopedTypeVariables #-}
3-- | A pipe-based interface to Aspell.
4--
5-- This interface is beneficial when dynamic linking against the Aspell
6-- library would be undesirable, e.g., for binary portability reasons.
7--
8-- This implementation is based on the description of the Aspell pipe
9-- protocol at
10--
11-- http://aspell.net/man-html/Through-A-Pipe.html
12module Text.Aspell
13  ( Aspell
14  , AspellResponse(..)
15  , Mistake(..)
16  , AspellOption(..)
17  , startAspell
18  , stopAspell
19  , askAspell
20  , aspellIdentification
21  , aspellDictionaries
22  )
23where
24
25import qualified Control.Exception as E
26import Control.Monad (forM, when, void)
27import qualified Control.Concurrent.Async as A
28import Control.Concurrent.MVar (MVar, newMVar, withMVar)
29import Data.Monoid ((<>))
30import Data.Maybe (fromJust)
31import Text.Read (readMaybe)
32import System.IO (Handle, hFlush)
33import qualified Data.Text as T
34import qualified Data.Text.IO as T
35
36import qualified System.Process as P
37
38-- | A handle to a running Aspell instance.
39data Aspell =
40    Aspell { aspellProcessHandle  :: P.ProcessHandle
41           , aspellStdin          :: Handle
42           , aspellStdout         :: Handle
43           , aspellIdentification :: T.Text -- ^ startup-reported version string
44           , aspellLock           :: MVar ()
45           }
46
47instance Show Aspell where
48    show as = mconcat [ "Aspell<"
49                      , T.unpack (aspellIdentification as)
50                      , ">"
51                      ]
52
53-- | The kind of responses we can get from Aspell.
54data AspellResponse =
55    AllCorrect
56    -- ^ The input had no spelling mistakes.
57    | Mistakes [Mistake]
58    -- ^ The input had the specified mistakes.
59    deriving (Eq, Show)
60
61-- | A spelling mistake.
62data Mistake =
63    Mistake { mistakeWord :: T.Text
64            -- ^ The original word in misspelled form.
65            , mistakeNearMisses :: Int
66            -- ^ The number of alternative correct spellings that were
67            -- counted.
68            , mistakeOffset :: Int
69            -- ^ The offset, starting at zero, in the original input
70            -- where this misspelling occurred.
71            , mistakeAlternatives :: [T.Text]
72            -- ^ The correct spelling alternatives.
73            }
74            deriving (Show, Eq)
75
76-- | An Aspell option.
77data AspellOption =
78    UseDictionary T.Text
79    -- ^ Use the specified dictionary (see @aspell -d@).
80    | RawArg T.Text
81    -- ^ Provide a command-line argument directly to @aspell@.
82    deriving (Show, Eq)
83
84-- | Start Aspell with the specified options. Returns either an error
85-- message on failure or an 'Aspell' handle on success.
86--
87-- Any 'RawArg's provided in the option list are provided to @aspell@ as
88-- command-line arguments in the order provided.
89startAspell :: [AspellOption] -> IO (Either String Aspell)
90startAspell options = do
91    optResult <- checkOptions options
92    case optResult of
93        Just e -> return $ Left e
94        Nothing -> tryConvert $ do
95            let proc = (P.proc aspellCommand ("-a" : (concat $ optionToArgs <$> options)))
96                       { P.std_in = P.CreatePipe
97                       , P.std_out = P.CreatePipe
98                       , P.std_err = P.CreatePipe
99                       }
100
101            (Just inH, Just outH, Just errH, ph) <- P.createProcess proc
102
103            errorAsync <- A.async (T.hGetLine errH)
104
105            -- If startup is unsuccessful, stdout will close without output.
106            result <- E.try (T.hGetLine outH) :: IO (Either E.SomeException T.Text)
107
108            case result of
109                Left{} -> do
110                    e <- A.wait errorAsync
111                    fail ("Error starting aspell: " <> T.unpack e)
112
113                Right ident -> do
114                    A.cancel errorAsync
115                    -- Now that aspell has started and we got an
116                    -- identification string, we need to make sure it
117                    -- looks legitimate before we proceed.
118                    case validIdent ident of
119                        False -> fail ("Unexpected identification string: " <> T.unpack ident)
120                        True -> do
121                            mv <- newMVar ()
122
123                            let as = Aspell { aspellProcessHandle  = ph
124                                            , aspellStdin          = inH
125                                            , aspellStdout         = outH
126                                            , aspellIdentification = ident
127                                            , aspellLock           = mv
128                                            }
129
130                            -- Enable terse mode with aspell to improve performance.
131                            T.hPutStrLn inH "!"
132
133                            return as
134
135validIdent :: T.Text -> Bool
136validIdent s =
137    "@(#) International Ispell Version" `T.isPrefixOf` s &&
138    "but really Aspell" `T.isInfixOf` s
139
140checkOptions :: [AspellOption] -> IO (Maybe String)
141checkOptions [] = return Nothing
142checkOptions (o:os) = do
143    result <- checkOption o
144    case result of
145        Nothing -> checkOptions os
146        Just msg -> return $ Just msg
147
148aspellCommand :: String
149aspellCommand = "aspell"
150
151checkOption :: AspellOption -> IO (Maybe String)
152checkOption (RawArg {}) = return Nothing
153checkOption (UseDictionary d) = do
154    -- Get the list of installed dictionaries and check whether the
155    -- desired dictionary is included.
156    dictListResult <- aspellDictionaries
157    case dictListResult of
158        Left msg -> return $ Just msg
159        Right dictList ->
160            case d `elem` dictList of
161                True -> return Nothing
162                False -> return $ Just $ "Requested dictionary " <> show d <> " is not installed"
163
164-- | Obtain the list of installed Aspell dictionaries.
165aspellDictionaries :: IO (Either String [T.Text])
166aspellDictionaries =
167    tryConvert $
168    (T.pack <$>) <$> lines <$> P.readProcess aspellCommand ["dicts"] ""
169
170optionToArgs :: AspellOption -> [String]
171optionToArgs (UseDictionary d) = ["-d", T.unpack d]
172optionToArgs (RawArg val) = [T.unpack val]
173
174-- | Stop a running Aspell instance.
175stopAspell :: Aspell -> IO ()
176stopAspell = P.terminateProcess . aspellProcessHandle
177
178-- | Submit input text to Aspell for spell-checking. The input text may
179-- contain multiple lines. This returns an 'AspellResponse' for each
180-- line.
181--
182-- This function is thread-safe and will block until other callers
183-- finish.
184askAspell :: Aspell -> T.Text -> IO [AspellResponse]
185askAspell as t = withMVar (aspellLock as) $ const $ do
186    -- Send the user's input. Prefix with "^" to ensure that the line is
187    -- checked even if it contains metacharacters.
188    forM (T.lines t) $ \theLine -> do
189        T.hPutStrLn (aspellStdin as) ("^" <> theLine)
190        hFlush (aspellStdin as)
191
192        -- Read lines until we get an empty one, which indicates that aspell
193        -- is done with the request.
194        resultLines <- readLinesUntil (aspellStdout as) T.null
195
196        case resultLines of
197            [] -> return AllCorrect
198            _ -> return $ Mistakes $ parseMistake <$> resultLines
199
200parseMistake :: T.Text -> Mistake
201parseMistake t
202    | "&" `T.isPrefixOf` t = parseWithAlternatives t
203    | "#" `T.isPrefixOf` t = parseWithoutAlternatives t
204
205parseWithAlternatives :: T.Text -> Mistake
206parseWithAlternatives t =
207    let (header, altsWithColon) = T.breakOn ": " t
208        altsStr = T.drop 2 altsWithColon
209        ["&", orig, nearMissesStr, offsetStr] = T.words header
210        alts = T.splitOn ", " altsStr
211        offset = fromJust $ readMaybe $ T.unpack offsetStr
212        nearMisses = fromJust $ readMaybe $ T.unpack nearMissesStr
213    in Mistake { mistakeWord = orig
214               , mistakeNearMisses = nearMisses
215               -- Aspell's offset starts at 1 here because of the "^"
216               -- we included in the input. Here we adjust the offset
217               -- so that it's relative to the beginning of the user's
218               -- input, not our protocol input.
219               , mistakeOffset = offset - 1
220               , mistakeAlternatives = alts
221               }
222
223parseWithoutAlternatives :: T.Text -> Mistake
224parseWithoutAlternatives t =
225    let ["#", orig, offsetStr] = T.words t
226        offset = fromJust $ readMaybe $ T.unpack offsetStr
227    in Mistake { mistakeWord = orig
228               , mistakeNearMisses = 0
229               , mistakeOffset = offset
230               , mistakeAlternatives = []
231               }
232
233readLinesUntil :: Handle -> (T.Text -> Bool) -> IO [T.Text]
234readLinesUntil h f = do
235    line <- T.hGetLine h
236    case f line of
237        True -> return []
238        False -> do
239            rest <- readLinesUntil h f
240            return $ line : rest
241
242tryConvert :: IO a -> IO (Either String a)
243tryConvert act = do
244    result <- E.try act
245    return $ either (Left . showException) Right result
246
247showException :: E.SomeException -> String
248showException = show
249