1
2
3{-# LANGUAGE ForeignFunctionInterface
4  #-}
5
6
7{-| A binding for the native 'wcwidth'. It's important that you 'setLocale'
8    before using it, like this:
9
10 >  #!/usr/bin/env runhaskell
11 >
12 >  import Text.Printf
13 >
14 >  import System.Locale.SetLocale
15 >  import Data.Char.WCWidth
16 >
17 >  main                     =  do
18 >    setLocale LC_ALL (Just "")
19 >    sequence_ [ display c | c <- chars ]
20 >   where
21 >    chars                  =  [minBound..'A']
22 >    display c = printf "%04x  %2d  %s\n" (fromEnum c) (wcwidth c) (show c)
23
24    The program file @WCWidthTableaux.hs@ contains a more extensive example of
25    using 'wcwidth'.
26
27    Note that this binding to the native implementation gets certain
28    characters wrong in obvious ways as well as ways that are problematic for
29    indentation based languages. The ASCII tab should be assigned a width of
30    8, not -1; and one is likely to find -1 assigned to  numerous obscure
31    characters (for example, symbols from the Book of Changes).
32
33 -}
34
35
36module Data.Char.WCWidth
37  ( wcwidth
38  , widths
39  , ranges
40  ) where
41
42import Foreign.C
43import Data.List
44
45
46
47
48{-| Widths of all characters.
49 -}
50widths                      ::  [ (Char, Int) ]
51widths                       =  [ (c, wcwidth c) | c <- [minBound..maxBound] ]
52
53
54{-| Characters broken into contiguous ranges with the same width.
55 -}
56ranges                      ::  [ ((Char, Char), Int) ]
57ranges                       =  reverse (foldl' aggregate start (tail widths))
58 where
59  start                      =  aggregate [] (head widths)
60  aggregate [] (c, w)        =  [((c, c), w)]
61  aggregate (((a, z), i) : t) (c, w)
62    | i == w                 =  ((a, c), i) : t
63    | otherwise              =  ((c, c), w) : ((a, z), i) : t
64
65
66{-| Binding to the native 'wcwidth'.
67 -}
68wcwidth                     ::  Char -> Int
69wcwidth                      =  fromEnum . native . toEnum . fromEnum
70
71
72foreign import ccall unsafe "wchar.h wcwidth" native :: CWchar -> CInt
73
74
75