Made package stack compatible
Created and added stack.yaml and .gitignore files. Relaxed the version dependency on 'binary' package in cabal file. Is that OK? Also brought the minimum cabal version to >=1.8, so I could add a test target that pulls in the library. Changed all tabs to spaces - I don't know when the Haskell compiler started giving warnings about that.
This commit is contained in:
parent
6284c1a677
commit
8727ac25a5
22
.gitignore
vendored
Normal file
22
.gitignore
vendored
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
### Haskell ###
|
||||||
|
dist
|
||||||
|
dist-*
|
||||||
|
cabal-dev
|
||||||
|
*.o
|
||||||
|
*.hi
|
||||||
|
*.chi
|
||||||
|
*.chs.h
|
||||||
|
*.dyn_o
|
||||||
|
*.dyn_hi
|
||||||
|
.hpc
|
||||||
|
.hsenv
|
||||||
|
.cabal-sandbox/
|
||||||
|
cabal.sandbox.config
|
||||||
|
*.prof
|
||||||
|
*.aux
|
||||||
|
*.hp
|
||||||
|
*.eventlog
|
||||||
|
.stack-work/
|
||||||
|
cabal.project.local
|
||||||
|
.HTF/
|
||||||
|
|
||||||
@ -359,6 +359,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
|
|||||||
-- | Takes the name of an encoding and creates a dynamic encoding from it.
|
-- | Takes the name of an encoding and creates a dynamic encoding from it.
|
||||||
encodingFromString :: String -> DynEncoding
|
encodingFromString :: String -> DynEncoding
|
||||||
encodingFromString str = maybe
|
encodingFromString str = maybe
|
||||||
(error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str)
|
(error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str)
|
||||||
id
|
id
|
||||||
(encodingFromStringExplicit str)
|
(encodingFromStringExplicit str)
|
||||||
|
|||||||
@ -2,8 +2,8 @@
|
|||||||
{- | This implements BootString en- and decoding, the foundation of Punycode
|
{- | This implements BootString en- and decoding, the foundation of Punycode
|
||||||
-}
|
-}
|
||||||
module Data.Encoding.BootString
|
module Data.Encoding.BootString
|
||||||
(BootString(..)
|
(BootString(..)
|
||||||
,punycode) where
|
,punycode) where
|
||||||
|
|
||||||
import Data.Encoding.Base
|
import Data.Encoding.Base
|
||||||
import Data.Encoding.Exception
|
import Data.Encoding.Exception
|
||||||
@ -17,77 +17,77 @@ import Data.Typeable
|
|||||||
import Control.Monad (when)
|
import Control.Monad (when)
|
||||||
|
|
||||||
data BootString = BootString
|
data BootString = BootString
|
||||||
{base :: Int
|
{base :: Int
|
||||||
,tmin :: Int
|
,tmin :: Int
|
||||||
,tmax :: Int
|
,tmax :: Int
|
||||||
,skew :: Int
|
,skew :: Int
|
||||||
,damp :: Int
|
,damp :: Int
|
||||||
,init_bias :: Int
|
,init_bias :: Int
|
||||||
,init_n :: Int
|
,init_n :: Int
|
||||||
}
|
}
|
||||||
deriving (Show,Eq,Typeable)
|
deriving (Show,Eq,Typeable)
|
||||||
|
|
||||||
punycode :: BootString
|
punycode :: BootString
|
||||||
punycode = BootString
|
punycode = BootString
|
||||||
{base = 36
|
{base = 36
|
||||||
,tmin = 1
|
,tmin = 1
|
||||||
,tmax = 26
|
,tmax = 26
|
||||||
,skew = 38
|
,skew = 38
|
||||||
,damp = 700
|
,damp = 700
|
||||||
,init_bias = 72
|
,init_bias = 72
|
||||||
,init_n = 0x80
|
,init_n = 0x80
|
||||||
}
|
}
|
||||||
|
|
||||||
punyValue :: ByteSource m => Word8 -> m Int
|
punyValue :: ByteSource m => Word8 -> m Int
|
||||||
punyValue c
|
punyValue c
|
||||||
| n < 0x30 = norep
|
| n < 0x30 = norep
|
||||||
| n <= 0x39 = return $ n-0x30+26
|
| n <= 0x39 = return $ n-0x30+26
|
||||||
| n < 0x41 = norep
|
| n < 0x41 = norep
|
||||||
| n <= 0x5A = return $ n-0x41
|
| n <= 0x5A = return $ n-0x41
|
||||||
| n < 0x61 = norep
|
| n < 0x61 = norep
|
||||||
| n <= 0x7A = return $ n-0x61
|
| n <= 0x7A = return $ n-0x61
|
||||||
| otherwise = norep
|
| otherwise = norep
|
||||||
where
|
where
|
||||||
n = fromIntegral c
|
n = fromIntegral c
|
||||||
norep = throwException (IllegalCharacter c)
|
norep = throwException (IllegalCharacter c)
|
||||||
|
|
||||||
punyChar :: ByteSink m => Int -> m Word8
|
punyChar :: ByteSink m => Int -> m Word8
|
||||||
punyChar c
|
punyChar c
|
||||||
| c < 0 = norep
|
| c < 0 = norep
|
||||||
| c < 26 = return $ fromIntegral $ 0x61+c
|
| c < 26 = return $ fromIntegral $ 0x61+c
|
||||||
| c < 36 = return $ fromIntegral $ 0x30+c-26
|
| c < 36 = return $ fromIntegral $ 0x30+c-26
|
||||||
| otherwise = norep
|
| otherwise = norep
|
||||||
where
|
where
|
||||||
norep = throwException (HasNoRepresentation (chr c))
|
norep = throwException (HasNoRepresentation (chr c))
|
||||||
|
|
||||||
getT :: BootString -> Int -> Int -> Int
|
getT :: BootString -> Int -> Int -> Int
|
||||||
getT bs k bias
|
getT bs k bias
|
||||||
| k <= bias + (tmin bs) = tmin bs
|
| k <= bias + (tmin bs) = tmin bs
|
||||||
| k >= bias + (tmax bs) = tmax bs
|
| k >= bias + (tmax bs) = tmax bs
|
||||||
| otherwise = k-bias
|
| otherwise = k-bias
|
||||||
|
|
||||||
adapt :: BootString -> Int -> Int -> Bool -> Int
|
adapt :: BootString -> Int -> Int -> Bool -> Int
|
||||||
adapt bs delta numpoints firsttime = let
|
adapt bs delta numpoints firsttime = let
|
||||||
delta1 = if firsttime
|
delta1 = if firsttime
|
||||||
then delta `div` (damp bs)
|
then delta `div` (damp bs)
|
||||||
else delta `div` 2
|
else delta `div` 2
|
||||||
delta2 = delta1 + (delta1 `div` numpoints)
|
delta2 = delta1 + (delta1 `div` numpoints)
|
||||||
(rd,rk) = head
|
(rd,rk) = head
|
||||||
$ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst)
|
$ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst)
|
||||||
$ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0)
|
$ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0)
|
||||||
in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs))
|
in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs))
|
||||||
|
|
||||||
decodeValue :: ByteSource m => BootString -> Int -> Int -> Int -> Int -> [Int] -> m (Int,[Int])
|
decodeValue :: ByteSource m => BootString -> Int -> Int -> Int -> Int -> [Int] -> m (Int,[Int])
|
||||||
decodeValue bs bias i k w (x:xs)
|
decodeValue bs bias i k w (x:xs)
|
||||||
| x >= base bs = throwException OutOfRange
|
| x >= base bs = throwException OutOfRange
|
||||||
| x > (maxBound - i) `div` w = throwException OutOfRange
|
| x > (maxBound - i) `div` w = throwException OutOfRange
|
||||||
| x < t = return (ni,xs)
|
| x < t = return (ni,xs)
|
||||||
| w > maxBound `div` (base bs - t) = throwException OutOfRange
|
| w > maxBound `div` (base bs - t) = throwException OutOfRange
|
||||||
| null xs = throwException OutOfRange
|
| null xs = throwException OutOfRange
|
||||||
| otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs
|
| otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs
|
||||||
where
|
where
|
||||||
ni = i + x*w
|
ni = i + x*w
|
||||||
t = getT bs k bias
|
t = getT bs k bias
|
||||||
|
|
||||||
decodeValues :: ByteSource m => BootString -> Int -> [Int] -> m [(Char,Int)]
|
decodeValues :: ByteSource m => BootString -> Int -> [Int] -> m [(Char,Int)]
|
||||||
decodeValues bs len xs = decodeValues' bs (init_n bs) 0 (init_bias bs) len xs
|
decodeValues bs len xs = decodeValues' bs (init_n bs) 0 (init_bias bs) len xs
|
||||||
@ -108,8 +108,8 @@ decodeValues' bs n i bias len xs = do
|
|||||||
insertDeltas :: [(a,Int)] -> [a] -> [a]
|
insertDeltas :: [(a,Int)] -> [a] -> [a]
|
||||||
insertDeltas [] str = str
|
insertDeltas [] str = str
|
||||||
insertDeltas ((c,p):xs) str = let
|
insertDeltas ((c,p):xs) str = let
|
||||||
(l,r) = splitAt p str
|
(l,r) = splitAt p str
|
||||||
in insertDeltas xs (l++[c]++r)
|
in insertDeltas xs (l++[c]++r)
|
||||||
|
|
||||||
punyDecode :: ByteSource m => [Word8] -> [Word8] -> m String
|
punyDecode :: ByteSource m => [Word8] -> [Word8] -> m String
|
||||||
punyDecode base ext = do
|
punyDecode base ext = do
|
||||||
@ -119,32 +119,32 @@ punyDecode base ext = do
|
|||||||
|
|
||||||
encodeValue :: BootString -> Int -> Int -> Int -> Int -> [Int]
|
encodeValue :: BootString -> Int -> Int -> Int -> Int -> [Int]
|
||||||
encodeValue bs bias delta n c = unfoldr (\(q,k,out) -> let
|
encodeValue bs bias delta n c = unfoldr (\(q,k,out) -> let
|
||||||
t = getT bs k bias
|
t = getT bs k bias
|
||||||
(nq,dc) = (q-t) `divMod` (base bs - t)
|
(nq,dc) = (q-t) `divMod` (base bs - t)
|
||||||
in if out
|
in if out
|
||||||
then Nothing
|
then Nothing
|
||||||
else (if q < t
|
else (if q < t
|
||||||
then Just (q,(q,k+base bs,True))
|
then Just (q,(q,k+base bs,True))
|
||||||
else Just (t + dc,(nq,k+base bs,False)))
|
else Just (t + dc,(nq,k+base bs,False)))
|
||||||
) (delta,base bs,False)
|
) (delta,base bs,False)
|
||||||
|
|
||||||
encodeValues' :: BootString -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int,Int,Int,[Int])
|
encodeValues' :: BootString -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int,Int,Int,[Int])
|
||||||
encodeValues' _ _ h bias delta _ [] = (delta,h,bias,[])
|
encodeValues' _ _ h bias delta _ [] = (delta,h,bias,[])
|
||||||
encodeValues' bs b h bias delta n (c:cs) = case compare c n of
|
encodeValues' bs b h bias delta n (c:cs) = case compare c n of
|
||||||
LT -> encodeValues' bs b h bias (delta+1) n cs
|
LT -> encodeValues' bs b h bias (delta+1) n cs
|
||||||
GT -> encodeValues' bs b h bias delta n cs
|
GT -> encodeValues' bs b h bias delta n cs
|
||||||
EQ -> let
|
EQ -> let
|
||||||
(ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs
|
(ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs
|
||||||
xs = encodeValue bs bias delta n c
|
xs = encodeValue bs bias delta n c
|
||||||
in (ndelta,nh,nbias,xs++rest)
|
in (ndelta,nh,nbias,xs++rest)
|
||||||
|
|
||||||
encodeValues :: BootString -> Int -> Int -> Int -> Int -> Int -> Int -> [Int] -> [Int]
|
encodeValues :: BootString -> Int -> Int -> Int -> Int -> Int -> Int -> [Int] -> [Int]
|
||||||
encodeValues bs b l h bias delta n cps
|
encodeValues bs b l h bias delta n cps
|
||||||
| h == l = []
|
| h == l = []
|
||||||
| otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps
|
| otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps
|
||||||
where
|
where
|
||||||
m = minimum (filter (>=n) cps)
|
m = minimum (filter (>=n) cps)
|
||||||
(ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps
|
(ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps
|
||||||
|
|
||||||
breakLast :: (a -> Bool) -> [a] -> Maybe ([a],[a])
|
breakLast :: (a -> Bool) -> [a] -> Maybe ([a],[a])
|
||||||
breakLast p xs = do
|
breakLast p xs = do
|
||||||
@ -166,7 +166,7 @@ instance Encoding BootString where
|
|||||||
encodeChar _ c = error "Data.Encoding.BootString.encodeChar: Please use 'encode' for encoding BootStrings"
|
encodeChar _ c = error "Data.Encoding.BootString.encodeChar: Please use 'encode' for encoding BootStrings"
|
||||||
decodeChar _ = error "Data.Encoding.BootString.decodeChar: Please use 'decode' for decoding BootStrings"
|
decodeChar _ = error "Data.Encoding.BootString.decodeChar: Please use 'decode' for decoding BootStrings"
|
||||||
encode bs str = let (base,nbase) = partition (\c -> ord c < init_n bs) str
|
encode bs str = let (base,nbase) = partition (\c -> ord c < init_n bs) str
|
||||||
b = length base
|
b = length base
|
||||||
in do
|
in do
|
||||||
res <- mapM punyChar $ encodeValues bs b (length str) b (init_bias bs) 0 (init_n bs) (map ord str)
|
res <- mapM punyChar $ encodeValues bs b (length str) b (init_bias bs) 0 (init_n bs) (map ord str)
|
||||||
when (not $ null base) $ do
|
when (not $ null base) $ do
|
||||||
@ -178,8 +178,8 @@ instance Encoding BootString where
|
|||||||
let m = fromIntegral $ ord '-'
|
let m = fromIntegral $ ord '-'
|
||||||
case breakLast (==m) wrds of
|
case breakLast (==m) wrds of
|
||||||
Just ([],_) -> throwException (IllegalCharacter m)
|
Just ([],_) -> throwException (IllegalCharacter m)
|
||||||
Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of
|
Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of
|
||||||
Nothing -> punyDecode base nbase
|
Nothing -> punyDecode base nbase
|
||||||
Just ww -> throwException (IllegalCharacter ww)
|
Just ww -> throwException (IllegalCharacter ww)
|
||||||
Nothing -> punyDecode [] wrds
|
Nothing -> punyDecode [] wrds
|
||||||
encodeable bs c = True -- XXX: hm, really?
|
encodeable bs c = True -- XXX: hm, really?
|
||||||
|
|||||||
@ -9,8 +9,8 @@ import Control.Monad.Identity
|
|||||||
-- | This exception type is thrown whenever something went wrong during the
|
-- | This exception type is thrown whenever something went wrong during the
|
||||||
-- encoding-process.
|
-- encoding-process.
|
||||||
data EncodingException
|
data EncodingException
|
||||||
= HasNoRepresentation Char -- ^ Thrown if a specific character
|
= HasNoRepresentation Char -- ^ Thrown if a specific character
|
||||||
-- is not representable in an encoding.
|
-- is not representable in an encoding.
|
||||||
deriving (Eq,Ord,Show,Read,Typeable)
|
deriving (Eq,Ord,Show,Read,Typeable)
|
||||||
|
|
||||||
instance Exception EncodingException
|
instance Exception EncodingException
|
||||||
@ -18,13 +18,13 @@ instance Exception EncodingException
|
|||||||
-- | This exception type is thrown whenever something went wrong during the
|
-- | This exception type is thrown whenever something went wrong during the
|
||||||
-- decoding-process.
|
-- decoding-process.
|
||||||
data DecodingException
|
data DecodingException
|
||||||
= IllegalCharacter Word8 -- ^ The sequence contained an illegal
|
= IllegalCharacter Word8 -- ^ The sequence contained an illegal
|
||||||
-- byte that couldn't be decoded.
|
-- byte that couldn't be decoded.
|
||||||
| UnexpectedEnd -- ^ more bytes were needed to allow a
|
| UnexpectedEnd -- ^ more bytes were needed to allow a
|
||||||
-- successfull decoding.
|
-- successfull decoding.
|
||||||
| OutOfRange -- ^ the decoded value was out of the unicode range
|
| OutOfRange -- ^ the decoded value was out of the unicode range
|
||||||
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
|
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
|
||||||
-- character, but is illegal.
|
-- character, but is illegal.
|
||||||
deriving (Eq,Ord,Show,Read,Typeable)
|
deriving (Eq,Ord,Show,Read,Typeable)
|
||||||
|
|
||||||
instance Exception DecodingException
|
instance Exception DecodingException
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
|
See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
|
||||||
-}
|
-}
|
||||||
module Data.Encoding.KOI8R
|
module Data.Encoding.KOI8R
|
||||||
(KOI8R(..)) where
|
(KOI8R(..)) where
|
||||||
|
|
||||||
import Control.Throws
|
import Control.Throws
|
||||||
import Data.Array.Unboxed
|
import Data.Array.Unboxed
|
||||||
@ -28,23 +28,23 @@ koi8rMap = fromList (zip koi8rList [128..])
|
|||||||
|
|
||||||
koi8rList :: [Char]
|
koi8rList :: [Char]
|
||||||
koi8rList =
|
koi8rList =
|
||||||
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
||||||
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
||||||
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
||||||
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
||||||
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
|
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
|
||||||
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
|
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
|
||||||
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
|
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
|
||||||
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
|
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
|
||||||
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
||||||
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
||||||
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
||||||
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
||||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||||
]
|
]
|
||||||
|
|
||||||
instance Encoding KOI8R where
|
instance Encoding KOI8R where
|
||||||
decodeChar _ = do
|
decodeChar _ = do
|
||||||
@ -53,8 +53,8 @@ instance Encoding KOI8R where
|
|||||||
then return $ chr $ fromIntegral w
|
then return $ chr $ fromIntegral w
|
||||||
else return $ koi8rArr!w
|
else return $ koi8rArr!w
|
||||||
encodeChar _ ch
|
encodeChar _ ch
|
||||||
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
||||||
| otherwise = case lookup ch koi8rMap of
|
| otherwise = case lookup ch koi8rMap of
|
||||||
Just w -> pushWord8 w
|
Just w -> pushWord8 w
|
||||||
Nothing -> throwException (HasNoRepresentation ch)
|
Nothing -> throwException (HasNoRepresentation ch)
|
||||||
encodeable _ c = member c koi8rMap
|
encodeable _ c = member c koi8rMap
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
See <http://en.wikipedia.org/wiki/KOI8-U> for more information.
|
See <http://en.wikipedia.org/wiki/KOI8-U> for more information.
|
||||||
-}
|
-}
|
||||||
module Data.Encoding.KOI8U
|
module Data.Encoding.KOI8U
|
||||||
(KOI8U(..)) where
|
(KOI8U(..)) where
|
||||||
|
|
||||||
import Data.Encoding.Base
|
import Data.Encoding.Base
|
||||||
import Data.Encoding.ByteSource
|
import Data.Encoding.ByteSource
|
||||||
@ -28,23 +28,23 @@ koi8uMap = fromList (zip koi8uList [128..])
|
|||||||
|
|
||||||
koi8uList :: [Char]
|
koi8uList :: [Char]
|
||||||
koi8uList =
|
koi8uList =
|
||||||
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
||||||
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
||||||
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
||||||
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
||||||
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
|
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
|
||||||
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
|
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
|
||||||
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
|
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
|
||||||
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
|
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
|
||||||
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
||||||
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
||||||
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
||||||
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
||||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||||
]
|
]
|
||||||
|
|
||||||
instance Encoding KOI8U where
|
instance Encoding KOI8U where
|
||||||
decodeChar _ = do
|
decodeChar _ = do
|
||||||
@ -53,8 +53,8 @@ instance Encoding KOI8U where
|
|||||||
then return $ chr $ fromIntegral w
|
then return $ chr $ fromIntegral w
|
||||||
else return $ koi8uArr!w
|
else return $ koi8uArr!w
|
||||||
encodeChar _ ch
|
encodeChar _ ch
|
||||||
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
||||||
| otherwise = case lookup ch koi8uMap of
|
| otherwise = case lookup ch koi8uMap of
|
||||||
Just w -> pushWord8 w
|
Just w -> pushWord8 w
|
||||||
Nothing -> throwException (HasNoRepresentation ch)
|
Nothing -> throwException (HasNoRepresentation ch)
|
||||||
encodeable _ c = member c koi8uMap
|
encodeable _ c = member c koi8uMap
|
||||||
|
|||||||
@ -47,14 +47,14 @@ data CharacterMapping_normalization = CharacterMapping_normalization_undetermine
|
|||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
data Stateful_siso = Stateful_siso Validity Validity
|
data Stateful_siso = Stateful_siso Validity Validity
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
newtype History = History (List1 Modified) deriving (Eq,Show)
|
newtype History = History (List1 Modified) deriving (Eq,Show)
|
||||||
data Modified = Modified Modified_Attrs String
|
data Modified = Modified Modified_Attrs String
|
||||||
deriving (Eq,Show)
|
deriving (Eq,Show)
|
||||||
data Modified_Attrs = Modified_Attrs
|
data Modified_Attrs = Modified_Attrs
|
||||||
{ modifiedVersion :: String
|
{ modifiedVersion :: String
|
||||||
, modifiedDate :: String
|
, modifiedDate :: String
|
||||||
} deriving (Eq,Show)
|
} deriving (Eq,Show)
|
||||||
newtype Validity = Validity (List1 State) deriving (Eq,Show)
|
newtype Validity = Validity (List1 State) deriving (Eq,Show)
|
||||||
data State = State
|
data State = State
|
||||||
{ stateType :: String
|
{ stateType :: String
|
||||||
, stateNext :: String
|
, stateNext :: String
|
||||||
@ -112,10 +112,10 @@ data Escape = Escape
|
|||||||
{ escapeSequence :: String
|
{ escapeSequence :: String
|
||||||
, escapeName :: String
|
, escapeName :: String
|
||||||
} deriving (Eq,Show)
|
} deriving (Eq,Show)
|
||||||
newtype Si = Si (List1 Designator) deriving (Eq,Show)
|
newtype Si = Si (List1 Designator) deriving (Eq,Show)
|
||||||
newtype So = So (List1 Designator) deriving (Eq,Show)
|
newtype So = So (List1 Designator) deriving (Eq,Show)
|
||||||
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
|
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
|
||||||
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
|
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
|
||||||
data Designator = Designator
|
data Designator = Designator
|
||||||
{ designatorSequence :: String
|
{ designatorSequence :: String
|
||||||
, designatorName :: String
|
, designatorName :: String
|
||||||
|
|||||||
@ -18,9 +18,9 @@ import Data.Typeable
|
|||||||
import Data.Word
|
import Data.Word
|
||||||
|
|
||||||
data UTF16
|
data UTF16
|
||||||
= UTF16 -- ^ Decodes big and little endian, encodes big endian.
|
= UTF16 -- ^ Decodes big and little endian, encodes big endian.
|
||||||
| UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
|
| UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
|
||||||
| UTF16LE -- ^ Little endian decoding and encoding.
|
| UTF16LE -- ^ Little endian decoding and encoding.
|
||||||
deriving (Eq,Show,Typeable)
|
deriving (Eq,Show,Typeable)
|
||||||
|
|
||||||
readBOM :: ByteSource m => m (Either Char UTF16)
|
readBOM :: ByteSource m => m (Either Char UTF16)
|
||||||
|
|||||||
@ -16,10 +16,10 @@ import Data.Typeable
|
|||||||
|
|
||||||
|
|
||||||
data UTF32
|
data UTF32
|
||||||
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
|
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
|
||||||
| UTF32BE -- ^ Encodes and decodes using the big endian encoding.
|
| UTF32BE -- ^ Encodes and decodes using the big endian encoding.
|
||||||
| UTF32LE -- ^ Encodes and decodes using the little endian encoding.
|
| UTF32LE -- ^ Encodes and decodes using the little endian encoding.
|
||||||
deriving (Eq,Show,Typeable)
|
deriving (Eq,Show,Typeable)
|
||||||
|
|
||||||
instance Encoding UTF32 where
|
instance Encoding UTF32 where
|
||||||
encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch)
|
encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch)
|
||||||
|
|||||||
@ -67,17 +67,23 @@ import Control.Monad.Reader (runReaderT)
|
|||||||
-- encoding.
|
-- encoding.
|
||||||
hGetContents :: (Encoding e,?enc :: e) => Handle -> IO String
|
hGetContents :: (Encoding e,?enc :: e) => Handle -> IO String
|
||||||
hGetContents h = do
|
hGetContents h = do
|
||||||
str <- LBS.hGetContents h
|
str <- LBS.hGetContents h
|
||||||
return $ decodeLazyByteString ?enc str
|
return $ decodeLazyByteString ?enc str
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.getContents', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
getContents :: (Encoding e,?enc :: e) => IO String
|
getContents :: (Encoding e,?enc :: e) => IO String
|
||||||
getContents = do
|
getContents = do
|
||||||
str <- LBS.getContents
|
str <- LBS.getContents
|
||||||
return $ decodeLazyByteString ?enc str
|
return $ decodeLazyByteString ?enc str
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.putStr', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
putStr :: (Encoding e,?enc :: e) => String -> IO ()
|
putStr :: (Encoding e,?enc :: e) => String -> IO ()
|
||||||
putStr = hPutStr stdout
|
putStr = hPutStr stdout
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.putStrLn', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
putStrLn :: (Encoding e,?enc :: e) => String -> IO ()
|
putStrLn :: (Encoding e,?enc :: e) => String -> IO ()
|
||||||
putStrLn = hPutStrLn stdout
|
putStrLn = hPutStrLn stdout
|
||||||
|
|
||||||
@ -86,46 +92,72 @@ putStrLn = hPutStrLn stdout
|
|||||||
hPutStr :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
|
hPutStr :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
|
||||||
hPutStr h str = LBS.hPut h (encodeLazyByteString ?enc str)
|
hPutStr h str = LBS.hPut h (encodeLazyByteString ?enc str)
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.hPutStrLn', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
hPutStrLn :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
|
hPutStrLn :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
|
||||||
hPutStrLn h str = do
|
hPutStrLn h str = do
|
||||||
LBS.hPut h (encodeLazyByteString ?enc str)
|
LBS.hPut h (encodeLazyByteString ?enc str)
|
||||||
LBS.hPut h (encodeLazyByteString ?enc "\n")
|
LBS.hPut h (encodeLazyByteString ?enc "\n")
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.print', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
print :: (Encoding e,Show a,?enc :: e) => a -> IO ()
|
print :: (Encoding e,Show a,?enc :: e) => a -> IO ()
|
||||||
print = hPrint stdout
|
print = hPrint stdout
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.hPrint', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
hPrint :: (Encoding e,Show a,?enc :: e) => Handle -> a -> IO ()
|
hPrint :: (Encoding e,Show a,?enc :: e) => Handle -> a -> IO ()
|
||||||
hPrint h x = hPutStrLn h (show x)
|
hPrint h x = hPutStrLn h (show x)
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.readFile', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
readFile :: (Encoding e,?enc :: e) => FilePath -> IO String
|
readFile :: (Encoding e,?enc :: e) => FilePath -> IO String
|
||||||
readFile fn = LBS.readFile fn >>= return.(decodeLazyByteString ?enc)
|
readFile fn = LBS.readFile fn >>= return.(decodeLazyByteString ?enc)
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.writeFile', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
writeFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
|
writeFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
|
||||||
writeFile fn str = LBS.writeFile fn $ encodeLazyByteString ?enc str
|
writeFile fn str = LBS.writeFile fn $ encodeLazyByteString ?enc str
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.appendFile', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
appendFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
|
appendFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
|
||||||
appendFile fn str = LBS.appendFile fn $ encodeLazyByteString ?enc str
|
appendFile fn str = LBS.appendFile fn $ encodeLazyByteString ?enc str
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.getChar', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
getChar :: (Encoding e,?enc :: e) => IO Char
|
getChar :: (Encoding e,?enc :: e) => IO Char
|
||||||
getChar = hGetChar stdin
|
getChar = hGetChar stdin
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.hGetChar', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
hGetChar :: (Encoding e,?enc :: e) => Handle -> IO Char
|
hGetChar :: (Encoding e,?enc :: e) => Handle -> IO Char
|
||||||
hGetChar h = runReaderT (decodeChar ?enc) h
|
hGetChar h = runReaderT (decodeChar ?enc) h
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.getLine', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
getLine :: (Encoding e,?enc :: e) => IO String
|
getLine :: (Encoding e,?enc :: e) => IO String
|
||||||
getLine = hGetLine stdin
|
getLine = hGetLine stdin
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.hGetLine', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
hGetLine :: (Encoding e,?enc :: e) => Handle -> IO String
|
hGetLine :: (Encoding e,?enc :: e) => Handle -> IO String
|
||||||
hGetLine h = do
|
hGetLine h = do
|
||||||
line <- BS.hGetLine h
|
line <- BS.hGetLine h
|
||||||
return $ decodeStrictByteString ?enc line
|
return $ decodeStrictByteString ?enc line
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.putChar', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
putChar :: (Encoding e,?enc :: e) => Char -> IO ()
|
putChar :: (Encoding e,?enc :: e) => Char -> IO ()
|
||||||
putChar = hPutChar stdout
|
putChar = hPutChar stdout
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.hPutChar', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
hPutChar :: (Encoding e,?enc :: e) => Handle -> Char -> IO ()
|
hPutChar :: (Encoding e,?enc :: e) => Handle -> Char -> IO ()
|
||||||
hPutChar h c = runReaderT (encodeChar ?enc c) h
|
hPutChar h c = runReaderT (encodeChar ?enc c) h
|
||||||
|
|
||||||
|
-- | Like the normal 'System.IO.interact', but decodes the input using an
|
||||||
|
-- encoding.
|
||||||
interact :: (Encoding e,?enc :: e) => (String -> String) -> IO ()
|
interact :: (Encoding e,?enc :: e) => (String -> String) -> IO ()
|
||||||
interact f = do
|
interact f = do
|
||||||
line <- hGetLine stdin
|
line <- hGetLine stdin
|
||||||
@ -133,7 +165,7 @@ interact f = do
|
|||||||
|
|
||||||
#ifdef SYSTEM_ENCODING
|
#ifdef SYSTEM_ENCODING
|
||||||
foreign import ccall "system_encoding.h get_system_encoding"
|
foreign import ccall "system_encoding.h get_system_encoding"
|
||||||
get_system_encoding :: IO CString
|
get_system_encoding :: IO CString
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
-- | Returns the encoding used on the current system. Currently only supported
|
-- | Returns the encoding used on the current system. Currently only supported
|
||||||
@ -141,9 +173,9 @@ foreign import ccall "system_encoding.h get_system_encoding"
|
|||||||
getSystemEncoding :: IO DynEncoding
|
getSystemEncoding :: IO DynEncoding
|
||||||
getSystemEncoding = do
|
getSystemEncoding = do
|
||||||
#ifdef SYSTEM_ENCODING
|
#ifdef SYSTEM_ENCODING
|
||||||
enc <- get_system_encoding
|
enc <- get_system_encoding
|
||||||
str <- peekCString enc
|
str <- peekCString enc
|
||||||
return $ encodingFromString str
|
return $ encodingFromString str
|
||||||
#else
|
#else
|
||||||
error "getSystemEncoding is not supported on this platform"
|
error "getSystemEncoding is not supported on this platform"
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -1,16 +1,16 @@
|
|||||||
Name: encoding
|
Name: encoding
|
||||||
Version: 0.8.1
|
Version: 0.8.1
|
||||||
Author: Henning Günther
|
Author: Henning Günther
|
||||||
Maintainer: daniel@wagner-home.com
|
Maintainer: daniel@wagner-home.com
|
||||||
License: BSD3
|
License: BSD3
|
||||||
License-File: LICENSE
|
License-File: LICENSE
|
||||||
Synopsis: A library for various character encodings
|
Synopsis: A library for various character encodings
|
||||||
Description:
|
Description:
|
||||||
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
|
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
|
||||||
Category: Codec
|
Category: Codec
|
||||||
Homepage: http://code.haskell.org/encoding/
|
Homepage: http://code.haskell.org/encoding/
|
||||||
Cabal-Version: >=1.6
|
Cabal-Version: >=1.8
|
||||||
Build-Type: Custom
|
Build-Type: Custom
|
||||||
Extra-Source-Files:
|
Extra-Source-Files:
|
||||||
CHANGELOG
|
CHANGELOG
|
||||||
Data/Encoding/Preprocessor/Mapping.hs
|
Data/Encoding/Preprocessor/Mapping.hs
|
||||||
@ -45,7 +45,7 @@ Custom-Setup
|
|||||||
Library
|
Library
|
||||||
Build-Depends: array,
|
Build-Depends: array,
|
||||||
base >=3 && <5,
|
base >=3 && <5,
|
||||||
binary < 0.8,
|
binary,
|
||||||
bytestring,
|
bytestring,
|
||||||
containers,
|
containers,
|
||||||
extensible-exceptions,
|
extensible-exceptions,
|
||||||
@ -132,3 +132,16 @@ Library
|
|||||||
C-Sources:
|
C-Sources:
|
||||||
system_encoding.c
|
system_encoding.c
|
||||||
CPP-Options: -DSYSTEM_ENCODING
|
CPP-Options: -DSYSTEM_ENCODING
|
||||||
|
|
||||||
|
test-suite encoding-test
|
||||||
|
type: exitcode-stdio-1.0
|
||||||
|
hs-source-dirs: tests
|
||||||
|
main-is: Main.hs
|
||||||
|
other-modules: Test.Tester
|
||||||
|
, Test.Tests
|
||||||
|
build-depends: base
|
||||||
|
, bytestring
|
||||||
|
, encoding
|
||||||
|
, HUnit
|
||||||
|
, QuickCheck
|
||||||
|
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||||
66
stack.yaml
Normal file
66
stack.yaml
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# This file was automatically generated by 'stack init'
|
||||||
|
#
|
||||||
|
# Some commonly used options have been documented as comments in this file.
|
||||||
|
# For advanced use and comprehensive documentation of the format, please see:
|
||||||
|
# http://docs.haskellstack.org/en/stable/yaml_configuration/
|
||||||
|
|
||||||
|
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
|
||||||
|
# A snapshot resolver dictates the compiler version and the set of packages
|
||||||
|
# to be used for project dependencies. For example:
|
||||||
|
#
|
||||||
|
# resolver: lts-3.5
|
||||||
|
# resolver: nightly-2015-09-21
|
||||||
|
# resolver: ghc-7.10.2
|
||||||
|
# resolver: ghcjs-0.1.0_ghc-7.10.2
|
||||||
|
# resolver:
|
||||||
|
# name: custom-snapshot
|
||||||
|
# location: "./custom-snapshot.yaml"
|
||||||
|
resolver: lts-8.22
|
||||||
|
|
||||||
|
# User packages to be built.
|
||||||
|
# Various formats can be used as shown in the example below.
|
||||||
|
#
|
||||||
|
# packages:
|
||||||
|
# - some-directory
|
||||||
|
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
|
||||||
|
# - location:
|
||||||
|
# git: https://github.com/commercialhaskell/stack.git
|
||||||
|
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
|
||||||
|
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
|
||||||
|
# extra-dep: true
|
||||||
|
# subdirs:
|
||||||
|
# - auto-update
|
||||||
|
# - wai
|
||||||
|
#
|
||||||
|
# A package marked 'extra-dep: true' will only be built if demanded by a
|
||||||
|
# non-dependency (i.e. a user package), and its test suites and benchmarks
|
||||||
|
# will not be run. This is useful for tweaking upstream packages.
|
||||||
|
packages:
|
||||||
|
- '.'
|
||||||
|
# Dependency packages to be pulled from upstream that are not in the resolver
|
||||||
|
# (e.g., acme-missiles-0.3)
|
||||||
|
extra-deps: []
|
||||||
|
|
||||||
|
# Override default flag values for local packages and extra-deps
|
||||||
|
flags: {}
|
||||||
|
|
||||||
|
# Extra package databases containing global packages
|
||||||
|
extra-package-dbs: []
|
||||||
|
|
||||||
|
# Control whether we use the GHC we find on the path
|
||||||
|
# system-ghc: true
|
||||||
|
#
|
||||||
|
# Require a specific version of stack, using version ranges
|
||||||
|
# require-stack-version: -any # Default
|
||||||
|
# require-stack-version: ">=1.4"
|
||||||
|
#
|
||||||
|
# Override the architecture used by stack, especially useful on Windows
|
||||||
|
# arch: i386
|
||||||
|
# arch: x86_64
|
||||||
|
#
|
||||||
|
# Extra directories used by stack for building
|
||||||
|
# extra-include-dirs: [/path/to/dir]
|
||||||
|
# extra-lib-dirs: [/path/to/dir]
|
||||||
|
#
|
||||||
|
# Allow a newer minor version of GHC than the snapshot specifies
|
||||||
|
# compiler-check: newer-minor
|
||||||
@ -3,16 +3,16 @@ import Test.HUnit
|
|||||||
import Test.Tests
|
import Test.Tests
|
||||||
|
|
||||||
hunitTests =
|
hunitTests =
|
||||||
[ ("utf8Tests", utf8Tests)
|
[ ("utf8Tests", utf8Tests)
|
||||||
, ("utf16Tests", utf16Tests)
|
, ("utf16Tests", utf16Tests)
|
||||||
, ("punycodeTests", punycodeTests)
|
, ("punycodeTests", punycodeTests)
|
||||||
, ("isoTests", isoTests)
|
, ("isoTests", isoTests)
|
||||||
, ("jisTests", jisTests)
|
, ("jisTests", jisTests)
|
||||||
, ("gb18030Tests", gb18030Tests)
|
, ("gb18030Tests", gb18030Tests)
|
||||||
]
|
]
|
||||||
|
|
||||||
main = do
|
main = do
|
||||||
identityTests
|
identityTests
|
||||||
forM_ hunitTests $ \(name, test) -> do
|
forM_ hunitTests $ \(name, test) -> do
|
||||||
putStrLn $ "running " ++ name
|
putStrLn $ "running " ++ name
|
||||||
runTestTT test >>= print
|
runTestTT test >>= print
|
||||||
|
|||||||
@ -13,14 +13,14 @@ import Prelude hiding (readFile)
|
|||||||
import System.IO.Encoding
|
import System.IO.Encoding
|
||||||
|
|
||||||
data EncodingTest
|
data EncodingTest
|
||||||
= forall enc. (Encoding enc,Show enc) =>
|
= forall enc. (Encoding enc,Show enc) =>
|
||||||
EncodingTest enc String [Word8]
|
EncodingTest enc String [Word8]
|
||||||
| forall enc. (Encoding enc,Show enc) =>
|
| forall enc. (Encoding enc,Show enc) =>
|
||||||
EncodingFileTest enc FilePath FilePath
|
EncodingFileTest enc FilePath FilePath
|
||||||
| forall enc. (Encoding enc,Show enc) =>
|
| forall enc. (Encoding enc,Show enc) =>
|
||||||
DecodingError enc [Word8] DecodingException
|
DecodingError enc [Word8] DecodingException
|
||||||
| forall enc. (Encoding enc,Show enc) =>
|
| forall enc. (Encoding enc,Show enc) =>
|
||||||
EncodingError enc String EncodingException
|
EncodingError enc String EncodingException
|
||||||
|
|
||||||
instance Testable EncodingTest where
|
instance Testable EncodingTest where
|
||||||
test (EncodingTest enc src trg)
|
test (EncodingTest enc src trg)
|
||||||
|
|||||||
@ -70,173 +70,173 @@ identityTests = do
|
|||||||
utf8Tests :: Test
|
utf8Tests :: Test
|
||||||
utf8Tests = TestList $ map test $ concat
|
utf8Tests = TestList $ map test $ concat
|
||||||
[[EncodingTest enc "\x0041\x2262\x0391\x002E"
|
[[EncodingTest enc "\x0041\x2262\x0391\x002E"
|
||||||
[0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
|
[0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
|
||||||
,EncodingTest enc "\xD55C\xAD6D\xC5B4"
|
,EncodingTest enc "\xD55C\xAD6D\xC5B4"
|
||||||
[0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
|
[0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
|
||||||
,EncodingTest enc "\x65E5\x672C\x8A9E"
|
,EncodingTest enc "\x65E5\x672C\x8A9E"
|
||||||
[0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
|
[0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
|
||||||
,EncodingTest enc "\x233B4"
|
,EncodingTest enc "\x233B4"
|
||||||
[0xF0,0xA3,0x8E,0xB4]
|
[0xF0,0xA3,0x8E,0xB4]
|
||||||
,EncodingTest enc ""
|
,EncodingTest enc ""
|
||||||
[]
|
[]
|
||||||
-- First possible sequence of a certain length
|
-- First possible sequence of a certain length
|
||||||
,EncodingTest enc "\x0000"
|
,EncodingTest enc "\x0000"
|
||||||
[0x00]
|
[0x00]
|
||||||
,EncodingTest enc "\x0080"
|
,EncodingTest enc "\x0080"
|
||||||
[0xC2,0x80]
|
[0xC2,0x80]
|
||||||
,EncodingTest enc "\x0800"
|
,EncodingTest enc "\x0800"
|
||||||
[0xE0,0xA0,0x80]
|
[0xE0,0xA0,0x80]
|
||||||
,EncodingTest enc "\x10000"
|
,EncodingTest enc "\x10000"
|
||||||
[0xF0,0x90,0x80,0x80]
|
[0xF0,0x90,0x80,0x80]
|
||||||
-- Last possible sequence of a certain length
|
-- Last possible sequence of a certain length
|
||||||
,EncodingTest enc "\x007F"
|
,EncodingTest enc "\x007F"
|
||||||
[0x7F]
|
[0x7F]
|
||||||
,EncodingTest enc "\x07FF"
|
,EncodingTest enc "\x07FF"
|
||||||
[0xDF,0xBF]
|
[0xDF,0xBF]
|
||||||
,EncodingTest enc "\xFFFF"
|
,EncodingTest enc "\xFFFF"
|
||||||
[0xEF,0xBF,0xBF]
|
[0xEF,0xBF,0xBF]
|
||||||
-- Other boundaries
|
-- Other boundaries
|
||||||
,EncodingTest enc "\xD7FF"
|
,EncodingTest enc "\xD7FF"
|
||||||
[0xED,0x9F,0xBF]
|
[0xED,0x9F,0xBF]
|
||||||
,EncodingTest enc "\xE000"
|
,EncodingTest enc "\xE000"
|
||||||
[0xEE,0x80,0x80]
|
[0xEE,0x80,0x80]
|
||||||
,EncodingTest enc "\xFFFD"
|
,EncodingTest enc "\xFFFD"
|
||||||
[0xEF,0xBF,0xBD]
|
[0xEF,0xBF,0xBD]
|
||||||
-- Illegal starting characters
|
-- Illegal starting characters
|
||||||
,DecodingError enc
|
,DecodingError enc
|
||||||
[0x65,0x55,0x85]
|
[0x65,0x55,0x85]
|
||||||
(IllegalCharacter 0x85)
|
(IllegalCharacter 0x85)
|
||||||
-- Unexpected end
|
-- Unexpected end
|
||||||
,DecodingError enc
|
,DecodingError enc
|
||||||
[0x41,0xE2,0x89,0xA2,0xCE]
|
[0x41,0xE2,0x89,0xA2,0xCE]
|
||||||
UnexpectedEnd
|
UnexpectedEnd
|
||||||
,DecodingError enc
|
,DecodingError enc
|
||||||
[0x41,0xE2,0x89]
|
[0x41,0xE2,0x89]
|
||||||
UnexpectedEnd
|
UnexpectedEnd
|
||||||
,DecodingError enc
|
,DecodingError enc
|
||||||
[0x41,0xE2]
|
[0x41,0xE2]
|
||||||
UnexpectedEnd
|
UnexpectedEnd
|
||||||
]
|
]
|
||||||
| enc <- [UTF8,UTF8Strict]
|
| enc <- [UTF8,UTF8Strict]
|
||||||
]++
|
]++
|
||||||
[DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
|
[DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
|
||||||
,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
|
,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
|
||||||
-- Overlong representations of '/'
|
-- Overlong representations of '/'
|
||||||
,DecodingError UTF8Strict [0xC0,0xAF]
|
,DecodingError UTF8Strict [0xC0,0xAF]
|
||||||
(IllegalRepresentation [0xC0,0xAF])
|
(IllegalRepresentation [0xC0,0xAF])
|
||||||
,DecodingError UTF8Strict [0xE0,0x80,0xAF]
|
,DecodingError UTF8Strict [0xE0,0x80,0xAF]
|
||||||
(IllegalRepresentation [0xE0,0x80,0xAF])
|
(IllegalRepresentation [0xE0,0x80,0xAF])
|
||||||
,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF]
|
,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF]
|
||||||
(IllegalRepresentation [0xF0,0x80,0x80,0xAF])
|
(IllegalRepresentation [0xF0,0x80,0x80,0xAF])
|
||||||
-- Maximum overlong sequences
|
-- Maximum overlong sequences
|
||||||
,DecodingError UTF8Strict [0xC1,0xBF]
|
,DecodingError UTF8Strict [0xC1,0xBF]
|
||||||
(IllegalRepresentation [0xC1,0xBF])
|
(IllegalRepresentation [0xC1,0xBF])
|
||||||
,DecodingError UTF8Strict [0xE0,0x9F,0xBF]
|
,DecodingError UTF8Strict [0xE0,0x9F,0xBF]
|
||||||
(IllegalRepresentation [0xE0,0x9F,0xBF])
|
(IllegalRepresentation [0xE0,0x9F,0xBF])
|
||||||
,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF]
|
,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF]
|
||||||
(IllegalRepresentation [0xF0,0x8F,0xBF,0xBF])
|
(IllegalRepresentation [0xF0,0x8F,0xBF,0xBF])
|
||||||
-- Overlong represenations of '\NUL'
|
-- Overlong represenations of '\NUL'
|
||||||
,DecodingError UTF8Strict [0xC0,0x80]
|
,DecodingError UTF8Strict [0xC0,0x80]
|
||||||
(IllegalRepresentation [0xC0,0x80])
|
(IllegalRepresentation [0xC0,0x80])
|
||||||
,DecodingError UTF8Strict [0xE0,0x80,0x80]
|
,DecodingError UTF8Strict [0xE0,0x80,0x80]
|
||||||
(IllegalRepresentation [0xE0,0x80,0x80])
|
(IllegalRepresentation [0xE0,0x80,0x80])
|
||||||
,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80]
|
,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80]
|
||||||
(IllegalRepresentation [0xF0,0x80,0x80,0x80])
|
(IllegalRepresentation [0xF0,0x80,0x80,0x80])
|
||||||
-- Invalid extends
|
-- Invalid extends
|
||||||
-- 2 of 2
|
-- 2 of 2
|
||||||
,DecodingError UTF8Strict [0xCC,0x1C,0xE0]
|
,DecodingError UTF8Strict [0xCC,0x1C,0xE0]
|
||||||
(IllegalCharacter 0x1C)
|
(IllegalCharacter 0x1C)
|
||||||
-- 2 of 3
|
-- 2 of 3
|
||||||
,DecodingError UTF8Strict [0xE3,0x6C,0xB3]
|
,DecodingError UTF8Strict [0xE3,0x6C,0xB3]
|
||||||
(IllegalCharacter 0x6C)
|
(IllegalCharacter 0x6C)
|
||||||
-- 3 of 3
|
-- 3 of 3
|
||||||
,DecodingError UTF8Strict [0xE3,0xB4,0x6D]
|
,DecodingError UTF8Strict [0xE3,0xB4,0x6D]
|
||||||
(IllegalCharacter 0x6D)
|
(IllegalCharacter 0x6D)
|
||||||
-- 2 of 4
|
-- 2 of 4
|
||||||
,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3]
|
,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3]
|
||||||
(IllegalCharacter 0x6C)
|
(IllegalCharacter 0x6C)
|
||||||
-- 3 of 4
|
-- 3 of 4
|
||||||
,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3]
|
,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3]
|
||||||
(IllegalCharacter 0x6C)
|
(IllegalCharacter 0x6C)
|
||||||
-- 4 of 4
|
-- 4 of 4
|
||||||
,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C]
|
,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C]
|
||||||
(IllegalCharacter 0x6C)
|
(IllegalCharacter 0x6C)
|
||||||
]
|
]
|
||||||
|
|
||||||
utf16Tests :: Test
|
utf16Tests :: Test
|
||||||
utf16Tests = TestList $ map test $
|
utf16Tests = TestList $ map test $
|
||||||
[EncodingTest UTF16BE "z"
|
[EncodingTest UTF16BE "z"
|
||||||
[0x00,0x7A]
|
[0x00,0x7A]
|
||||||
,EncodingTest UTF16BE "\x6C34"
|
,EncodingTest UTF16BE "\x6C34"
|
||||||
[0x6C,0x34]
|
[0x6C,0x34]
|
||||||
,EncodingTest UTF16BE "\x1D11E"
|
,EncodingTest UTF16BE "\x1D11E"
|
||||||
[0xD8,0x34,0xDD,0x1E]
|
[0xD8,0x34,0xDD,0x1E]
|
||||||
,EncodingTest UTF16 "\x6C34z\x1D11E"
|
,EncodingTest UTF16 "\x6C34z\x1D11E"
|
||||||
[0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
|
[0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
|
||||||
,EncodingTest UTF16BE "˨"
|
,EncodingTest UTF16BE "˨"
|
||||||
[0x02,0xE8]
|
[0x02,0xE8]
|
||||||
,DecodingError UTF16LE [0x65,0xDC]
|
,DecodingError UTF16LE [0x65,0xDC]
|
||||||
(IllegalCharacter 0xDC)
|
(IllegalCharacter 0xDC)
|
||||||
,DecodingError UTF16BE [0xDC,0x33]
|
,DecodingError UTF16BE [0xDC,0x33]
|
||||||
(IllegalCharacter 0xDC)
|
(IllegalCharacter 0xDC)
|
||||||
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33]
|
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33]
|
||||||
(IllegalCharacter 0xDA)
|
(IllegalCharacter 0xDA)
|
||||||
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66]
|
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66]
|
||||||
(IllegalCharacter 0xDA)
|
(IllegalCharacter 0xDA)
|
||||||
]
|
]
|
||||||
|
|
||||||
punycodeTests :: Test
|
punycodeTests :: Test
|
||||||
punycodeTests = TestList $ map test $
|
punycodeTests = TestList $ map test $
|
||||||
[EncodingTest punycode "abcdef"
|
[EncodingTest punycode "abcdef"
|
||||||
(map (fromIntegral.ord) "abcdef-")
|
(map (fromIntegral.ord) "abcdef-")
|
||||||
,EncodingTest punycode "abæcdöef"
|
,EncodingTest punycode "abæcdöef"
|
||||||
(map (fromIntegral.ord) "abcdef-qua4k")
|
(map (fromIntegral.ord) "abcdef-qua4k")
|
||||||
,EncodingTest punycode "schön"
|
,EncodingTest punycode "schön"
|
||||||
(map (fromIntegral.ord) "schn-7qa")
|
(map (fromIntegral.ord) "schn-7qa")
|
||||||
,EncodingTest punycode "ยจฆฟคฏข"
|
,EncodingTest punycode "ยจฆฟคฏข"
|
||||||
(map (fromIntegral.ord) "22cdfh1b8fsa")
|
(map (fromIntegral.ord) "22cdfh1b8fsa")
|
||||||
,EncodingTest punycode "☺"
|
,EncodingTest punycode "☺"
|
||||||
(map (fromIntegral.ord) "74h")
|
(map (fromIntegral.ord) "74h")
|
||||||
-- taken from http://tools.ietf.org/html/rfc3492#section-7
|
-- taken from http://tools.ietf.org/html/rfc3492#section-7
|
||||||
-- Arabic (Egyptian)
|
-- Arabic (Egyptian)
|
||||||
,punyTest "ليهمابتكلموشعربي؟"
|
,punyTest "ليهمابتكلموشعربي؟"
|
||||||
"egbpdaj6bu4bxfgehfvwxn"
|
"egbpdaj6bu4bxfgehfvwxn"
|
||||||
-- Chinese (simplified)
|
-- Chinese (simplified)
|
||||||
,punyTest "他们为什么不说中文"
|
,punyTest "他们为什么不说中文"
|
||||||
"ihqwcrb4cv8a8dqg056pqjye"
|
"ihqwcrb4cv8a8dqg056pqjye"
|
||||||
-- Chinese (traditional)
|
-- Chinese (traditional)
|
||||||
,punyTest "他們爲什麽不說中文"
|
,punyTest "他們爲什麽不說中文"
|
||||||
"ihqwctvzc91f659drss3x8bo0yb"
|
"ihqwctvzc91f659drss3x8bo0yb"
|
||||||
-- Czech
|
-- Czech
|
||||||
,punyTest "Pročprostěnemluvíčesky"
|
,punyTest "Pročprostěnemluvíčesky"
|
||||||
"Proprostnemluvesky-uyb24dma41a"
|
"Proprostnemluvesky-uyb24dma41a"
|
||||||
-- Hebrew
|
-- Hebrew
|
||||||
,punyTest "למההםפשוטלאמדבריםעברית"
|
,punyTest "למההםפשוטלאמדבריםעברית"
|
||||||
"4dbcagdahymbxekheh6e0a7fei0b"
|
"4dbcagdahymbxekheh6e0a7fei0b"
|
||||||
-- Hindi (Devanagari)
|
-- Hindi (Devanagari)
|
||||||
,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
|
,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
|
||||||
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
|
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
|
||||||
-- Japanese (kanji and hiragana)
|
-- Japanese (kanji and hiragana)
|
||||||
,punyTest "なぜみんな日本語を話してくれないのか"
|
,punyTest "なぜみんな日本語を話してくれないのか"
|
||||||
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
|
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
|
||||||
-- Korean (Hangul syllables)
|
-- Korean (Hangul syllables)
|
||||||
,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
|
,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
|
||||||
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
|
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
|
||||||
-- Russian (Cyrillic)
|
-- Russian (Cyrillic)
|
||||||
,punyTest "почемужеонинеговорятпорусски"
|
,punyTest "почемужеонинеговорятпорусски"
|
||||||
"b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
|
"b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
|
||||||
-- Spanish
|
-- Spanish
|
||||||
,punyTest "PorquénopuedensimplementehablarenEspañol"
|
,punyTest "PorquénopuedensimplementehablarenEspañol"
|
||||||
"PorqunopuedensimplementehablarenEspaol-fmd56a"
|
"PorqunopuedensimplementehablarenEspaol-fmd56a"
|
||||||
-- Vietnamese
|
-- Vietnamese
|
||||||
,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
|
,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
|
||||||
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
|
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
|
||||||
{-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
|
{-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
|
||||||
\bar\xE2\x80\x8B\xE2\x81\xA0\
|
\bar\xE2\x80\x8B\xE2\x81\xA0\
|
||||||
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
|
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
|
||||||
"foobarbaz"-}
|
"foobarbaz"-}
|
||||||
]
|
]
|
||||||
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
||||||
|
|
||||||
isoTests :: Test
|
isoTests :: Test
|
||||||
isoTests = TestList $ map test $
|
isoTests = TestList $ map test $
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user