Made package stack compatible

Created and added stack.yaml and .gitignore files.
Relaxed the version dependency on 'binary' package in cabal file.  Is that OK?
Also brought the minimum cabal version to >=1.8, so I could add a test target that pulls in the library.
Changed all tabs to spaces - I don't know when the Haskell compiler started giving warnings about that.
This commit is contained in:
Scott Sedgwick 2017-07-28 14:17:39 +10:00
parent 6284c1a677
commit 8727ac25a5
15 changed files with 478 additions and 345 deletions

22
.gitignore vendored Normal file
View File

@ -0,0 +1,22 @@
### Haskell ###
dist
dist-*
cabal-dev
*.o
*.hi
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
.HTF/

View File

@ -359,6 +359,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
-- | Takes the name of an encoding and creates a dynamic encoding from it. -- | Takes the name of an encoding and creates a dynamic encoding from it.
encodingFromString :: String -> DynEncoding encodingFromString :: String -> DynEncoding
encodingFromString str = maybe encodingFromString str = maybe
(error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str) (error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str)
id id
(encodingFromStringExplicit str) (encodingFromStringExplicit str)

View File

@ -2,8 +2,8 @@
{- | This implements BootString en- and decoding, the foundation of Punycode {- | This implements BootString en- and decoding, the foundation of Punycode
-} -}
module Data.Encoding.BootString module Data.Encoding.BootString
(BootString(..) (BootString(..)
,punycode) where ,punycode) where
import Data.Encoding.Base import Data.Encoding.Base
import Data.Encoding.Exception import Data.Encoding.Exception
@ -17,77 +17,77 @@ import Data.Typeable
import Control.Monad (when) import Control.Monad (when)
data BootString = BootString data BootString = BootString
{base :: Int {base :: Int
,tmin :: Int ,tmin :: Int
,tmax :: Int ,tmax :: Int
,skew :: Int ,skew :: Int
,damp :: Int ,damp :: Int
,init_bias :: Int ,init_bias :: Int
,init_n :: Int ,init_n :: Int
} }
deriving (Show,Eq,Typeable) deriving (Show,Eq,Typeable)
punycode :: BootString punycode :: BootString
punycode = BootString punycode = BootString
{base = 36 {base = 36
,tmin = 1 ,tmin = 1
,tmax = 26 ,tmax = 26
,skew = 38 ,skew = 38
,damp = 700 ,damp = 700
,init_bias = 72 ,init_bias = 72
,init_n = 0x80 ,init_n = 0x80
} }
punyValue :: ByteSource m => Word8 -> m Int punyValue :: ByteSource m => Word8 -> m Int
punyValue c punyValue c
| n < 0x30 = norep | n < 0x30 = norep
| n <= 0x39 = return $ n-0x30+26 | n <= 0x39 = return $ n-0x30+26
| n < 0x41 = norep | n < 0x41 = norep
| n <= 0x5A = return $ n-0x41 | n <= 0x5A = return $ n-0x41
| n < 0x61 = norep | n < 0x61 = norep
| n <= 0x7A = return $ n-0x61 | n <= 0x7A = return $ n-0x61
| otherwise = norep | otherwise = norep
where where
n = fromIntegral c n = fromIntegral c
norep = throwException (IllegalCharacter c) norep = throwException (IllegalCharacter c)
punyChar :: ByteSink m => Int -> m Word8 punyChar :: ByteSink m => Int -> m Word8
punyChar c punyChar c
| c < 0 = norep | c < 0 = norep
| c < 26 = return $ fromIntegral $ 0x61+c | c < 26 = return $ fromIntegral $ 0x61+c
| c < 36 = return $ fromIntegral $ 0x30+c-26 | c < 36 = return $ fromIntegral $ 0x30+c-26
| otherwise = norep | otherwise = norep
where where
norep = throwException (HasNoRepresentation (chr c)) norep = throwException (HasNoRepresentation (chr c))
getT :: BootString -> Int -> Int -> Int getT :: BootString -> Int -> Int -> Int
getT bs k bias getT bs k bias
| k <= bias + (tmin bs) = tmin bs | k <= bias + (tmin bs) = tmin bs
| k >= bias + (tmax bs) = tmax bs | k >= bias + (tmax bs) = tmax bs
| otherwise = k-bias | otherwise = k-bias
adapt :: BootString -> Int -> Int -> Bool -> Int adapt :: BootString -> Int -> Int -> Bool -> Int
adapt bs delta numpoints firsttime = let adapt bs delta numpoints firsttime = let
delta1 = if firsttime delta1 = if firsttime
then delta `div` (damp bs) then delta `div` (damp bs)
else delta `div` 2 else delta `div` 2
delta2 = delta1 + (delta1 `div` numpoints) delta2 = delta1 + (delta1 `div` numpoints)
(rd,rk) = head (rd,rk) = head
$ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst) $ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst)
$ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0) $ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0)
in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs)) in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs))
decodeValue :: ByteSource m => BootString -> Int -> Int -> Int -> Int -> [Int] -> m (Int,[Int]) decodeValue :: ByteSource m => BootString -> Int -> Int -> Int -> Int -> [Int] -> m (Int,[Int])
decodeValue bs bias i k w (x:xs) decodeValue bs bias i k w (x:xs)
| x >= base bs = throwException OutOfRange | x >= base bs = throwException OutOfRange
| x > (maxBound - i) `div` w = throwException OutOfRange | x > (maxBound - i) `div` w = throwException OutOfRange
| x < t = return (ni,xs) | x < t = return (ni,xs)
| w > maxBound `div` (base bs - t) = throwException OutOfRange | w > maxBound `div` (base bs - t) = throwException OutOfRange
| null xs = throwException OutOfRange | null xs = throwException OutOfRange
| otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs | otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs
where where
ni = i + x*w ni = i + x*w
t = getT bs k bias t = getT bs k bias
decodeValues :: ByteSource m => BootString -> Int -> [Int] -> m [(Char,Int)] decodeValues :: ByteSource m => BootString -> Int -> [Int] -> m [(Char,Int)]
decodeValues bs len xs = decodeValues' bs (init_n bs) 0 (init_bias bs) len xs decodeValues bs len xs = decodeValues' bs (init_n bs) 0 (init_bias bs) len xs
@ -108,8 +108,8 @@ decodeValues' bs n i bias len xs = do
insertDeltas :: [(a,Int)] -> [a] -> [a] insertDeltas :: [(a,Int)] -> [a] -> [a]
insertDeltas [] str = str insertDeltas [] str = str
insertDeltas ((c,p):xs) str = let insertDeltas ((c,p):xs) str = let
(l,r) = splitAt p str (l,r) = splitAt p str
in insertDeltas xs (l++[c]++r) in insertDeltas xs (l++[c]++r)
punyDecode :: ByteSource m => [Word8] -> [Word8] -> m String punyDecode :: ByteSource m => [Word8] -> [Word8] -> m String
punyDecode base ext = do punyDecode base ext = do
@ -119,32 +119,32 @@ punyDecode base ext = do
encodeValue :: BootString -> Int -> Int -> Int -> Int -> [Int] encodeValue :: BootString -> Int -> Int -> Int -> Int -> [Int]
encodeValue bs bias delta n c = unfoldr (\(q,k,out) -> let encodeValue bs bias delta n c = unfoldr (\(q,k,out) -> let
t = getT bs k bias t = getT bs k bias
(nq,dc) = (q-t) `divMod` (base bs - t) (nq,dc) = (q-t) `divMod` (base bs - t)
in if out in if out
then Nothing then Nothing
else (if q < t else (if q < t
then Just (q,(q,k+base bs,True)) then Just (q,(q,k+base bs,True))
else Just (t + dc,(nq,k+base bs,False))) else Just (t + dc,(nq,k+base bs,False)))
) (delta,base bs,False) ) (delta,base bs,False)
encodeValues' :: BootString -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int,Int,Int,[Int]) encodeValues' :: BootString -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int,Int,Int,[Int])
encodeValues' _ _ h bias delta _ [] = (delta,h,bias,[]) encodeValues' _ _ h bias delta _ [] = (delta,h,bias,[])
encodeValues' bs b h bias delta n (c:cs) = case compare c n of encodeValues' bs b h bias delta n (c:cs) = case compare c n of
LT -> encodeValues' bs b h bias (delta+1) n cs LT -> encodeValues' bs b h bias (delta+1) n cs
GT -> encodeValues' bs b h bias delta n cs GT -> encodeValues' bs b h bias delta n cs
EQ -> let EQ -> let
(ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs (ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs
xs = encodeValue bs bias delta n c xs = encodeValue bs bias delta n c
in (ndelta,nh,nbias,xs++rest) in (ndelta,nh,nbias,xs++rest)
encodeValues :: BootString -> Int -> Int -> Int -> Int -> Int -> Int -> [Int] -> [Int] encodeValues :: BootString -> Int -> Int -> Int -> Int -> Int -> Int -> [Int] -> [Int]
encodeValues bs b l h bias delta n cps encodeValues bs b l h bias delta n cps
| h == l = [] | h == l = []
| otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps | otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps
where where
m = minimum (filter (>=n) cps) m = minimum (filter (>=n) cps)
(ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps (ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps
breakLast :: (a -> Bool) -> [a] -> Maybe ([a],[a]) breakLast :: (a -> Bool) -> [a] -> Maybe ([a],[a])
breakLast p xs = do breakLast p xs = do
@ -166,7 +166,7 @@ instance Encoding BootString where
encodeChar _ c = error "Data.Encoding.BootString.encodeChar: Please use 'encode' for encoding BootStrings" encodeChar _ c = error "Data.Encoding.BootString.encodeChar: Please use 'encode' for encoding BootStrings"
decodeChar _ = error "Data.Encoding.BootString.decodeChar: Please use 'decode' for decoding BootStrings" decodeChar _ = error "Data.Encoding.BootString.decodeChar: Please use 'decode' for decoding BootStrings"
encode bs str = let (base,nbase) = partition (\c -> ord c < init_n bs) str encode bs str = let (base,nbase) = partition (\c -> ord c < init_n bs) str
b = length base b = length base
in do in do
res <- mapM punyChar $ encodeValues bs b (length str) b (init_bias bs) 0 (init_n bs) (map ord str) res <- mapM punyChar $ encodeValues bs b (length str) b (init_bias bs) 0 (init_n bs) (map ord str)
when (not $ null base) $ do when (not $ null base) $ do
@ -178,8 +178,8 @@ instance Encoding BootString where
let m = fromIntegral $ ord '-' let m = fromIntegral $ ord '-'
case breakLast (==m) wrds of case breakLast (==m) wrds of
Just ([],_) -> throwException (IllegalCharacter m) Just ([],_) -> throwException (IllegalCharacter m)
Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of
Nothing -> punyDecode base nbase Nothing -> punyDecode base nbase
Just ww -> throwException (IllegalCharacter ww) Just ww -> throwException (IllegalCharacter ww)
Nothing -> punyDecode [] wrds Nothing -> punyDecode [] wrds
encodeable bs c = True -- XXX: hm, really? encodeable bs c = True -- XXX: hm, really?

View File

@ -9,8 +9,8 @@ import Control.Monad.Identity
-- | This exception type is thrown whenever something went wrong during the -- | This exception type is thrown whenever something went wrong during the
-- encoding-process. -- encoding-process.
data EncodingException data EncodingException
= HasNoRepresentation Char -- ^ Thrown if a specific character = HasNoRepresentation Char -- ^ Thrown if a specific character
-- is not representable in an encoding. -- is not representable in an encoding.
deriving (Eq,Ord,Show,Read,Typeable) deriving (Eq,Ord,Show,Read,Typeable)
instance Exception EncodingException instance Exception EncodingException
@ -18,13 +18,13 @@ instance Exception EncodingException
-- | This exception type is thrown whenever something went wrong during the -- | This exception type is thrown whenever something went wrong during the
-- decoding-process. -- decoding-process.
data DecodingException data DecodingException
= IllegalCharacter Word8 -- ^ The sequence contained an illegal = IllegalCharacter Word8 -- ^ The sequence contained an illegal
-- byte that couldn't be decoded. -- byte that couldn't be decoded.
| UnexpectedEnd -- ^ more bytes were needed to allow a | UnexpectedEnd -- ^ more bytes were needed to allow a
-- successfull decoding. -- successfull decoding.
| OutOfRange -- ^ the decoded value was out of the unicode range | OutOfRange -- ^ the decoded value was out of the unicode range
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a | IllegalRepresentation [Word8] -- ^ The character sequence encodes a
-- character, but is illegal. -- character, but is illegal.
deriving (Eq,Ord,Show,Read,Typeable) deriving (Eq,Ord,Show,Read,Typeable)
instance Exception DecodingException instance Exception DecodingException

View File

@ -3,7 +3,7 @@
See <http://en.wikipedia.org/wiki/KOI8-R> for more information. See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
-} -}
module Data.Encoding.KOI8R module Data.Encoding.KOI8R
(KOI8R(..)) where (KOI8R(..)) where
import Control.Throws import Control.Throws
import Data.Array.Unboxed import Data.Array.Unboxed
@ -28,23 +28,23 @@ koi8rMap = fromList (zip koi8rList [128..])
koi8rList :: [Char] koi8rList :: [Char]
koi8rList = koi8rList =
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524' ['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590' ,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248' ,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7' ,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556' ,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e' ,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565' ,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9' ,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433' ,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e' ,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432' ,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a' ,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413' ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e' ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412' ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a' ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
] ]
instance Encoding KOI8R where instance Encoding KOI8R where
decodeChar _ = do decodeChar _ = do
@ -53,8 +53,8 @@ instance Encoding KOI8R where
then return $ chr $ fromIntegral w then return $ chr $ fromIntegral w
else return $ koi8rArr!w else return $ koi8rArr!w
encodeChar _ ch encodeChar _ ch
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch | ch < '\128' = pushWord8 $ fromIntegral $ ord ch
| otherwise = case lookup ch koi8rMap of | otherwise = case lookup ch koi8rMap of
Just w -> pushWord8 w Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch) Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8rMap encodeable _ c = member c koi8rMap

View File

@ -3,7 +3,7 @@
See <http://en.wikipedia.org/wiki/KOI8-U> for more information. See <http://en.wikipedia.org/wiki/KOI8-U> for more information.
-} -}
module Data.Encoding.KOI8U module Data.Encoding.KOI8U
(KOI8U(..)) where (KOI8U(..)) where
import Data.Encoding.Base import Data.Encoding.Base
import Data.Encoding.ByteSource import Data.Encoding.ByteSource
@ -28,23 +28,23 @@ koi8uMap = fromList (zip koi8uList [128..])
koi8uList :: [Char] koi8uList :: [Char]
koi8uList = koi8uList =
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524' ['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590' ,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248' ,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7' ,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457' ,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e' ,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407' ,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9' ,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433' ,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e' ,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432' ,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a' ,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413' ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e' ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412' ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a' ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
] ]
instance Encoding KOI8U where instance Encoding KOI8U where
decodeChar _ = do decodeChar _ = do
@ -53,8 +53,8 @@ instance Encoding KOI8U where
then return $ chr $ fromIntegral w then return $ chr $ fromIntegral w
else return $ koi8uArr!w else return $ koi8uArr!w
encodeChar _ ch encodeChar _ ch
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch | ch < '\128' = pushWord8 $ fromIntegral $ ord ch
| otherwise = case lookup ch koi8uMap of | otherwise = case lookup ch koi8uMap of
Just w -> pushWord8 w Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch) Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8uMap encodeable _ c = member c koi8uMap

View File

@ -47,14 +47,14 @@ data CharacterMapping_normalization = CharacterMapping_normalization_undetermine
deriving (Eq,Show) deriving (Eq,Show)
data Stateful_siso = Stateful_siso Validity Validity data Stateful_siso = Stateful_siso Validity Validity
deriving (Eq,Show) deriving (Eq,Show)
newtype History = History (List1 Modified) deriving (Eq,Show) newtype History = History (List1 Modified) deriving (Eq,Show)
data Modified = Modified Modified_Attrs String data Modified = Modified Modified_Attrs String
deriving (Eq,Show) deriving (Eq,Show)
data Modified_Attrs = Modified_Attrs data Modified_Attrs = Modified_Attrs
{ modifiedVersion :: String { modifiedVersion :: String
, modifiedDate :: String , modifiedDate :: String
} deriving (Eq,Show) } deriving (Eq,Show)
newtype Validity = Validity (List1 State) deriving (Eq,Show) newtype Validity = Validity (List1 State) deriving (Eq,Show)
data State = State data State = State
{ stateType :: String { stateType :: String
, stateNext :: String , stateNext :: String
@ -112,10 +112,10 @@ data Escape = Escape
{ escapeSequence :: String { escapeSequence :: String
, escapeName :: String , escapeName :: String
} deriving (Eq,Show) } deriving (Eq,Show)
newtype Si = Si (List1 Designator) deriving (Eq,Show) newtype Si = Si (List1 Designator) deriving (Eq,Show)
newtype So = So (List1 Designator) deriving (Eq,Show) newtype So = So (List1 Designator) deriving (Eq,Show)
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show) newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show) newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
data Designator = Designator data Designator = Designator
{ designatorSequence :: String { designatorSequence :: String
, designatorName :: String , designatorName :: String

View File

@ -18,9 +18,9 @@ import Data.Typeable
import Data.Word import Data.Word
data UTF16 data UTF16
= UTF16 -- ^ Decodes big and little endian, encodes big endian. = UTF16 -- ^ Decodes big and little endian, encodes big endian.
| UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian. | UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
| UTF16LE -- ^ Little endian decoding and encoding. | UTF16LE -- ^ Little endian decoding and encoding.
deriving (Eq,Show,Typeable) deriving (Eq,Show,Typeable)
readBOM :: ByteSource m => m (Either Char UTF16) readBOM :: ByteSource m => m (Either Char UTF16)

View File

@ -16,10 +16,10 @@ import Data.Typeable
data UTF32 data UTF32
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present. = UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
| UTF32BE -- ^ Encodes and decodes using the big endian encoding. | UTF32BE -- ^ Encodes and decodes using the big endian encoding.
| UTF32LE -- ^ Encodes and decodes using the little endian encoding. | UTF32LE -- ^ Encodes and decodes using the little endian encoding.
deriving (Eq,Show,Typeable) deriving (Eq,Show,Typeable)
instance Encoding UTF32 where instance Encoding UTF32 where
encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch) encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch)

View File

@ -67,17 +67,23 @@ import Control.Monad.Reader (runReaderT)
-- encoding. -- encoding.
hGetContents :: (Encoding e,?enc :: e) => Handle -> IO String hGetContents :: (Encoding e,?enc :: e) => Handle -> IO String
hGetContents h = do hGetContents h = do
str <- LBS.hGetContents h str <- LBS.hGetContents h
return $ decodeLazyByteString ?enc str return $ decodeLazyByteString ?enc str
-- | Like the normal 'System.IO.getContents', but decodes the input using an
-- encoding.
getContents :: (Encoding e,?enc :: e) => IO String getContents :: (Encoding e,?enc :: e) => IO String
getContents = do getContents = do
str <- LBS.getContents str <- LBS.getContents
return $ decodeLazyByteString ?enc str return $ decodeLazyByteString ?enc str
-- | Like the normal 'System.IO.putStr', but decodes the input using an
-- encoding.
putStr :: (Encoding e,?enc :: e) => String -> IO () putStr :: (Encoding e,?enc :: e) => String -> IO ()
putStr = hPutStr stdout putStr = hPutStr stdout
-- | Like the normal 'System.IO.putStrLn', but decodes the input using an
-- encoding.
putStrLn :: (Encoding e,?enc :: e) => String -> IO () putStrLn :: (Encoding e,?enc :: e) => String -> IO ()
putStrLn = hPutStrLn stdout putStrLn = hPutStrLn stdout
@ -86,46 +92,72 @@ putStrLn = hPutStrLn stdout
hPutStr :: (Encoding e,?enc :: e) => Handle -> String -> IO () hPutStr :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
hPutStr h str = LBS.hPut h (encodeLazyByteString ?enc str) hPutStr h str = LBS.hPut h (encodeLazyByteString ?enc str)
-- | Like the normal 'System.IO.hPutStrLn', but decodes the input using an
-- encoding.
hPutStrLn :: (Encoding e,?enc :: e) => Handle -> String -> IO () hPutStrLn :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
hPutStrLn h str = do hPutStrLn h str = do
LBS.hPut h (encodeLazyByteString ?enc str) LBS.hPut h (encodeLazyByteString ?enc str)
LBS.hPut h (encodeLazyByteString ?enc "\n") LBS.hPut h (encodeLazyByteString ?enc "\n")
-- | Like the normal 'System.IO.print', but decodes the input using an
-- encoding.
print :: (Encoding e,Show a,?enc :: e) => a -> IO () print :: (Encoding e,Show a,?enc :: e) => a -> IO ()
print = hPrint stdout print = hPrint stdout
-- | Like the normal 'System.IO.hPrint', but decodes the input using an
-- encoding.
hPrint :: (Encoding e,Show a,?enc :: e) => Handle -> a -> IO () hPrint :: (Encoding e,Show a,?enc :: e) => Handle -> a -> IO ()
hPrint h x = hPutStrLn h (show x) hPrint h x = hPutStrLn h (show x)
-- | Like the normal 'System.IO.readFile', but decodes the input using an
-- encoding.
readFile :: (Encoding e,?enc :: e) => FilePath -> IO String readFile :: (Encoding e,?enc :: e) => FilePath -> IO String
readFile fn = LBS.readFile fn >>= return.(decodeLazyByteString ?enc) readFile fn = LBS.readFile fn >>= return.(decodeLazyByteString ?enc)
-- | Like the normal 'System.IO.writeFile', but decodes the input using an
-- encoding.
writeFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO () writeFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
writeFile fn str = LBS.writeFile fn $ encodeLazyByteString ?enc str writeFile fn str = LBS.writeFile fn $ encodeLazyByteString ?enc str
-- | Like the normal 'System.IO.appendFile', but decodes the input using an
-- encoding.
appendFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO () appendFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
appendFile fn str = LBS.appendFile fn $ encodeLazyByteString ?enc str appendFile fn str = LBS.appendFile fn $ encodeLazyByteString ?enc str
-- | Like the normal 'System.IO.getChar', but decodes the input using an
-- encoding.
getChar :: (Encoding e,?enc :: e) => IO Char getChar :: (Encoding e,?enc :: e) => IO Char
getChar = hGetChar stdin getChar = hGetChar stdin
-- | Like the normal 'System.IO.hGetChar', but decodes the input using an
-- encoding.
hGetChar :: (Encoding e,?enc :: e) => Handle -> IO Char hGetChar :: (Encoding e,?enc :: e) => Handle -> IO Char
hGetChar h = runReaderT (decodeChar ?enc) h hGetChar h = runReaderT (decodeChar ?enc) h
-- | Like the normal 'System.IO.getLine', but decodes the input using an
-- encoding.
getLine :: (Encoding e,?enc :: e) => IO String getLine :: (Encoding e,?enc :: e) => IO String
getLine = hGetLine stdin getLine = hGetLine stdin
-- | Like the normal 'System.IO.hGetLine', but decodes the input using an
-- encoding.
hGetLine :: (Encoding e,?enc :: e) => Handle -> IO String hGetLine :: (Encoding e,?enc :: e) => Handle -> IO String
hGetLine h = do hGetLine h = do
line <- BS.hGetLine h line <- BS.hGetLine h
return $ decodeStrictByteString ?enc line return $ decodeStrictByteString ?enc line
-- | Like the normal 'System.IO.putChar', but decodes the input using an
-- encoding.
putChar :: (Encoding e,?enc :: e) => Char -> IO () putChar :: (Encoding e,?enc :: e) => Char -> IO ()
putChar = hPutChar stdout putChar = hPutChar stdout
-- | Like the normal 'System.IO.hPutChar', but decodes the input using an
-- encoding.
hPutChar :: (Encoding e,?enc :: e) => Handle -> Char -> IO () hPutChar :: (Encoding e,?enc :: e) => Handle -> Char -> IO ()
hPutChar h c = runReaderT (encodeChar ?enc c) h hPutChar h c = runReaderT (encodeChar ?enc c) h
-- | Like the normal 'System.IO.interact', but decodes the input using an
-- encoding.
interact :: (Encoding e,?enc :: e) => (String -> String) -> IO () interact :: (Encoding e,?enc :: e) => (String -> String) -> IO ()
interact f = do interact f = do
line <- hGetLine stdin line <- hGetLine stdin
@ -133,7 +165,7 @@ interact f = do
#ifdef SYSTEM_ENCODING #ifdef SYSTEM_ENCODING
foreign import ccall "system_encoding.h get_system_encoding" foreign import ccall "system_encoding.h get_system_encoding"
get_system_encoding :: IO CString get_system_encoding :: IO CString
#endif #endif
-- | Returns the encoding used on the current system. Currently only supported -- | Returns the encoding used on the current system. Currently only supported
@ -141,9 +173,9 @@ foreign import ccall "system_encoding.h get_system_encoding"
getSystemEncoding :: IO DynEncoding getSystemEncoding :: IO DynEncoding
getSystemEncoding = do getSystemEncoding = do
#ifdef SYSTEM_ENCODING #ifdef SYSTEM_ENCODING
enc <- get_system_encoding enc <- get_system_encoding
str <- peekCString enc str <- peekCString enc
return $ encodingFromString str return $ encodingFromString str
#else #else
error "getSystemEncoding is not supported on this platform" error "getSystemEncoding is not supported on this platform"
#endif #endif

View File

@ -1,16 +1,16 @@
Name: encoding Name: encoding
Version: 0.8.1 Version: 0.8.1
Author: Henning Günther Author: Henning Günther
Maintainer: daniel@wagner-home.com Maintainer: daniel@wagner-home.com
License: BSD3 License: BSD3
License-File: LICENSE License-File: LICENSE
Synopsis: A library for various character encodings Synopsis: A library for various character encodings
Description: Description:
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that. Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Category: Codec Category: Codec
Homepage: http://code.haskell.org/encoding/ Homepage: http://code.haskell.org/encoding/
Cabal-Version: >=1.6 Cabal-Version: >=1.8
Build-Type: Custom Build-Type: Custom
Extra-Source-Files: Extra-Source-Files:
CHANGELOG CHANGELOG
Data/Encoding/Preprocessor/Mapping.hs Data/Encoding/Preprocessor/Mapping.hs
@ -45,7 +45,7 @@ Custom-Setup
Library Library
Build-Depends: array, Build-Depends: array,
base >=3 && <5, base >=3 && <5,
binary < 0.8, binary,
bytestring, bytestring,
containers, containers,
extensible-exceptions, extensible-exceptions,
@ -132,3 +132,16 @@ Library
C-Sources: C-Sources:
system_encoding.c system_encoding.c
CPP-Options: -DSYSTEM_ENCODING CPP-Options: -DSYSTEM_ENCODING
test-suite encoding-test
type: exitcode-stdio-1.0
hs-source-dirs: tests
main-is: Main.hs
other-modules: Test.Tester
, Test.Tests
build-depends: base
, bytestring
, encoding
, HUnit
, QuickCheck
ghc-options: -threaded -rtsopts -with-rtsopts=-N

66
stack.yaml Normal file
View File

@ -0,0 +1,66 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-8.22
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- '.'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []
# Override default flag values for local packages and extra-deps
flags: {}
# Extra package databases containing global packages
extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.4"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor

View File

@ -3,16 +3,16 @@ import Test.HUnit
import Test.Tests import Test.Tests
hunitTests = hunitTests =
[ ("utf8Tests", utf8Tests) [ ("utf8Tests", utf8Tests)
, ("utf16Tests", utf16Tests) , ("utf16Tests", utf16Tests)
, ("punycodeTests", punycodeTests) , ("punycodeTests", punycodeTests)
, ("isoTests", isoTests) , ("isoTests", isoTests)
, ("jisTests", jisTests) , ("jisTests", jisTests)
, ("gb18030Tests", gb18030Tests) , ("gb18030Tests", gb18030Tests)
] ]
main = do main = do
identityTests identityTests
forM_ hunitTests $ \(name, test) -> do forM_ hunitTests $ \(name, test) -> do
putStrLn $ "running " ++ name putStrLn $ "running " ++ name
runTestTT test >>= print runTestTT test >>= print

View File

@ -13,14 +13,14 @@ import Prelude hiding (readFile)
import System.IO.Encoding import System.IO.Encoding
data EncodingTest data EncodingTest
= forall enc. (Encoding enc,Show enc) => = forall enc. (Encoding enc,Show enc) =>
EncodingTest enc String [Word8] EncodingTest enc String [Word8]
| forall enc. (Encoding enc,Show enc) => | forall enc. (Encoding enc,Show enc) =>
EncodingFileTest enc FilePath FilePath EncodingFileTest enc FilePath FilePath
| forall enc. (Encoding enc,Show enc) => | forall enc. (Encoding enc,Show enc) =>
DecodingError enc [Word8] DecodingException DecodingError enc [Word8] DecodingException
| forall enc. (Encoding enc,Show enc) => | forall enc. (Encoding enc,Show enc) =>
EncodingError enc String EncodingException EncodingError enc String EncodingException
instance Testable EncodingTest where instance Testable EncodingTest where
test (EncodingTest enc src trg) test (EncodingTest enc src trg)

View File

@ -70,173 +70,173 @@ identityTests = do
utf8Tests :: Test utf8Tests :: Test
utf8Tests = TestList $ map test $ concat utf8Tests = TestList $ map test $ concat
[[EncodingTest enc "\x0041\x2262\x0391\x002E" [[EncodingTest enc "\x0041\x2262\x0391\x002E"
[0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E] [0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
,EncodingTest enc "\xD55C\xAD6D\xC5B4" ,EncodingTest enc "\xD55C\xAD6D\xC5B4"
[0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4] [0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
,EncodingTest enc "\x65E5\x672C\x8A9E" ,EncodingTest enc "\x65E5\x672C\x8A9E"
[0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E] [0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
,EncodingTest enc "\x233B4" ,EncodingTest enc "\x233B4"
[0xF0,0xA3,0x8E,0xB4] [0xF0,0xA3,0x8E,0xB4]
,EncodingTest enc "" ,EncodingTest enc ""
[] []
-- First possible sequence of a certain length -- First possible sequence of a certain length
,EncodingTest enc "\x0000" ,EncodingTest enc "\x0000"
[0x00] [0x00]
,EncodingTest enc "\x0080" ,EncodingTest enc "\x0080"
[0xC2,0x80] [0xC2,0x80]
,EncodingTest enc "\x0800" ,EncodingTest enc "\x0800"
[0xE0,0xA0,0x80] [0xE0,0xA0,0x80]
,EncodingTest enc "\x10000" ,EncodingTest enc "\x10000"
[0xF0,0x90,0x80,0x80] [0xF0,0x90,0x80,0x80]
-- Last possible sequence of a certain length -- Last possible sequence of a certain length
,EncodingTest enc "\x007F" ,EncodingTest enc "\x007F"
[0x7F] [0x7F]
,EncodingTest enc "\x07FF" ,EncodingTest enc "\x07FF"
[0xDF,0xBF] [0xDF,0xBF]
,EncodingTest enc "\xFFFF" ,EncodingTest enc "\xFFFF"
[0xEF,0xBF,0xBF] [0xEF,0xBF,0xBF]
-- Other boundaries -- Other boundaries
,EncodingTest enc "\xD7FF" ,EncodingTest enc "\xD7FF"
[0xED,0x9F,0xBF] [0xED,0x9F,0xBF]
,EncodingTest enc "\xE000" ,EncodingTest enc "\xE000"
[0xEE,0x80,0x80] [0xEE,0x80,0x80]
,EncodingTest enc "\xFFFD" ,EncodingTest enc "\xFFFD"
[0xEF,0xBF,0xBD] [0xEF,0xBF,0xBD]
-- Illegal starting characters -- Illegal starting characters
,DecodingError enc ,DecodingError enc
[0x65,0x55,0x85] [0x65,0x55,0x85]
(IllegalCharacter 0x85) (IllegalCharacter 0x85)
-- Unexpected end -- Unexpected end
,DecodingError enc ,DecodingError enc
[0x41,0xE2,0x89,0xA2,0xCE] [0x41,0xE2,0x89,0xA2,0xCE]
UnexpectedEnd UnexpectedEnd
,DecodingError enc ,DecodingError enc
[0x41,0xE2,0x89] [0x41,0xE2,0x89]
UnexpectedEnd UnexpectedEnd
,DecodingError enc ,DecodingError enc
[0x41,0xE2] [0x41,0xE2]
UnexpectedEnd UnexpectedEnd
] ]
| enc <- [UTF8,UTF8Strict] | enc <- [UTF8,UTF8Strict]
]++ ]++
[DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE) [DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF) ,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
-- Overlong representations of '/' -- Overlong representations of '/'
,DecodingError UTF8Strict [0xC0,0xAF] ,DecodingError UTF8Strict [0xC0,0xAF]
(IllegalRepresentation [0xC0,0xAF]) (IllegalRepresentation [0xC0,0xAF])
,DecodingError UTF8Strict [0xE0,0x80,0xAF] ,DecodingError UTF8Strict [0xE0,0x80,0xAF]
(IllegalRepresentation [0xE0,0x80,0xAF]) (IllegalRepresentation [0xE0,0x80,0xAF])
,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF] ,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF]
(IllegalRepresentation [0xF0,0x80,0x80,0xAF]) (IllegalRepresentation [0xF0,0x80,0x80,0xAF])
-- Maximum overlong sequences -- Maximum overlong sequences
,DecodingError UTF8Strict [0xC1,0xBF] ,DecodingError UTF8Strict [0xC1,0xBF]
(IllegalRepresentation [0xC1,0xBF]) (IllegalRepresentation [0xC1,0xBF])
,DecodingError UTF8Strict [0xE0,0x9F,0xBF] ,DecodingError UTF8Strict [0xE0,0x9F,0xBF]
(IllegalRepresentation [0xE0,0x9F,0xBF]) (IllegalRepresentation [0xE0,0x9F,0xBF])
,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF] ,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF]
(IllegalRepresentation [0xF0,0x8F,0xBF,0xBF]) (IllegalRepresentation [0xF0,0x8F,0xBF,0xBF])
-- Overlong represenations of '\NUL' -- Overlong represenations of '\NUL'
,DecodingError UTF8Strict [0xC0,0x80] ,DecodingError UTF8Strict [0xC0,0x80]
(IllegalRepresentation [0xC0,0x80]) (IllegalRepresentation [0xC0,0x80])
,DecodingError UTF8Strict [0xE0,0x80,0x80] ,DecodingError UTF8Strict [0xE0,0x80,0x80]
(IllegalRepresentation [0xE0,0x80,0x80]) (IllegalRepresentation [0xE0,0x80,0x80])
,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80] ,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80]
(IllegalRepresentation [0xF0,0x80,0x80,0x80]) (IllegalRepresentation [0xF0,0x80,0x80,0x80])
-- Invalid extends -- Invalid extends
-- 2 of 2 -- 2 of 2
,DecodingError UTF8Strict [0xCC,0x1C,0xE0] ,DecodingError UTF8Strict [0xCC,0x1C,0xE0]
(IllegalCharacter 0x1C) (IllegalCharacter 0x1C)
-- 2 of 3 -- 2 of 3
,DecodingError UTF8Strict [0xE3,0x6C,0xB3] ,DecodingError UTF8Strict [0xE3,0x6C,0xB3]
(IllegalCharacter 0x6C) (IllegalCharacter 0x6C)
-- 3 of 3 -- 3 of 3
,DecodingError UTF8Strict [0xE3,0xB4,0x6D] ,DecodingError UTF8Strict [0xE3,0xB4,0x6D]
(IllegalCharacter 0x6D) (IllegalCharacter 0x6D)
-- 2 of 4 -- 2 of 4
,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3] ,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3]
(IllegalCharacter 0x6C) (IllegalCharacter 0x6C)
-- 3 of 4 -- 3 of 4
,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3] ,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3]
(IllegalCharacter 0x6C) (IllegalCharacter 0x6C)
-- 4 of 4 -- 4 of 4
,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C] ,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C]
(IllegalCharacter 0x6C) (IllegalCharacter 0x6C)
] ]
utf16Tests :: Test utf16Tests :: Test
utf16Tests = TestList $ map test $ utf16Tests = TestList $ map test $
[EncodingTest UTF16BE "z" [EncodingTest UTF16BE "z"
[0x00,0x7A] [0x00,0x7A]
,EncodingTest UTF16BE "\x6C34" ,EncodingTest UTF16BE "\x6C34"
[0x6C,0x34] [0x6C,0x34]
,EncodingTest UTF16BE "\x1D11E" ,EncodingTest UTF16BE "\x1D11E"
[0xD8,0x34,0xDD,0x1E] [0xD8,0x34,0xDD,0x1E]
,EncodingTest UTF16 "\x6C34z\x1D11E" ,EncodingTest UTF16 "\x6C34z\x1D11E"
[0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E] [0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
,EncodingTest UTF16BE "˨" ,EncodingTest UTF16BE "˨"
[0x02,0xE8] [0x02,0xE8]
,DecodingError UTF16LE [0x65,0xDC] ,DecodingError UTF16LE [0x65,0xDC]
(IllegalCharacter 0xDC) (IllegalCharacter 0xDC)
,DecodingError UTF16BE [0xDC,0x33] ,DecodingError UTF16BE [0xDC,0x33]
(IllegalCharacter 0xDC) (IllegalCharacter 0xDC)
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33] ,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33]
(IllegalCharacter 0xDA) (IllegalCharacter 0xDA)
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66] ,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66]
(IllegalCharacter 0xDA) (IllegalCharacter 0xDA)
] ]
punycodeTests :: Test punycodeTests :: Test
punycodeTests = TestList $ map test $ punycodeTests = TestList $ map test $
[EncodingTest punycode "abcdef" [EncodingTest punycode "abcdef"
(map (fromIntegral.ord) "abcdef-") (map (fromIntegral.ord) "abcdef-")
,EncodingTest punycode "abæcdöef" ,EncodingTest punycode "abæcdöef"
(map (fromIntegral.ord) "abcdef-qua4k") (map (fromIntegral.ord) "abcdef-qua4k")
,EncodingTest punycode "schön" ,EncodingTest punycode "schön"
(map (fromIntegral.ord) "schn-7qa") (map (fromIntegral.ord) "schn-7qa")
,EncodingTest punycode "ยจฆฟคฏข" ,EncodingTest punycode "ยจฆฟคฏข"
(map (fromIntegral.ord) "22cdfh1b8fsa") (map (fromIntegral.ord) "22cdfh1b8fsa")
,EncodingTest punycode "" ,EncodingTest punycode ""
(map (fromIntegral.ord) "74h") (map (fromIntegral.ord) "74h")
-- taken from http://tools.ietf.org/html/rfc3492#section-7 -- taken from http://tools.ietf.org/html/rfc3492#section-7
-- Arabic (Egyptian) -- Arabic (Egyptian)
,punyTest "ليهمابتكلموشعربي؟" ,punyTest "ليهمابتكلموشعربي؟"
"egbpdaj6bu4bxfgehfvwxn" "egbpdaj6bu4bxfgehfvwxn"
-- Chinese (simplified) -- Chinese (simplified)
,punyTest "他们为什么不说中文" ,punyTest "他们为什么不说中文"
"ihqwcrb4cv8a8dqg056pqjye" "ihqwcrb4cv8a8dqg056pqjye"
-- Chinese (traditional) -- Chinese (traditional)
,punyTest "他們爲什麽不說中文" ,punyTest "他們爲什麽不說中文"
"ihqwctvzc91f659drss3x8bo0yb" "ihqwctvzc91f659drss3x8bo0yb"
-- Czech -- Czech
,punyTest "Pročprostěnemluvíčesky" ,punyTest "Pročprostěnemluvíčesky"
"Proprostnemluvesky-uyb24dma41a" "Proprostnemluvesky-uyb24dma41a"
-- Hebrew -- Hebrew
,punyTest "למההםפשוטלאמדבריםעברית" ,punyTest "למההםפשוטלאמדבריםעברית"
"4dbcagdahymbxekheh6e0a7fei0b" "4dbcagdahymbxekheh6e0a7fei0b"
-- Hindi (Devanagari) -- Hindi (Devanagari)
,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902" ,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
-- Japanese (kanji and hiragana) -- Japanese (kanji and hiragana)
,punyTest "なぜみんな日本語を話してくれないのか" ,punyTest "なぜみんな日本語を話してくれないのか"
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
-- Korean (Hangul syllables) -- Korean (Hangul syllables)
,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까" ,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
-- Russian (Cyrillic) -- Russian (Cyrillic)
,punyTest "почемужеонинеговорятпорусски" ,punyTest "почемужеонинеговорятпорусски"
"b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there "b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
-- Spanish -- Spanish
,punyTest "PorquénopuedensimplementehablarenEspañol" ,punyTest "PorquénopuedensimplementehablarenEspañol"
"PorqunopuedensimplementehablarenEspaol-fmd56a" "PorqunopuedensimplementehablarenEspaol-fmd56a"
-- Vietnamese -- Vietnamese
,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt" ,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
{-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\ {-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
\bar\xE2\x80\x8B\xE2\x81\xA0\ \bar\xE2\x80\x8B\xE2\x81\xA0\
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF" \baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
"foobarbaz"-} "foobarbaz"-}
] ]
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp) where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
isoTests :: Test isoTests :: Test
isoTests = TestList $ map test $ isoTests = TestList $ map test $