Made package stack compatible
Created and added stack.yaml and .gitignore files. Relaxed the version dependency on 'binary' package in cabal file. Is that OK? Also brought the minimum cabal version to >=1.8, so I could add a test target that pulls in the library. Changed all tabs to spaces - I don't know when the Haskell compiler started giving warnings about that.
This commit is contained in:
parent
6284c1a677
commit
8727ac25a5
22
.gitignore
vendored
Normal file
22
.gitignore
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
### Haskell ###
|
||||
dist
|
||||
dist-*
|
||||
cabal-dev
|
||||
*.o
|
||||
*.hi
|
||||
*.chi
|
||||
*.chs.h
|
||||
*.dyn_o
|
||||
*.dyn_hi
|
||||
.hpc
|
||||
.hsenv
|
||||
.cabal-sandbox/
|
||||
cabal.sandbox.config
|
||||
*.prof
|
||||
*.aux
|
||||
*.hp
|
||||
*.eventlog
|
||||
.stack-work/
|
||||
cabal.project.local
|
||||
.HTF/
|
||||
|
||||
@ -359,6 +359,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
|
||||
-- | Takes the name of an encoding and creates a dynamic encoding from it.
|
||||
encodingFromString :: String -> DynEncoding
|
||||
encodingFromString str = maybe
|
||||
(error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str)
|
||||
id
|
||||
(encodingFromStringExplicit str)
|
||||
(error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str)
|
||||
id
|
||||
(encodingFromStringExplicit str)
|
||||
|
||||
@ -2,8 +2,8 @@
|
||||
{- | This implements BootString en- and decoding, the foundation of Punycode
|
||||
-}
|
||||
module Data.Encoding.BootString
|
||||
(BootString(..)
|
||||
,punycode) where
|
||||
(BootString(..)
|
||||
,punycode) where
|
||||
|
||||
import Data.Encoding.Base
|
||||
import Data.Encoding.Exception
|
||||
@ -17,77 +17,77 @@ import Data.Typeable
|
||||
import Control.Monad (when)
|
||||
|
||||
data BootString = BootString
|
||||
{base :: Int
|
||||
,tmin :: Int
|
||||
,tmax :: Int
|
||||
,skew :: Int
|
||||
,damp :: Int
|
||||
,init_bias :: Int
|
||||
,init_n :: Int
|
||||
}
|
||||
deriving (Show,Eq,Typeable)
|
||||
{base :: Int
|
||||
,tmin :: Int
|
||||
,tmax :: Int
|
||||
,skew :: Int
|
||||
,damp :: Int
|
||||
,init_bias :: Int
|
||||
,init_n :: Int
|
||||
}
|
||||
deriving (Show,Eq,Typeable)
|
||||
|
||||
punycode :: BootString
|
||||
punycode = BootString
|
||||
{base = 36
|
||||
,tmin = 1
|
||||
,tmax = 26
|
||||
,skew = 38
|
||||
,damp = 700
|
||||
,init_bias = 72
|
||||
,init_n = 0x80
|
||||
}
|
||||
{base = 36
|
||||
,tmin = 1
|
||||
,tmax = 26
|
||||
,skew = 38
|
||||
,damp = 700
|
||||
,init_bias = 72
|
||||
,init_n = 0x80
|
||||
}
|
||||
|
||||
punyValue :: ByteSource m => Word8 -> m Int
|
||||
punyValue c
|
||||
| n < 0x30 = norep
|
||||
| n <= 0x39 = return $ n-0x30+26
|
||||
| n < 0x41 = norep
|
||||
| n <= 0x5A = return $ n-0x41
|
||||
| n < 0x61 = norep
|
||||
| n <= 0x7A = return $ n-0x61
|
||||
| otherwise = norep
|
||||
where
|
||||
n = fromIntegral c
|
||||
norep = throwException (IllegalCharacter c)
|
||||
| n < 0x30 = norep
|
||||
| n <= 0x39 = return $ n-0x30+26
|
||||
| n < 0x41 = norep
|
||||
| n <= 0x5A = return $ n-0x41
|
||||
| n < 0x61 = norep
|
||||
| n <= 0x7A = return $ n-0x61
|
||||
| otherwise = norep
|
||||
where
|
||||
n = fromIntegral c
|
||||
norep = throwException (IllegalCharacter c)
|
||||
|
||||
punyChar :: ByteSink m => Int -> m Word8
|
||||
punyChar c
|
||||
| c < 0 = norep
|
||||
| c < 26 = return $ fromIntegral $ 0x61+c
|
||||
| c < 36 = return $ fromIntegral $ 0x30+c-26
|
||||
| otherwise = norep
|
||||
where
|
||||
norep = throwException (HasNoRepresentation (chr c))
|
||||
| c < 0 = norep
|
||||
| c < 26 = return $ fromIntegral $ 0x61+c
|
||||
| c < 36 = return $ fromIntegral $ 0x30+c-26
|
||||
| otherwise = norep
|
||||
where
|
||||
norep = throwException (HasNoRepresentation (chr c))
|
||||
|
||||
getT :: BootString -> Int -> Int -> Int
|
||||
getT bs k bias
|
||||
| k <= bias + (tmin bs) = tmin bs
|
||||
| k >= bias + (tmax bs) = tmax bs
|
||||
| otherwise = k-bias
|
||||
| k <= bias + (tmin bs) = tmin bs
|
||||
| k >= bias + (tmax bs) = tmax bs
|
||||
| otherwise = k-bias
|
||||
|
||||
adapt :: BootString -> Int -> Int -> Bool -> Int
|
||||
adapt bs delta numpoints firsttime = let
|
||||
delta1 = if firsttime
|
||||
then delta `div` (damp bs)
|
||||
else delta `div` 2
|
||||
delta2 = delta1 + (delta1 `div` numpoints)
|
||||
(rd,rk) = head
|
||||
$ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst)
|
||||
$ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0)
|
||||
in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs))
|
||||
delta1 = if firsttime
|
||||
then delta `div` (damp bs)
|
||||
else delta `div` 2
|
||||
delta2 = delta1 + (delta1 `div` numpoints)
|
||||
(rd,rk) = head
|
||||
$ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst)
|
||||
$ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0)
|
||||
in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs))
|
||||
|
||||
decodeValue :: ByteSource m => BootString -> Int -> Int -> Int -> Int -> [Int] -> m (Int,[Int])
|
||||
decodeValue bs bias i k w (x:xs)
|
||||
| x >= base bs = throwException OutOfRange
|
||||
| x > (maxBound - i) `div` w = throwException OutOfRange
|
||||
| x < t = return (ni,xs)
|
||||
| w > maxBound `div` (base bs - t) = throwException OutOfRange
|
||||
| x >= base bs = throwException OutOfRange
|
||||
| x > (maxBound - i) `div` w = throwException OutOfRange
|
||||
| x < t = return (ni,xs)
|
||||
| w > maxBound `div` (base bs - t) = throwException OutOfRange
|
||||
| null xs = throwException OutOfRange
|
||||
| otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs
|
||||
where
|
||||
ni = i + x*w
|
||||
t = getT bs k bias
|
||||
| otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs
|
||||
where
|
||||
ni = i + x*w
|
||||
t = getT bs k bias
|
||||
|
||||
decodeValues :: ByteSource m => BootString -> Int -> [Int] -> m [(Char,Int)]
|
||||
decodeValues bs len xs = decodeValues' bs (init_n bs) 0 (init_bias bs) len xs
|
||||
@ -108,8 +108,8 @@ decodeValues' bs n i bias len xs = do
|
||||
insertDeltas :: [(a,Int)] -> [a] -> [a]
|
||||
insertDeltas [] str = str
|
||||
insertDeltas ((c,p):xs) str = let
|
||||
(l,r) = splitAt p str
|
||||
in insertDeltas xs (l++[c]++r)
|
||||
(l,r) = splitAt p str
|
||||
in insertDeltas xs (l++[c]++r)
|
||||
|
||||
punyDecode :: ByteSource m => [Word8] -> [Word8] -> m String
|
||||
punyDecode base ext = do
|
||||
@ -119,32 +119,32 @@ punyDecode base ext = do
|
||||
|
||||
encodeValue :: BootString -> Int -> Int -> Int -> Int -> [Int]
|
||||
encodeValue bs bias delta n c = unfoldr (\(q,k,out) -> let
|
||||
t = getT bs k bias
|
||||
(nq,dc) = (q-t) `divMod` (base bs - t)
|
||||
in if out
|
||||
then Nothing
|
||||
else (if q < t
|
||||
then Just (q,(q,k+base bs,True))
|
||||
else Just (t + dc,(nq,k+base bs,False)))
|
||||
) (delta,base bs,False)
|
||||
t = getT bs k bias
|
||||
(nq,dc) = (q-t) `divMod` (base bs - t)
|
||||
in if out
|
||||
then Nothing
|
||||
else (if q < t
|
||||
then Just (q,(q,k+base bs,True))
|
||||
else Just (t + dc,(nq,k+base bs,False)))
|
||||
) (delta,base bs,False)
|
||||
|
||||
encodeValues' :: BootString -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int,Int,Int,[Int])
|
||||
encodeValues' _ _ h bias delta _ [] = (delta,h,bias,[])
|
||||
encodeValues' bs b h bias delta n (c:cs) = case compare c n of
|
||||
LT -> encodeValues' bs b h bias (delta+1) n cs
|
||||
GT -> encodeValues' bs b h bias delta n cs
|
||||
EQ -> let
|
||||
(ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs
|
||||
xs = encodeValue bs bias delta n c
|
||||
in (ndelta,nh,nbias,xs++rest)
|
||||
LT -> encodeValues' bs b h bias (delta+1) n cs
|
||||
GT -> encodeValues' bs b h bias delta n cs
|
||||
EQ -> let
|
||||
(ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs
|
||||
xs = encodeValue bs bias delta n c
|
||||
in (ndelta,nh,nbias,xs++rest)
|
||||
|
||||
encodeValues :: BootString -> Int -> Int -> Int -> Int -> Int -> Int -> [Int] -> [Int]
|
||||
encodeValues bs b l h bias delta n cps
|
||||
| h == l = []
|
||||
| otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps
|
||||
where
|
||||
m = minimum (filter (>=n) cps)
|
||||
(ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps
|
||||
| h == l = []
|
||||
| otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps
|
||||
where
|
||||
m = minimum (filter (>=n) cps)
|
||||
(ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps
|
||||
|
||||
breakLast :: (a -> Bool) -> [a] -> Maybe ([a],[a])
|
||||
breakLast p xs = do
|
||||
@ -166,7 +166,7 @@ instance Encoding BootString where
|
||||
encodeChar _ c = error "Data.Encoding.BootString.encodeChar: Please use 'encode' for encoding BootStrings"
|
||||
decodeChar _ = error "Data.Encoding.BootString.decodeChar: Please use 'decode' for decoding BootStrings"
|
||||
encode bs str = let (base,nbase) = partition (\c -> ord c < init_n bs) str
|
||||
b = length base
|
||||
b = length base
|
||||
in do
|
||||
res <- mapM punyChar $ encodeValues bs b (length str) b (init_bias bs) 0 (init_n bs) (map ord str)
|
||||
when (not $ null base) $ do
|
||||
@ -178,8 +178,8 @@ instance Encoding BootString where
|
||||
let m = fromIntegral $ ord '-'
|
||||
case breakLast (==m) wrds of
|
||||
Just ([],_) -> throwException (IllegalCharacter m)
|
||||
Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of
|
||||
Nothing -> punyDecode base nbase
|
||||
Just ww -> throwException (IllegalCharacter ww)
|
||||
Nothing -> punyDecode [] wrds
|
||||
Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of
|
||||
Nothing -> punyDecode base nbase
|
||||
Just ww -> throwException (IllegalCharacter ww)
|
||||
Nothing -> punyDecode [] wrds
|
||||
encodeable bs c = True -- XXX: hm, really?
|
||||
|
||||
@ -9,8 +9,8 @@ import Control.Monad.Identity
|
||||
-- | This exception type is thrown whenever something went wrong during the
|
||||
-- encoding-process.
|
||||
data EncodingException
|
||||
= HasNoRepresentation Char -- ^ Thrown if a specific character
|
||||
-- is not representable in an encoding.
|
||||
= HasNoRepresentation Char -- ^ Thrown if a specific character
|
||||
-- is not representable in an encoding.
|
||||
deriving (Eq,Ord,Show,Read,Typeable)
|
||||
|
||||
instance Exception EncodingException
|
||||
@ -18,13 +18,13 @@ instance Exception EncodingException
|
||||
-- | This exception type is thrown whenever something went wrong during the
|
||||
-- decoding-process.
|
||||
data DecodingException
|
||||
= IllegalCharacter Word8 -- ^ The sequence contained an illegal
|
||||
-- byte that couldn't be decoded.
|
||||
| UnexpectedEnd -- ^ more bytes were needed to allow a
|
||||
-- successfull decoding.
|
||||
| OutOfRange -- ^ the decoded value was out of the unicode range
|
||||
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
|
||||
-- character, but is illegal.
|
||||
= IllegalCharacter Word8 -- ^ The sequence contained an illegal
|
||||
-- byte that couldn't be decoded.
|
||||
| UnexpectedEnd -- ^ more bytes were needed to allow a
|
||||
-- successfull decoding.
|
||||
| OutOfRange -- ^ the decoded value was out of the unicode range
|
||||
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
|
||||
-- character, but is illegal.
|
||||
deriving (Eq,Ord,Show,Read,Typeable)
|
||||
|
||||
instance Exception DecodingException
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
|
||||
-}
|
||||
module Data.Encoding.KOI8R
|
||||
(KOI8R(..)) where
|
||||
(KOI8R(..)) where
|
||||
|
||||
import Control.Throws
|
||||
import Data.Array.Unboxed
|
||||
@ -28,23 +28,23 @@ koi8rMap = fromList (zip koi8rList [128..])
|
||||
|
||||
koi8rList :: [Char]
|
||||
koi8rList =
|
||||
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
||||
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
||||
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
||||
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
||||
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
|
||||
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
|
||||
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
|
||||
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
|
||||
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
||||
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
||||
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
||||
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||
]
|
||||
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
||||
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
||||
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
||||
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
||||
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
|
||||
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
|
||||
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
|
||||
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
|
||||
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
||||
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
||||
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
||||
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||
]
|
||||
|
||||
instance Encoding KOI8R where
|
||||
decodeChar _ = do
|
||||
@ -53,8 +53,8 @@ instance Encoding KOI8R where
|
||||
then return $ chr $ fromIntegral w
|
||||
else return $ koi8rArr!w
|
||||
encodeChar _ ch
|
||||
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
||||
| otherwise = case lookup ch koi8rMap of
|
||||
Just w -> pushWord8 w
|
||||
Nothing -> throwException (HasNoRepresentation ch)
|
||||
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
||||
| otherwise = case lookup ch koi8rMap of
|
||||
Just w -> pushWord8 w
|
||||
Nothing -> throwException (HasNoRepresentation ch)
|
||||
encodeable _ c = member c koi8rMap
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
See <http://en.wikipedia.org/wiki/KOI8-U> for more information.
|
||||
-}
|
||||
module Data.Encoding.KOI8U
|
||||
(KOI8U(..)) where
|
||||
(KOI8U(..)) where
|
||||
|
||||
import Data.Encoding.Base
|
||||
import Data.Encoding.ByteSource
|
||||
@ -28,23 +28,23 @@ koi8uMap = fromList (zip koi8uList [128..])
|
||||
|
||||
koi8uList :: [Char]
|
||||
koi8uList =
|
||||
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
||||
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
||||
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
||||
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
||||
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
|
||||
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
|
||||
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
|
||||
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
|
||||
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
||||
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
||||
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
||||
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||
]
|
||||
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
||||
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
||||
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
||||
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
||||
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
|
||||
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
|
||||
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
|
||||
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
|
||||
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
||||
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
||||
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
||||
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||
]
|
||||
|
||||
instance Encoding KOI8U where
|
||||
decodeChar _ = do
|
||||
@ -53,8 +53,8 @@ instance Encoding KOI8U where
|
||||
then return $ chr $ fromIntegral w
|
||||
else return $ koi8uArr!w
|
||||
encodeChar _ ch
|
||||
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
||||
| otherwise = case lookup ch koi8uMap of
|
||||
Just w -> pushWord8 w
|
||||
Nothing -> throwException (HasNoRepresentation ch)
|
||||
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
|
||||
| otherwise = case lookup ch koi8uMap of
|
||||
Just w -> pushWord8 w
|
||||
Nothing -> throwException (HasNoRepresentation ch)
|
||||
encodeable _ c = member c koi8uMap
|
||||
|
||||
@ -47,14 +47,14 @@ data CharacterMapping_normalization = CharacterMapping_normalization_undetermine
|
||||
deriving (Eq,Show)
|
||||
data Stateful_siso = Stateful_siso Validity Validity
|
||||
deriving (Eq,Show)
|
||||
newtype History = History (List1 Modified) deriving (Eq,Show)
|
||||
newtype History = History (List1 Modified) deriving (Eq,Show)
|
||||
data Modified = Modified Modified_Attrs String
|
||||
deriving (Eq,Show)
|
||||
data Modified_Attrs = Modified_Attrs
|
||||
{ modifiedVersion :: String
|
||||
, modifiedDate :: String
|
||||
} deriving (Eq,Show)
|
||||
newtype Validity = Validity (List1 State) deriving (Eq,Show)
|
||||
newtype Validity = Validity (List1 State) deriving (Eq,Show)
|
||||
data State = State
|
||||
{ stateType :: String
|
||||
, stateNext :: String
|
||||
@ -112,10 +112,10 @@ data Escape = Escape
|
||||
{ escapeSequence :: String
|
||||
, escapeName :: String
|
||||
} deriving (Eq,Show)
|
||||
newtype Si = Si (List1 Designator) deriving (Eq,Show)
|
||||
newtype So = So (List1 Designator) deriving (Eq,Show)
|
||||
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
|
||||
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
|
||||
newtype Si = Si (List1 Designator) deriving (Eq,Show)
|
||||
newtype So = So (List1 Designator) deriving (Eq,Show)
|
||||
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
|
||||
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
|
||||
data Designator = Designator
|
||||
{ designatorSequence :: String
|
||||
, designatorName :: String
|
||||
|
||||
@ -18,9 +18,9 @@ import Data.Typeable
|
||||
import Data.Word
|
||||
|
||||
data UTF16
|
||||
= UTF16 -- ^ Decodes big and little endian, encodes big endian.
|
||||
| UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
|
||||
| UTF16LE -- ^ Little endian decoding and encoding.
|
||||
= UTF16 -- ^ Decodes big and little endian, encodes big endian.
|
||||
| UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
|
||||
| UTF16LE -- ^ Little endian decoding and encoding.
|
||||
deriving (Eq,Show,Typeable)
|
||||
|
||||
readBOM :: ByteSource m => m (Either Char UTF16)
|
||||
|
||||
@ -16,10 +16,10 @@ import Data.Typeable
|
||||
|
||||
|
||||
data UTF32
|
||||
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
|
||||
| UTF32BE -- ^ Encodes and decodes using the big endian encoding.
|
||||
| UTF32LE -- ^ Encodes and decodes using the little endian encoding.
|
||||
deriving (Eq,Show,Typeable)
|
||||
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
|
||||
| UTF32BE -- ^ Encodes and decodes using the big endian encoding.
|
||||
| UTF32LE -- ^ Encodes and decodes using the little endian encoding.
|
||||
deriving (Eq,Show,Typeable)
|
||||
|
||||
instance Encoding UTF32 where
|
||||
encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch)
|
||||
|
||||
@ -67,17 +67,23 @@ import Control.Monad.Reader (runReaderT)
|
||||
-- encoding.
|
||||
hGetContents :: (Encoding e,?enc :: e) => Handle -> IO String
|
||||
hGetContents h = do
|
||||
str <- LBS.hGetContents h
|
||||
return $ decodeLazyByteString ?enc str
|
||||
str <- LBS.hGetContents h
|
||||
return $ decodeLazyByteString ?enc str
|
||||
|
||||
-- | Like the normal 'System.IO.getContents', but decodes the input using an
|
||||
-- encoding.
|
||||
getContents :: (Encoding e,?enc :: e) => IO String
|
||||
getContents = do
|
||||
str <- LBS.getContents
|
||||
return $ decodeLazyByteString ?enc str
|
||||
|
||||
-- | Like the normal 'System.IO.putStr', but decodes the input using an
|
||||
-- encoding.
|
||||
putStr :: (Encoding e,?enc :: e) => String -> IO ()
|
||||
putStr = hPutStr stdout
|
||||
|
||||
-- | Like the normal 'System.IO.putStrLn', but decodes the input using an
|
||||
-- encoding.
|
||||
putStrLn :: (Encoding e,?enc :: e) => String -> IO ()
|
||||
putStrLn = hPutStrLn stdout
|
||||
|
||||
@ -86,46 +92,72 @@ putStrLn = hPutStrLn stdout
|
||||
hPutStr :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
|
||||
hPutStr h str = LBS.hPut h (encodeLazyByteString ?enc str)
|
||||
|
||||
-- | Like the normal 'System.IO.hPutStrLn', but decodes the input using an
|
||||
-- encoding.
|
||||
hPutStrLn :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
|
||||
hPutStrLn h str = do
|
||||
LBS.hPut h (encodeLazyByteString ?enc str)
|
||||
LBS.hPut h (encodeLazyByteString ?enc "\n")
|
||||
|
||||
-- | Like the normal 'System.IO.print', but decodes the input using an
|
||||
-- encoding.
|
||||
print :: (Encoding e,Show a,?enc :: e) => a -> IO ()
|
||||
print = hPrint stdout
|
||||
|
||||
-- | Like the normal 'System.IO.hPrint', but decodes the input using an
|
||||
-- encoding.
|
||||
hPrint :: (Encoding e,Show a,?enc :: e) => Handle -> a -> IO ()
|
||||
hPrint h x = hPutStrLn h (show x)
|
||||
|
||||
-- | Like the normal 'System.IO.readFile', but decodes the input using an
|
||||
-- encoding.
|
||||
readFile :: (Encoding e,?enc :: e) => FilePath -> IO String
|
||||
readFile fn = LBS.readFile fn >>= return.(decodeLazyByteString ?enc)
|
||||
|
||||
-- | Like the normal 'System.IO.writeFile', but decodes the input using an
|
||||
-- encoding.
|
||||
writeFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
|
||||
writeFile fn str = LBS.writeFile fn $ encodeLazyByteString ?enc str
|
||||
|
||||
-- | Like the normal 'System.IO.appendFile', but decodes the input using an
|
||||
-- encoding.
|
||||
appendFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
|
||||
appendFile fn str = LBS.appendFile fn $ encodeLazyByteString ?enc str
|
||||
|
||||
-- | Like the normal 'System.IO.getChar', but decodes the input using an
|
||||
-- encoding.
|
||||
getChar :: (Encoding e,?enc :: e) => IO Char
|
||||
getChar = hGetChar stdin
|
||||
|
||||
-- | Like the normal 'System.IO.hGetChar', but decodes the input using an
|
||||
-- encoding.
|
||||
hGetChar :: (Encoding e,?enc :: e) => Handle -> IO Char
|
||||
hGetChar h = runReaderT (decodeChar ?enc) h
|
||||
|
||||
-- | Like the normal 'System.IO.getLine', but decodes the input using an
|
||||
-- encoding.
|
||||
getLine :: (Encoding e,?enc :: e) => IO String
|
||||
getLine = hGetLine stdin
|
||||
|
||||
-- | Like the normal 'System.IO.hGetLine', but decodes the input using an
|
||||
-- encoding.
|
||||
hGetLine :: (Encoding e,?enc :: e) => Handle -> IO String
|
||||
hGetLine h = do
|
||||
line <- BS.hGetLine h
|
||||
return $ decodeStrictByteString ?enc line
|
||||
|
||||
-- | Like the normal 'System.IO.putChar', but decodes the input using an
|
||||
-- encoding.
|
||||
putChar :: (Encoding e,?enc :: e) => Char -> IO ()
|
||||
putChar = hPutChar stdout
|
||||
|
||||
-- | Like the normal 'System.IO.hPutChar', but decodes the input using an
|
||||
-- encoding.
|
||||
hPutChar :: (Encoding e,?enc :: e) => Handle -> Char -> IO ()
|
||||
hPutChar h c = runReaderT (encodeChar ?enc c) h
|
||||
|
||||
-- | Like the normal 'System.IO.interact', but decodes the input using an
|
||||
-- encoding.
|
||||
interact :: (Encoding e,?enc :: e) => (String -> String) -> IO ()
|
||||
interact f = do
|
||||
line <- hGetLine stdin
|
||||
@ -133,7 +165,7 @@ interact f = do
|
||||
|
||||
#ifdef SYSTEM_ENCODING
|
||||
foreign import ccall "system_encoding.h get_system_encoding"
|
||||
get_system_encoding :: IO CString
|
||||
get_system_encoding :: IO CString
|
||||
#endif
|
||||
|
||||
-- | Returns the encoding used on the current system. Currently only supported
|
||||
@ -141,9 +173,9 @@ foreign import ccall "system_encoding.h get_system_encoding"
|
||||
getSystemEncoding :: IO DynEncoding
|
||||
getSystemEncoding = do
|
||||
#ifdef SYSTEM_ENCODING
|
||||
enc <- get_system_encoding
|
||||
str <- peekCString enc
|
||||
return $ encodingFromString str
|
||||
enc <- get_system_encoding
|
||||
str <- peekCString enc
|
||||
return $ encodingFromString str
|
||||
#else
|
||||
error "getSystemEncoding is not supported on this platform"
|
||||
error "getSystemEncoding is not supported on this platform"
|
||||
#endif
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
Name: encoding
|
||||
Version: 0.8.1
|
||||
Author: Henning Günther
|
||||
Maintainer: daniel@wagner-home.com
|
||||
License: BSD3
|
||||
Name: encoding
|
||||
Version: 0.8.1
|
||||
Author: Henning Günther
|
||||
Maintainer: daniel@wagner-home.com
|
||||
License: BSD3
|
||||
License-File: LICENSE
|
||||
Synopsis: A library for various character encodings
|
||||
Synopsis: A library for various character encodings
|
||||
Description:
|
||||
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
|
||||
Category: Codec
|
||||
Homepage: http://code.haskell.org/encoding/
|
||||
Cabal-Version: >=1.6
|
||||
Build-Type: Custom
|
||||
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
|
||||
Category: Codec
|
||||
Homepage: http://code.haskell.org/encoding/
|
||||
Cabal-Version: >=1.8
|
||||
Build-Type: Custom
|
||||
Extra-Source-Files:
|
||||
CHANGELOG
|
||||
Data/Encoding/Preprocessor/Mapping.hs
|
||||
@ -45,7 +45,7 @@ Custom-Setup
|
||||
Library
|
||||
Build-Depends: array,
|
||||
base >=3 && <5,
|
||||
binary < 0.8,
|
||||
binary,
|
||||
bytestring,
|
||||
containers,
|
||||
extensible-exceptions,
|
||||
@ -132,3 +132,16 @@ Library
|
||||
C-Sources:
|
||||
system_encoding.c
|
||||
CPP-Options: -DSYSTEM_ENCODING
|
||||
|
||||
test-suite encoding-test
|
||||
type: exitcode-stdio-1.0
|
||||
hs-source-dirs: tests
|
||||
main-is: Main.hs
|
||||
other-modules: Test.Tester
|
||||
, Test.Tests
|
||||
build-depends: base
|
||||
, bytestring
|
||||
, encoding
|
||||
, HUnit
|
||||
, QuickCheck
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||
66
stack.yaml
Normal file
66
stack.yaml
Normal file
@ -0,0 +1,66 @@
|
||||
# This file was automatically generated by 'stack init'
|
||||
#
|
||||
# Some commonly used options have been documented as comments in this file.
|
||||
# For advanced use and comprehensive documentation of the format, please see:
|
||||
# http://docs.haskellstack.org/en/stable/yaml_configuration/
|
||||
|
||||
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
|
||||
# A snapshot resolver dictates the compiler version and the set of packages
|
||||
# to be used for project dependencies. For example:
|
||||
#
|
||||
# resolver: lts-3.5
|
||||
# resolver: nightly-2015-09-21
|
||||
# resolver: ghc-7.10.2
|
||||
# resolver: ghcjs-0.1.0_ghc-7.10.2
|
||||
# resolver:
|
||||
# name: custom-snapshot
|
||||
# location: "./custom-snapshot.yaml"
|
||||
resolver: lts-8.22
|
||||
|
||||
# User packages to be built.
|
||||
# Various formats can be used as shown in the example below.
|
||||
#
|
||||
# packages:
|
||||
# - some-directory
|
||||
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
|
||||
# - location:
|
||||
# git: https://github.com/commercialhaskell/stack.git
|
||||
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
|
||||
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
|
||||
# extra-dep: true
|
||||
# subdirs:
|
||||
# - auto-update
|
||||
# - wai
|
||||
#
|
||||
# A package marked 'extra-dep: true' will only be built if demanded by a
|
||||
# non-dependency (i.e. a user package), and its test suites and benchmarks
|
||||
# will not be run. This is useful for tweaking upstream packages.
|
||||
packages:
|
||||
- '.'
|
||||
# Dependency packages to be pulled from upstream that are not in the resolver
|
||||
# (e.g., acme-missiles-0.3)
|
||||
extra-deps: []
|
||||
|
||||
# Override default flag values for local packages and extra-deps
|
||||
flags: {}
|
||||
|
||||
# Extra package databases containing global packages
|
||||
extra-package-dbs: []
|
||||
|
||||
# Control whether we use the GHC we find on the path
|
||||
# system-ghc: true
|
||||
#
|
||||
# Require a specific version of stack, using version ranges
|
||||
# require-stack-version: -any # Default
|
||||
# require-stack-version: ">=1.4"
|
||||
#
|
||||
# Override the architecture used by stack, especially useful on Windows
|
||||
# arch: i386
|
||||
# arch: x86_64
|
||||
#
|
||||
# Extra directories used by stack for building
|
||||
# extra-include-dirs: [/path/to/dir]
|
||||
# extra-lib-dirs: [/path/to/dir]
|
||||
#
|
||||
# Allow a newer minor version of GHC than the snapshot specifies
|
||||
# compiler-check: newer-minor
|
||||
@ -3,16 +3,16 @@ import Test.HUnit
|
||||
import Test.Tests
|
||||
|
||||
hunitTests =
|
||||
[ ("utf8Tests", utf8Tests)
|
||||
, ("utf16Tests", utf16Tests)
|
||||
, ("punycodeTests", punycodeTests)
|
||||
, ("isoTests", isoTests)
|
||||
, ("jisTests", jisTests)
|
||||
, ("gb18030Tests", gb18030Tests)
|
||||
]
|
||||
[ ("utf8Tests", utf8Tests)
|
||||
, ("utf16Tests", utf16Tests)
|
||||
, ("punycodeTests", punycodeTests)
|
||||
, ("isoTests", isoTests)
|
||||
, ("jisTests", jisTests)
|
||||
, ("gb18030Tests", gb18030Tests)
|
||||
]
|
||||
|
||||
main = do
|
||||
identityTests
|
||||
forM_ hunitTests $ \(name, test) -> do
|
||||
putStrLn $ "running " ++ name
|
||||
runTestTT test >>= print
|
||||
identityTests
|
||||
forM_ hunitTests $ \(name, test) -> do
|
||||
putStrLn $ "running " ++ name
|
||||
runTestTT test >>= print
|
||||
|
||||
@ -13,14 +13,14 @@ import Prelude hiding (readFile)
|
||||
import System.IO.Encoding
|
||||
|
||||
data EncodingTest
|
||||
= forall enc. (Encoding enc,Show enc) =>
|
||||
EncodingTest enc String [Word8]
|
||||
| forall enc. (Encoding enc,Show enc) =>
|
||||
EncodingFileTest enc FilePath FilePath
|
||||
| forall enc. (Encoding enc,Show enc) =>
|
||||
DecodingError enc [Word8] DecodingException
|
||||
| forall enc. (Encoding enc,Show enc) =>
|
||||
EncodingError enc String EncodingException
|
||||
= forall enc. (Encoding enc,Show enc) =>
|
||||
EncodingTest enc String [Word8]
|
||||
| forall enc. (Encoding enc,Show enc) =>
|
||||
EncodingFileTest enc FilePath FilePath
|
||||
| forall enc. (Encoding enc,Show enc) =>
|
||||
DecodingError enc [Word8] DecodingException
|
||||
| forall enc. (Encoding enc,Show enc) =>
|
||||
EncodingError enc String EncodingException
|
||||
|
||||
instance Testable EncodingTest where
|
||||
test (EncodingTest enc src trg)
|
||||
|
||||
@ -70,173 +70,173 @@ identityTests = do
|
||||
utf8Tests :: Test
|
||||
utf8Tests = TestList $ map test $ concat
|
||||
[[EncodingTest enc "\x0041\x2262\x0391\x002E"
|
||||
[0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
|
||||
,EncodingTest enc "\xD55C\xAD6D\xC5B4"
|
||||
[0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
|
||||
,EncodingTest enc "\x65E5\x672C\x8A9E"
|
||||
[0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
|
||||
,EncodingTest enc "\x233B4"
|
||||
[0xF0,0xA3,0x8E,0xB4]
|
||||
,EncodingTest enc ""
|
||||
[]
|
||||
-- First possible sequence of a certain length
|
||||
,EncodingTest enc "\x0000"
|
||||
[0x00]
|
||||
,EncodingTest enc "\x0080"
|
||||
[0xC2,0x80]
|
||||
,EncodingTest enc "\x0800"
|
||||
[0xE0,0xA0,0x80]
|
||||
,EncodingTest enc "\x10000"
|
||||
[0xF0,0x90,0x80,0x80]
|
||||
-- Last possible sequence of a certain length
|
||||
,EncodingTest enc "\x007F"
|
||||
[0x7F]
|
||||
,EncodingTest enc "\x07FF"
|
||||
[0xDF,0xBF]
|
||||
,EncodingTest enc "\xFFFF"
|
||||
[0xEF,0xBF,0xBF]
|
||||
-- Other boundaries
|
||||
,EncodingTest enc "\xD7FF"
|
||||
[0xED,0x9F,0xBF]
|
||||
,EncodingTest enc "\xE000"
|
||||
[0xEE,0x80,0x80]
|
||||
,EncodingTest enc "\xFFFD"
|
||||
[0xEF,0xBF,0xBD]
|
||||
-- Illegal starting characters
|
||||
,DecodingError enc
|
||||
[0x65,0x55,0x85]
|
||||
(IllegalCharacter 0x85)
|
||||
-- Unexpected end
|
||||
,DecodingError enc
|
||||
[0x41,0xE2,0x89,0xA2,0xCE]
|
||||
UnexpectedEnd
|
||||
,DecodingError enc
|
||||
[0x41,0xE2,0x89]
|
||||
UnexpectedEnd
|
||||
,DecodingError enc
|
||||
[0x41,0xE2]
|
||||
UnexpectedEnd
|
||||
[0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
|
||||
,EncodingTest enc "\xD55C\xAD6D\xC5B4"
|
||||
[0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
|
||||
,EncodingTest enc "\x65E5\x672C\x8A9E"
|
||||
[0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
|
||||
,EncodingTest enc "\x233B4"
|
||||
[0xF0,0xA3,0x8E,0xB4]
|
||||
,EncodingTest enc ""
|
||||
[]
|
||||
-- First possible sequence of a certain length
|
||||
,EncodingTest enc "\x0000"
|
||||
[0x00]
|
||||
,EncodingTest enc "\x0080"
|
||||
[0xC2,0x80]
|
||||
,EncodingTest enc "\x0800"
|
||||
[0xE0,0xA0,0x80]
|
||||
,EncodingTest enc "\x10000"
|
||||
[0xF0,0x90,0x80,0x80]
|
||||
-- Last possible sequence of a certain length
|
||||
,EncodingTest enc "\x007F"
|
||||
[0x7F]
|
||||
,EncodingTest enc "\x07FF"
|
||||
[0xDF,0xBF]
|
||||
,EncodingTest enc "\xFFFF"
|
||||
[0xEF,0xBF,0xBF]
|
||||
-- Other boundaries
|
||||
,EncodingTest enc "\xD7FF"
|
||||
[0xED,0x9F,0xBF]
|
||||
,EncodingTest enc "\xE000"
|
||||
[0xEE,0x80,0x80]
|
||||
,EncodingTest enc "\xFFFD"
|
||||
[0xEF,0xBF,0xBD]
|
||||
-- Illegal starting characters
|
||||
,DecodingError enc
|
||||
[0x65,0x55,0x85]
|
||||
(IllegalCharacter 0x85)
|
||||
-- Unexpected end
|
||||
,DecodingError enc
|
||||
[0x41,0xE2,0x89,0xA2,0xCE]
|
||||
UnexpectedEnd
|
||||
,DecodingError enc
|
||||
[0x41,0xE2,0x89]
|
||||
UnexpectedEnd
|
||||
,DecodingError enc
|
||||
[0x41,0xE2]
|
||||
UnexpectedEnd
|
||||
]
|
||||
| enc <- [UTF8,UTF8Strict]
|
||||
]++
|
||||
[DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
|
||||
,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
|
||||
-- Overlong representations of '/'
|
||||
,DecodingError UTF8Strict [0xC0,0xAF]
|
||||
(IllegalRepresentation [0xC0,0xAF])
|
||||
,DecodingError UTF8Strict [0xE0,0x80,0xAF]
|
||||
(IllegalRepresentation [0xE0,0x80,0xAF])
|
||||
,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF]
|
||||
(IllegalRepresentation [0xF0,0x80,0x80,0xAF])
|
||||
-- Maximum overlong sequences
|
||||
,DecodingError UTF8Strict [0xC1,0xBF]
|
||||
(IllegalRepresentation [0xC1,0xBF])
|
||||
,DecodingError UTF8Strict [0xE0,0x9F,0xBF]
|
||||
(IllegalRepresentation [0xE0,0x9F,0xBF])
|
||||
,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF]
|
||||
(IllegalRepresentation [0xF0,0x8F,0xBF,0xBF])
|
||||
-- Overlong represenations of '\NUL'
|
||||
,DecodingError UTF8Strict [0xC0,0x80]
|
||||
(IllegalRepresentation [0xC0,0x80])
|
||||
,DecodingError UTF8Strict [0xE0,0x80,0x80]
|
||||
(IllegalRepresentation [0xE0,0x80,0x80])
|
||||
,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80]
|
||||
(IllegalRepresentation [0xF0,0x80,0x80,0x80])
|
||||
-- Invalid extends
|
||||
-- 2 of 2
|
||||
,DecodingError UTF8Strict [0xCC,0x1C,0xE0]
|
||||
(IllegalCharacter 0x1C)
|
||||
-- 2 of 3
|
||||
,DecodingError UTF8Strict [0xE3,0x6C,0xB3]
|
||||
(IllegalCharacter 0x6C)
|
||||
-- 3 of 3
|
||||
,DecodingError UTF8Strict [0xE3,0xB4,0x6D]
|
||||
(IllegalCharacter 0x6D)
|
||||
-- 2 of 4
|
||||
,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3]
|
||||
(IllegalCharacter 0x6C)
|
||||
-- 3 of 4
|
||||
,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3]
|
||||
(IllegalCharacter 0x6C)
|
||||
-- 4 of 4
|
||||
,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C]
|
||||
(IllegalCharacter 0x6C)
|
||||
]
|
||||
[DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
|
||||
,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
|
||||
-- Overlong representations of '/'
|
||||
,DecodingError UTF8Strict [0xC0,0xAF]
|
||||
(IllegalRepresentation [0xC0,0xAF])
|
||||
,DecodingError UTF8Strict [0xE0,0x80,0xAF]
|
||||
(IllegalRepresentation [0xE0,0x80,0xAF])
|
||||
,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF]
|
||||
(IllegalRepresentation [0xF0,0x80,0x80,0xAF])
|
||||
-- Maximum overlong sequences
|
||||
,DecodingError UTF8Strict [0xC1,0xBF]
|
||||
(IllegalRepresentation [0xC1,0xBF])
|
||||
,DecodingError UTF8Strict [0xE0,0x9F,0xBF]
|
||||
(IllegalRepresentation [0xE0,0x9F,0xBF])
|
||||
,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF]
|
||||
(IllegalRepresentation [0xF0,0x8F,0xBF,0xBF])
|
||||
-- Overlong represenations of '\NUL'
|
||||
,DecodingError UTF8Strict [0xC0,0x80]
|
||||
(IllegalRepresentation [0xC0,0x80])
|
||||
,DecodingError UTF8Strict [0xE0,0x80,0x80]
|
||||
(IllegalRepresentation [0xE0,0x80,0x80])
|
||||
,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80]
|
||||
(IllegalRepresentation [0xF0,0x80,0x80,0x80])
|
||||
-- Invalid extends
|
||||
-- 2 of 2
|
||||
,DecodingError UTF8Strict [0xCC,0x1C,0xE0]
|
||||
(IllegalCharacter 0x1C)
|
||||
-- 2 of 3
|
||||
,DecodingError UTF8Strict [0xE3,0x6C,0xB3]
|
||||
(IllegalCharacter 0x6C)
|
||||
-- 3 of 3
|
||||
,DecodingError UTF8Strict [0xE3,0xB4,0x6D]
|
||||
(IllegalCharacter 0x6D)
|
||||
-- 2 of 4
|
||||
,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3]
|
||||
(IllegalCharacter 0x6C)
|
||||
-- 3 of 4
|
||||
,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3]
|
||||
(IllegalCharacter 0x6C)
|
||||
-- 4 of 4
|
||||
,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C]
|
||||
(IllegalCharacter 0x6C)
|
||||
]
|
||||
|
||||
utf16Tests :: Test
|
||||
utf16Tests = TestList $ map test $
|
||||
[EncodingTest UTF16BE "z"
|
||||
[0x00,0x7A]
|
||||
,EncodingTest UTF16BE "\x6C34"
|
||||
[0x6C,0x34]
|
||||
,EncodingTest UTF16BE "\x1D11E"
|
||||
[0xD8,0x34,0xDD,0x1E]
|
||||
,EncodingTest UTF16 "\x6C34z\x1D11E"
|
||||
[0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
|
||||
,EncodingTest UTF16BE "˨"
|
||||
[0x02,0xE8]
|
||||
,DecodingError UTF16LE [0x65,0xDC]
|
||||
(IllegalCharacter 0xDC)
|
||||
,DecodingError UTF16BE [0xDC,0x33]
|
||||
(IllegalCharacter 0xDC)
|
||||
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33]
|
||||
(IllegalCharacter 0xDA)
|
||||
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66]
|
||||
(IllegalCharacter 0xDA)
|
||||
]
|
||||
[EncodingTest UTF16BE "z"
|
||||
[0x00,0x7A]
|
||||
,EncodingTest UTF16BE "\x6C34"
|
||||
[0x6C,0x34]
|
||||
,EncodingTest UTF16BE "\x1D11E"
|
||||
[0xD8,0x34,0xDD,0x1E]
|
||||
,EncodingTest UTF16 "\x6C34z\x1D11E"
|
||||
[0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
|
||||
,EncodingTest UTF16BE "˨"
|
||||
[0x02,0xE8]
|
||||
,DecodingError UTF16LE [0x65,0xDC]
|
||||
(IllegalCharacter 0xDC)
|
||||
,DecodingError UTF16BE [0xDC,0x33]
|
||||
(IllegalCharacter 0xDC)
|
||||
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33]
|
||||
(IllegalCharacter 0xDA)
|
||||
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66]
|
||||
(IllegalCharacter 0xDA)
|
||||
]
|
||||
|
||||
punycodeTests :: Test
|
||||
punycodeTests = TestList $ map test $
|
||||
[EncodingTest punycode "abcdef"
|
||||
(map (fromIntegral.ord) "abcdef-")
|
||||
,EncodingTest punycode "abæcdöef"
|
||||
(map (fromIntegral.ord) "abcdef-qua4k")
|
||||
,EncodingTest punycode "schön"
|
||||
(map (fromIntegral.ord) "schn-7qa")
|
||||
,EncodingTest punycode "ยจฆฟคฏข"
|
||||
(map (fromIntegral.ord) "22cdfh1b8fsa")
|
||||
,EncodingTest punycode "☺"
|
||||
(map (fromIntegral.ord) "74h")
|
||||
-- taken from http://tools.ietf.org/html/rfc3492#section-7
|
||||
-- Arabic (Egyptian)
|
||||
,punyTest "ليهمابتكلموشعربي؟"
|
||||
"egbpdaj6bu4bxfgehfvwxn"
|
||||
-- Chinese (simplified)
|
||||
,punyTest "他们为什么不说中文"
|
||||
"ihqwcrb4cv8a8dqg056pqjye"
|
||||
-- Chinese (traditional)
|
||||
,punyTest "他們爲什麽不說中文"
|
||||
"ihqwctvzc91f659drss3x8bo0yb"
|
||||
-- Czech
|
||||
,punyTest "Pročprostěnemluvíčesky"
|
||||
"Proprostnemluvesky-uyb24dma41a"
|
||||
-- Hebrew
|
||||
,punyTest "למההםפשוטלאמדבריםעברית"
|
||||
"4dbcagdahymbxekheh6e0a7fei0b"
|
||||
-- Hindi (Devanagari)
|
||||
,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
|
||||
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
|
||||
-- Japanese (kanji and hiragana)
|
||||
,punyTest "なぜみんな日本語を話してくれないのか"
|
||||
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
|
||||
-- Korean (Hangul syllables)
|
||||
,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
|
||||
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
|
||||
-- Russian (Cyrillic)
|
||||
,punyTest "почемужеонинеговорятпорусски"
|
||||
"b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
|
||||
-- Spanish
|
||||
,punyTest "PorquénopuedensimplementehablarenEspañol"
|
||||
"PorqunopuedensimplementehablarenEspaol-fmd56a"
|
||||
-- Vietnamese
|
||||
,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
|
||||
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
|
||||
{-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
|
||||
\bar\xE2\x80\x8B\xE2\x81\xA0\
|
||||
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
|
||||
"foobarbaz"-}
|
||||
]
|
||||
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
||||
[EncodingTest punycode "abcdef"
|
||||
(map (fromIntegral.ord) "abcdef-")
|
||||
,EncodingTest punycode "abæcdöef"
|
||||
(map (fromIntegral.ord) "abcdef-qua4k")
|
||||
,EncodingTest punycode "schön"
|
||||
(map (fromIntegral.ord) "schn-7qa")
|
||||
,EncodingTest punycode "ยจฆฟคฏข"
|
||||
(map (fromIntegral.ord) "22cdfh1b8fsa")
|
||||
,EncodingTest punycode "☺"
|
||||
(map (fromIntegral.ord) "74h")
|
||||
-- taken from http://tools.ietf.org/html/rfc3492#section-7
|
||||
-- Arabic (Egyptian)
|
||||
,punyTest "ليهمابتكلموشعربي؟"
|
||||
"egbpdaj6bu4bxfgehfvwxn"
|
||||
-- Chinese (simplified)
|
||||
,punyTest "他们为什么不说中文"
|
||||
"ihqwcrb4cv8a8dqg056pqjye"
|
||||
-- Chinese (traditional)
|
||||
,punyTest "他們爲什麽不說中文"
|
||||
"ihqwctvzc91f659drss3x8bo0yb"
|
||||
-- Czech
|
||||
,punyTest "Pročprostěnemluvíčesky"
|
||||
"Proprostnemluvesky-uyb24dma41a"
|
||||
-- Hebrew
|
||||
,punyTest "למההםפשוטלאמדבריםעברית"
|
||||
"4dbcagdahymbxekheh6e0a7fei0b"
|
||||
-- Hindi (Devanagari)
|
||||
,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
|
||||
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
|
||||
-- Japanese (kanji and hiragana)
|
||||
,punyTest "なぜみんな日本語を話してくれないのか"
|
||||
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
|
||||
-- Korean (Hangul syllables)
|
||||
,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
|
||||
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
|
||||
-- Russian (Cyrillic)
|
||||
,punyTest "почемужеонинеговорятпорусски"
|
||||
"b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
|
||||
-- Spanish
|
||||
,punyTest "PorquénopuedensimplementehablarenEspañol"
|
||||
"PorqunopuedensimplementehablarenEspaol-fmd56a"
|
||||
-- Vietnamese
|
||||
,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
|
||||
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
|
||||
{-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
|
||||
\bar\xE2\x80\x8B\xE2\x81\xA0\
|
||||
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
|
||||
"foobarbaz"-}
|
||||
]
|
||||
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
||||
|
||||
isoTests :: Test
|
||||
isoTests = TestList $ map test $
|
||||
|
||||
Loading…
Reference in New Issue
Block a user