Made package stack compatible

Created and added stack.yaml and .gitignore files.
Relaxed the version dependency on 'binary' package in cabal file.  Is that OK?
Also brought the minimum cabal version to >=1.8, so I could add a test target that pulls in the library.
Changed all tabs to spaces - I don't know when the Haskell compiler started giving warnings about that.
This commit is contained in:
Scott Sedgwick 2017-07-28 14:17:39 +10:00
parent 6284c1a677
commit 8727ac25a5
15 changed files with 478 additions and 345 deletions

22
.gitignore vendored Normal file
View File

@ -0,0 +1,22 @@
### Haskell ###
dist
dist-*
cabal-dev
*.o
*.hi
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
.HTF/

View File

@ -359,6 +359,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
-- | Takes the name of an encoding and creates a dynamic encoding from it.
encodingFromString :: String -> DynEncoding
encodingFromString str = maybe
(error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str)
id
(encodingFromStringExplicit str)
(error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str)
id
(encodingFromStringExplicit str)

View File

@ -2,8 +2,8 @@
{- | This implements BootString en- and decoding, the foundation of Punycode
-}
module Data.Encoding.BootString
(BootString(..)
,punycode) where
(BootString(..)
,punycode) where
import Data.Encoding.Base
import Data.Encoding.Exception
@ -17,77 +17,77 @@ import Data.Typeable
import Control.Monad (when)
data BootString = BootString
{base :: Int
,tmin :: Int
,tmax :: Int
,skew :: Int
,damp :: Int
,init_bias :: Int
,init_n :: Int
}
deriving (Show,Eq,Typeable)
{base :: Int
,tmin :: Int
,tmax :: Int
,skew :: Int
,damp :: Int
,init_bias :: Int
,init_n :: Int
}
deriving (Show,Eq,Typeable)
punycode :: BootString
punycode = BootString
{base = 36
,tmin = 1
,tmax = 26
,skew = 38
,damp = 700
,init_bias = 72
,init_n = 0x80
}
{base = 36
,tmin = 1
,tmax = 26
,skew = 38
,damp = 700
,init_bias = 72
,init_n = 0x80
}
punyValue :: ByteSource m => Word8 -> m Int
punyValue c
| n < 0x30 = norep
| n <= 0x39 = return $ n-0x30+26
| n < 0x41 = norep
| n <= 0x5A = return $ n-0x41
| n < 0x61 = norep
| n <= 0x7A = return $ n-0x61
| otherwise = norep
where
n = fromIntegral c
norep = throwException (IllegalCharacter c)
| n < 0x30 = norep
| n <= 0x39 = return $ n-0x30+26
| n < 0x41 = norep
| n <= 0x5A = return $ n-0x41
| n < 0x61 = norep
| n <= 0x7A = return $ n-0x61
| otherwise = norep
where
n = fromIntegral c
norep = throwException (IllegalCharacter c)
punyChar :: ByteSink m => Int -> m Word8
punyChar c
| c < 0 = norep
| c < 26 = return $ fromIntegral $ 0x61+c
| c < 36 = return $ fromIntegral $ 0x30+c-26
| otherwise = norep
where
norep = throwException (HasNoRepresentation (chr c))
| c < 0 = norep
| c < 26 = return $ fromIntegral $ 0x61+c
| c < 36 = return $ fromIntegral $ 0x30+c-26
| otherwise = norep
where
norep = throwException (HasNoRepresentation (chr c))
getT :: BootString -> Int -> Int -> Int
getT bs k bias
| k <= bias + (tmin bs) = tmin bs
| k >= bias + (tmax bs) = tmax bs
| otherwise = k-bias
| k <= bias + (tmin bs) = tmin bs
| k >= bias + (tmax bs) = tmax bs
| otherwise = k-bias
adapt :: BootString -> Int -> Int -> Bool -> Int
adapt bs delta numpoints firsttime = let
delta1 = if firsttime
then delta `div` (damp bs)
else delta `div` 2
delta2 = delta1 + (delta1 `div` numpoints)
(rd,rk) = head
$ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst)
$ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0)
in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs))
delta1 = if firsttime
then delta `div` (damp bs)
else delta `div` 2
delta2 = delta1 + (delta1 `div` numpoints)
(rd,rk) = head
$ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst)
$ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0)
in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs))
decodeValue :: ByteSource m => BootString -> Int -> Int -> Int -> Int -> [Int] -> m (Int,[Int])
decodeValue bs bias i k w (x:xs)
| x >= base bs = throwException OutOfRange
| x > (maxBound - i) `div` w = throwException OutOfRange
| x < t = return (ni,xs)
| w > maxBound `div` (base bs - t) = throwException OutOfRange
| x >= base bs = throwException OutOfRange
| x > (maxBound - i) `div` w = throwException OutOfRange
| x < t = return (ni,xs)
| w > maxBound `div` (base bs - t) = throwException OutOfRange
| null xs = throwException OutOfRange
| otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs
where
ni = i + x*w
t = getT bs k bias
| otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs
where
ni = i + x*w
t = getT bs k bias
decodeValues :: ByteSource m => BootString -> Int -> [Int] -> m [(Char,Int)]
decodeValues bs len xs = decodeValues' bs (init_n bs) 0 (init_bias bs) len xs
@ -108,8 +108,8 @@ decodeValues' bs n i bias len xs = do
insertDeltas :: [(a,Int)] -> [a] -> [a]
insertDeltas [] str = str
insertDeltas ((c,p):xs) str = let
(l,r) = splitAt p str
in insertDeltas xs (l++[c]++r)
(l,r) = splitAt p str
in insertDeltas xs (l++[c]++r)
punyDecode :: ByteSource m => [Word8] -> [Word8] -> m String
punyDecode base ext = do
@ -119,32 +119,32 @@ punyDecode base ext = do
encodeValue :: BootString -> Int -> Int -> Int -> Int -> [Int]
encodeValue bs bias delta n c = unfoldr (\(q,k,out) -> let
t = getT bs k bias
(nq,dc) = (q-t) `divMod` (base bs - t)
in if out
then Nothing
else (if q < t
then Just (q,(q,k+base bs,True))
else Just (t + dc,(nq,k+base bs,False)))
) (delta,base bs,False)
t = getT bs k bias
(nq,dc) = (q-t) `divMod` (base bs - t)
in if out
then Nothing
else (if q < t
then Just (q,(q,k+base bs,True))
else Just (t + dc,(nq,k+base bs,False)))
) (delta,base bs,False)
encodeValues' :: BootString -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int,Int,Int,[Int])
encodeValues' _ _ h bias delta _ [] = (delta,h,bias,[])
encodeValues' bs b h bias delta n (c:cs) = case compare c n of
LT -> encodeValues' bs b h bias (delta+1) n cs
GT -> encodeValues' bs b h bias delta n cs
EQ -> let
(ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs
xs = encodeValue bs bias delta n c
in (ndelta,nh,nbias,xs++rest)
LT -> encodeValues' bs b h bias (delta+1) n cs
GT -> encodeValues' bs b h bias delta n cs
EQ -> let
(ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs
xs = encodeValue bs bias delta n c
in (ndelta,nh,nbias,xs++rest)
encodeValues :: BootString -> Int -> Int -> Int -> Int -> Int -> Int -> [Int] -> [Int]
encodeValues bs b l h bias delta n cps
| h == l = []
| otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps
where
m = minimum (filter (>=n) cps)
(ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps
| h == l = []
| otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps
where
m = minimum (filter (>=n) cps)
(ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps
breakLast :: (a -> Bool) -> [a] -> Maybe ([a],[a])
breakLast p xs = do
@ -166,7 +166,7 @@ instance Encoding BootString where
encodeChar _ c = error "Data.Encoding.BootString.encodeChar: Please use 'encode' for encoding BootStrings"
decodeChar _ = error "Data.Encoding.BootString.decodeChar: Please use 'decode' for decoding BootStrings"
encode bs str = let (base,nbase) = partition (\c -> ord c < init_n bs) str
b = length base
b = length base
in do
res <- mapM punyChar $ encodeValues bs b (length str) b (init_bias bs) 0 (init_n bs) (map ord str)
when (not $ null base) $ do
@ -178,8 +178,8 @@ instance Encoding BootString where
let m = fromIntegral $ ord '-'
case breakLast (==m) wrds of
Just ([],_) -> throwException (IllegalCharacter m)
Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of
Nothing -> punyDecode base nbase
Just ww -> throwException (IllegalCharacter ww)
Nothing -> punyDecode [] wrds
Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of
Nothing -> punyDecode base nbase
Just ww -> throwException (IllegalCharacter ww)
Nothing -> punyDecode [] wrds
encodeable bs c = True -- XXX: hm, really?

View File

@ -9,8 +9,8 @@ import Control.Monad.Identity
-- | This exception type is thrown whenever something went wrong during the
-- encoding-process.
data EncodingException
= HasNoRepresentation Char -- ^ Thrown if a specific character
-- is not representable in an encoding.
= HasNoRepresentation Char -- ^ Thrown if a specific character
-- is not representable in an encoding.
deriving (Eq,Ord,Show,Read,Typeable)
instance Exception EncodingException
@ -18,13 +18,13 @@ instance Exception EncodingException
-- | This exception type is thrown whenever something went wrong during the
-- decoding-process.
data DecodingException
= IllegalCharacter Word8 -- ^ The sequence contained an illegal
-- byte that couldn't be decoded.
| UnexpectedEnd -- ^ more bytes were needed to allow a
-- successfull decoding.
| OutOfRange -- ^ the decoded value was out of the unicode range
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
-- character, but is illegal.
= IllegalCharacter Word8 -- ^ The sequence contained an illegal
-- byte that couldn't be decoded.
| UnexpectedEnd -- ^ more bytes were needed to allow a
-- successfull decoding.
| OutOfRange -- ^ the decoded value was out of the unicode range
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
-- character, but is illegal.
deriving (Eq,Ord,Show,Read,Typeable)
instance Exception DecodingException

View File

@ -3,7 +3,7 @@
See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
-}
module Data.Encoding.KOI8R
(KOI8R(..)) where
(KOI8R(..)) where
import Control.Throws
import Data.Array.Unboxed
@ -28,23 +28,23 @@ koi8rMap = fromList (zip koi8rList [128..])
koi8rList :: [Char]
koi8rList =
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
]
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
]
instance Encoding KOI8R where
decodeChar _ = do
@ -53,8 +53,8 @@ instance Encoding KOI8R where
then return $ chr $ fromIntegral w
else return $ koi8rArr!w
encodeChar _ ch
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
| otherwise = case lookup ch koi8rMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
| otherwise = case lookup ch koi8rMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8rMap

View File

@ -3,7 +3,7 @@
See <http://en.wikipedia.org/wiki/KOI8-U> for more information.
-}
module Data.Encoding.KOI8U
(KOI8U(..)) where
(KOI8U(..)) where
import Data.Encoding.Base
import Data.Encoding.ByteSource
@ -28,23 +28,23 @@ koi8uMap = fromList (zip koi8uList [128..])
koi8uList :: [Char]
koi8uList =
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
]
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
]
instance Encoding KOI8U where
decodeChar _ = do
@ -53,8 +53,8 @@ instance Encoding KOI8U where
then return $ chr $ fromIntegral w
else return $ koi8uArr!w
encodeChar _ ch
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
| otherwise = case lookup ch koi8uMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
| otherwise = case lookup ch koi8uMap of
Just w -> pushWord8 w
Nothing -> throwException (HasNoRepresentation ch)
encodeable _ c = member c koi8uMap

View File

@ -47,14 +47,14 @@ data CharacterMapping_normalization = CharacterMapping_normalization_undetermine
deriving (Eq,Show)
data Stateful_siso = Stateful_siso Validity Validity
deriving (Eq,Show)
newtype History = History (List1 Modified) deriving (Eq,Show)
newtype History = History (List1 Modified) deriving (Eq,Show)
data Modified = Modified Modified_Attrs String
deriving (Eq,Show)
data Modified_Attrs = Modified_Attrs
{ modifiedVersion :: String
, modifiedDate :: String
} deriving (Eq,Show)
newtype Validity = Validity (List1 State) deriving (Eq,Show)
newtype Validity = Validity (List1 State) deriving (Eq,Show)
data State = State
{ stateType :: String
, stateNext :: String
@ -112,10 +112,10 @@ data Escape = Escape
{ escapeSequence :: String
, escapeName :: String
} deriving (Eq,Show)
newtype Si = Si (List1 Designator) deriving (Eq,Show)
newtype So = So (List1 Designator) deriving (Eq,Show)
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
newtype Si = Si (List1 Designator) deriving (Eq,Show)
newtype So = So (List1 Designator) deriving (Eq,Show)
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
data Designator = Designator
{ designatorSequence :: String
, designatorName :: String

View File

@ -18,9 +18,9 @@ import Data.Typeable
import Data.Word
data UTF16
= UTF16 -- ^ Decodes big and little endian, encodes big endian.
| UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
| UTF16LE -- ^ Little endian decoding and encoding.
= UTF16 -- ^ Decodes big and little endian, encodes big endian.
| UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
| UTF16LE -- ^ Little endian decoding and encoding.
deriving (Eq,Show,Typeable)
readBOM :: ByteSource m => m (Either Char UTF16)

View File

@ -16,10 +16,10 @@ import Data.Typeable
data UTF32
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
| UTF32BE -- ^ Encodes and decodes using the big endian encoding.
| UTF32LE -- ^ Encodes and decodes using the little endian encoding.
deriving (Eq,Show,Typeable)
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
| UTF32BE -- ^ Encodes and decodes using the big endian encoding.
| UTF32LE -- ^ Encodes and decodes using the little endian encoding.
deriving (Eq,Show,Typeable)
instance Encoding UTF32 where
encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch)

View File

@ -67,17 +67,23 @@ import Control.Monad.Reader (runReaderT)
-- encoding.
hGetContents :: (Encoding e,?enc :: e) => Handle -> IO String
hGetContents h = do
str <- LBS.hGetContents h
return $ decodeLazyByteString ?enc str
str <- LBS.hGetContents h
return $ decodeLazyByteString ?enc str
-- | Like the normal 'System.IO.getContents', but decodes the input using an
-- encoding.
getContents :: (Encoding e,?enc :: e) => IO String
getContents = do
str <- LBS.getContents
return $ decodeLazyByteString ?enc str
-- | Like the normal 'System.IO.putStr', but decodes the input using an
-- encoding.
putStr :: (Encoding e,?enc :: e) => String -> IO ()
putStr = hPutStr stdout
-- | Like the normal 'System.IO.putStrLn', but decodes the input using an
-- encoding.
putStrLn :: (Encoding e,?enc :: e) => String -> IO ()
putStrLn = hPutStrLn stdout
@ -86,46 +92,72 @@ putStrLn = hPutStrLn stdout
hPutStr :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
hPutStr h str = LBS.hPut h (encodeLazyByteString ?enc str)
-- | Like the normal 'System.IO.hPutStrLn', but decodes the input using an
-- encoding.
hPutStrLn :: (Encoding e,?enc :: e) => Handle -> String -> IO ()
hPutStrLn h str = do
LBS.hPut h (encodeLazyByteString ?enc str)
LBS.hPut h (encodeLazyByteString ?enc "\n")
-- | Like the normal 'System.IO.print', but decodes the input using an
-- encoding.
print :: (Encoding e,Show a,?enc :: e) => a -> IO ()
print = hPrint stdout
-- | Like the normal 'System.IO.hPrint', but decodes the input using an
-- encoding.
hPrint :: (Encoding e,Show a,?enc :: e) => Handle -> a -> IO ()
hPrint h x = hPutStrLn h (show x)
-- | Like the normal 'System.IO.readFile', but decodes the input using an
-- encoding.
readFile :: (Encoding e,?enc :: e) => FilePath -> IO String
readFile fn = LBS.readFile fn >>= return.(decodeLazyByteString ?enc)
-- | Like the normal 'System.IO.writeFile', but decodes the input using an
-- encoding.
writeFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
writeFile fn str = LBS.writeFile fn $ encodeLazyByteString ?enc str
-- | Like the normal 'System.IO.appendFile', but decodes the input using an
-- encoding.
appendFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO ()
appendFile fn str = LBS.appendFile fn $ encodeLazyByteString ?enc str
-- | Like the normal 'System.IO.getChar', but decodes the input using an
-- encoding.
getChar :: (Encoding e,?enc :: e) => IO Char
getChar = hGetChar stdin
-- | Like the normal 'System.IO.hGetChar', but decodes the input using an
-- encoding.
hGetChar :: (Encoding e,?enc :: e) => Handle -> IO Char
hGetChar h = runReaderT (decodeChar ?enc) h
-- | Like the normal 'System.IO.getLine', but decodes the input using an
-- encoding.
getLine :: (Encoding e,?enc :: e) => IO String
getLine = hGetLine stdin
-- | Like the normal 'System.IO.hGetLine', but decodes the input using an
-- encoding.
hGetLine :: (Encoding e,?enc :: e) => Handle -> IO String
hGetLine h = do
line <- BS.hGetLine h
return $ decodeStrictByteString ?enc line
-- | Like the normal 'System.IO.putChar', but decodes the input using an
-- encoding.
putChar :: (Encoding e,?enc :: e) => Char -> IO ()
putChar = hPutChar stdout
-- | Like the normal 'System.IO.hPutChar', but decodes the input using an
-- encoding.
hPutChar :: (Encoding e,?enc :: e) => Handle -> Char -> IO ()
hPutChar h c = runReaderT (encodeChar ?enc c) h
-- | Like the normal 'System.IO.interact', but decodes the input using an
-- encoding.
interact :: (Encoding e,?enc :: e) => (String -> String) -> IO ()
interact f = do
line <- hGetLine stdin
@ -133,7 +165,7 @@ interact f = do
#ifdef SYSTEM_ENCODING
foreign import ccall "system_encoding.h get_system_encoding"
get_system_encoding :: IO CString
get_system_encoding :: IO CString
#endif
-- | Returns the encoding used on the current system. Currently only supported
@ -141,9 +173,9 @@ foreign import ccall "system_encoding.h get_system_encoding"
getSystemEncoding :: IO DynEncoding
getSystemEncoding = do
#ifdef SYSTEM_ENCODING
enc <- get_system_encoding
str <- peekCString enc
return $ encodingFromString str
enc <- get_system_encoding
str <- peekCString enc
return $ encodingFromString str
#else
error "getSystemEncoding is not supported on this platform"
error "getSystemEncoding is not supported on this platform"
#endif

View File

@ -1,16 +1,16 @@
Name: encoding
Version: 0.8.1
Author: Henning Günther
Maintainer: daniel@wagner-home.com
License: BSD3
Name: encoding
Version: 0.8.1
Author: Henning Günther
Maintainer: daniel@wagner-home.com
License: BSD3
License-File: LICENSE
Synopsis: A library for various character encodings
Synopsis: A library for various character encodings
Description:
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Category: Codec
Homepage: http://code.haskell.org/encoding/
Cabal-Version: >=1.6
Build-Type: Custom
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Category: Codec
Homepage: http://code.haskell.org/encoding/
Cabal-Version: >=1.8
Build-Type: Custom
Extra-Source-Files:
CHANGELOG
Data/Encoding/Preprocessor/Mapping.hs
@ -45,7 +45,7 @@ Custom-Setup
Library
Build-Depends: array,
base >=3 && <5,
binary < 0.8,
binary,
bytestring,
containers,
extensible-exceptions,
@ -132,3 +132,16 @@ Library
C-Sources:
system_encoding.c
CPP-Options: -DSYSTEM_ENCODING
test-suite encoding-test
type: exitcode-stdio-1.0
hs-source-dirs: tests
main-is: Main.hs
other-modules: Test.Tester
, Test.Tests
build-depends: base
, bytestring
, encoding
, HUnit
, QuickCheck
ghc-options: -threaded -rtsopts -with-rtsopts=-N

66
stack.yaml Normal file
View File

@ -0,0 +1,66 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-8.22
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- '.'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []
# Override default flag values for local packages and extra-deps
flags: {}
# Extra package databases containing global packages
extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.4"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor

View File

@ -3,16 +3,16 @@ import Test.HUnit
import Test.Tests
hunitTests =
[ ("utf8Tests", utf8Tests)
, ("utf16Tests", utf16Tests)
, ("punycodeTests", punycodeTests)
, ("isoTests", isoTests)
, ("jisTests", jisTests)
, ("gb18030Tests", gb18030Tests)
]
[ ("utf8Tests", utf8Tests)
, ("utf16Tests", utf16Tests)
, ("punycodeTests", punycodeTests)
, ("isoTests", isoTests)
, ("jisTests", jisTests)
, ("gb18030Tests", gb18030Tests)
]
main = do
identityTests
forM_ hunitTests $ \(name, test) -> do
putStrLn $ "running " ++ name
runTestTT test >>= print
identityTests
forM_ hunitTests $ \(name, test) -> do
putStrLn $ "running " ++ name
runTestTT test >>= print

View File

@ -13,14 +13,14 @@ import Prelude hiding (readFile)
import System.IO.Encoding
data EncodingTest
= forall enc. (Encoding enc,Show enc) =>
EncodingTest enc String [Word8]
| forall enc. (Encoding enc,Show enc) =>
EncodingFileTest enc FilePath FilePath
| forall enc. (Encoding enc,Show enc) =>
DecodingError enc [Word8] DecodingException
| forall enc. (Encoding enc,Show enc) =>
EncodingError enc String EncodingException
= forall enc. (Encoding enc,Show enc) =>
EncodingTest enc String [Word8]
| forall enc. (Encoding enc,Show enc) =>
EncodingFileTest enc FilePath FilePath
| forall enc. (Encoding enc,Show enc) =>
DecodingError enc [Word8] DecodingException
| forall enc. (Encoding enc,Show enc) =>
EncodingError enc String EncodingException
instance Testable EncodingTest where
test (EncodingTest enc src trg)

View File

@ -70,173 +70,173 @@ identityTests = do
utf8Tests :: Test
utf8Tests = TestList $ map test $ concat
[[EncodingTest enc "\x0041\x2262\x0391\x002E"
[0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
,EncodingTest enc "\xD55C\xAD6D\xC5B4"
[0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
,EncodingTest enc "\x65E5\x672C\x8A9E"
[0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
,EncodingTest enc "\x233B4"
[0xF0,0xA3,0x8E,0xB4]
,EncodingTest enc ""
[]
-- First possible sequence of a certain length
,EncodingTest enc "\x0000"
[0x00]
,EncodingTest enc "\x0080"
[0xC2,0x80]
,EncodingTest enc "\x0800"
[0xE0,0xA0,0x80]
,EncodingTest enc "\x10000"
[0xF0,0x90,0x80,0x80]
-- Last possible sequence of a certain length
,EncodingTest enc "\x007F"
[0x7F]
,EncodingTest enc "\x07FF"
[0xDF,0xBF]
,EncodingTest enc "\xFFFF"
[0xEF,0xBF,0xBF]
-- Other boundaries
,EncodingTest enc "\xD7FF"
[0xED,0x9F,0xBF]
,EncodingTest enc "\xE000"
[0xEE,0x80,0x80]
,EncodingTest enc "\xFFFD"
[0xEF,0xBF,0xBD]
-- Illegal starting characters
,DecodingError enc
[0x65,0x55,0x85]
(IllegalCharacter 0x85)
-- Unexpected end
,DecodingError enc
[0x41,0xE2,0x89,0xA2,0xCE]
UnexpectedEnd
,DecodingError enc
[0x41,0xE2,0x89]
UnexpectedEnd
,DecodingError enc
[0x41,0xE2]
UnexpectedEnd
[0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E]
,EncodingTest enc "\xD55C\xAD6D\xC5B4"
[0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4]
,EncodingTest enc "\x65E5\x672C\x8A9E"
[0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E]
,EncodingTest enc "\x233B4"
[0xF0,0xA3,0x8E,0xB4]
,EncodingTest enc ""
[]
-- First possible sequence of a certain length
,EncodingTest enc "\x0000"
[0x00]
,EncodingTest enc "\x0080"
[0xC2,0x80]
,EncodingTest enc "\x0800"
[0xE0,0xA0,0x80]
,EncodingTest enc "\x10000"
[0xF0,0x90,0x80,0x80]
-- Last possible sequence of a certain length
,EncodingTest enc "\x007F"
[0x7F]
,EncodingTest enc "\x07FF"
[0xDF,0xBF]
,EncodingTest enc "\xFFFF"
[0xEF,0xBF,0xBF]
-- Other boundaries
,EncodingTest enc "\xD7FF"
[0xED,0x9F,0xBF]
,EncodingTest enc "\xE000"
[0xEE,0x80,0x80]
,EncodingTest enc "\xFFFD"
[0xEF,0xBF,0xBD]
-- Illegal starting characters
,DecodingError enc
[0x65,0x55,0x85]
(IllegalCharacter 0x85)
-- Unexpected end
,DecodingError enc
[0x41,0xE2,0x89,0xA2,0xCE]
UnexpectedEnd
,DecodingError enc
[0x41,0xE2,0x89]
UnexpectedEnd
,DecodingError enc
[0x41,0xE2]
UnexpectedEnd
]
| enc <- [UTF8,UTF8Strict]
]++
[DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
-- Overlong representations of '/'
,DecodingError UTF8Strict [0xC0,0xAF]
(IllegalRepresentation [0xC0,0xAF])
,DecodingError UTF8Strict [0xE0,0x80,0xAF]
(IllegalRepresentation [0xE0,0x80,0xAF])
,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF]
(IllegalRepresentation [0xF0,0x80,0x80,0xAF])
-- Maximum overlong sequences
,DecodingError UTF8Strict [0xC1,0xBF]
(IllegalRepresentation [0xC1,0xBF])
,DecodingError UTF8Strict [0xE0,0x9F,0xBF]
(IllegalRepresentation [0xE0,0x9F,0xBF])
,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF]
(IllegalRepresentation [0xF0,0x8F,0xBF,0xBF])
-- Overlong represenations of '\NUL'
,DecodingError UTF8Strict [0xC0,0x80]
(IllegalRepresentation [0xC0,0x80])
,DecodingError UTF8Strict [0xE0,0x80,0x80]
(IllegalRepresentation [0xE0,0x80,0x80])
,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80]
(IllegalRepresentation [0xF0,0x80,0x80,0x80])
-- Invalid extends
-- 2 of 2
,DecodingError UTF8Strict [0xCC,0x1C,0xE0]
(IllegalCharacter 0x1C)
-- 2 of 3
,DecodingError UTF8Strict [0xE3,0x6C,0xB3]
(IllegalCharacter 0x6C)
-- 3 of 3
,DecodingError UTF8Strict [0xE3,0xB4,0x6D]
(IllegalCharacter 0x6D)
-- 2 of 4
,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3]
(IllegalCharacter 0x6C)
-- 3 of 4
,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3]
(IllegalCharacter 0x6C)
-- 4 of 4
,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C]
(IllegalCharacter 0x6C)
]
[DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE)
,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF)
-- Overlong representations of '/'
,DecodingError UTF8Strict [0xC0,0xAF]
(IllegalRepresentation [0xC0,0xAF])
,DecodingError UTF8Strict [0xE0,0x80,0xAF]
(IllegalRepresentation [0xE0,0x80,0xAF])
,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF]
(IllegalRepresentation [0xF0,0x80,0x80,0xAF])
-- Maximum overlong sequences
,DecodingError UTF8Strict [0xC1,0xBF]
(IllegalRepresentation [0xC1,0xBF])
,DecodingError UTF8Strict [0xE0,0x9F,0xBF]
(IllegalRepresentation [0xE0,0x9F,0xBF])
,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF]
(IllegalRepresentation [0xF0,0x8F,0xBF,0xBF])
-- Overlong represenations of '\NUL'
,DecodingError UTF8Strict [0xC0,0x80]
(IllegalRepresentation [0xC0,0x80])
,DecodingError UTF8Strict [0xE0,0x80,0x80]
(IllegalRepresentation [0xE0,0x80,0x80])
,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80]
(IllegalRepresentation [0xF0,0x80,0x80,0x80])
-- Invalid extends
-- 2 of 2
,DecodingError UTF8Strict [0xCC,0x1C,0xE0]
(IllegalCharacter 0x1C)
-- 2 of 3
,DecodingError UTF8Strict [0xE3,0x6C,0xB3]
(IllegalCharacter 0x6C)
-- 3 of 3
,DecodingError UTF8Strict [0xE3,0xB4,0x6D]
(IllegalCharacter 0x6D)
-- 2 of 4
,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3]
(IllegalCharacter 0x6C)
-- 3 of 4
,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3]
(IllegalCharacter 0x6C)
-- 4 of 4
,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C]
(IllegalCharacter 0x6C)
]
utf16Tests :: Test
utf16Tests = TestList $ map test $
[EncodingTest UTF16BE "z"
[0x00,0x7A]
,EncodingTest UTF16BE "\x6C34"
[0x6C,0x34]
,EncodingTest UTF16BE "\x1D11E"
[0xD8,0x34,0xDD,0x1E]
,EncodingTest UTF16 "\x6C34z\x1D11E"
[0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
,EncodingTest UTF16BE "˨"
[0x02,0xE8]
,DecodingError UTF16LE [0x65,0xDC]
(IllegalCharacter 0xDC)
,DecodingError UTF16BE [0xDC,0x33]
(IllegalCharacter 0xDC)
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33]
(IllegalCharacter 0xDA)
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66]
(IllegalCharacter 0xDA)
]
[EncodingTest UTF16BE "z"
[0x00,0x7A]
,EncodingTest UTF16BE "\x6C34"
[0x6C,0x34]
,EncodingTest UTF16BE "\x1D11E"
[0xD8,0x34,0xDD,0x1E]
,EncodingTest UTF16 "\x6C34z\x1D11E"
[0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E]
,EncodingTest UTF16BE "˨"
[0x02,0xE8]
,DecodingError UTF16LE [0x65,0xDC]
(IllegalCharacter 0xDC)
,DecodingError UTF16BE [0xDC,0x33]
(IllegalCharacter 0xDC)
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33]
(IllegalCharacter 0xDA)
,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66]
(IllegalCharacter 0xDA)
]
punycodeTests :: Test
punycodeTests = TestList $ map test $
[EncodingTest punycode "abcdef"
(map (fromIntegral.ord) "abcdef-")
,EncodingTest punycode "abæcdöef"
(map (fromIntegral.ord) "abcdef-qua4k")
,EncodingTest punycode "schön"
(map (fromIntegral.ord) "schn-7qa")
,EncodingTest punycode "ยจฆฟคฏข"
(map (fromIntegral.ord) "22cdfh1b8fsa")
,EncodingTest punycode ""
(map (fromIntegral.ord) "74h")
-- taken from http://tools.ietf.org/html/rfc3492#section-7
-- Arabic (Egyptian)
,punyTest "ليهمابتكلموشعربي؟"
"egbpdaj6bu4bxfgehfvwxn"
-- Chinese (simplified)
,punyTest "他们为什么不说中文"
"ihqwcrb4cv8a8dqg056pqjye"
-- Chinese (traditional)
,punyTest "他們爲什麽不說中文"
"ihqwctvzc91f659drss3x8bo0yb"
-- Czech
,punyTest "Pročprostěnemluvíčesky"
"Proprostnemluvesky-uyb24dma41a"
-- Hebrew
,punyTest "למההםפשוטלאמדבריםעברית"
"4dbcagdahymbxekheh6e0a7fei0b"
-- Hindi (Devanagari)
,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
-- Japanese (kanji and hiragana)
,punyTest "なぜみんな日本語を話してくれないのか"
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
-- Korean (Hangul syllables)
,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
-- Russian (Cyrillic)
,punyTest "почемужеонинеговорятпорусски"
"b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
-- Spanish
,punyTest "PorquénopuedensimplementehablarenEspañol"
"PorqunopuedensimplementehablarenEspaol-fmd56a"
-- Vietnamese
,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
{-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
\bar\xE2\x80\x8B\xE2\x81\xA0\
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
"foobarbaz"-}
]
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
[EncodingTest punycode "abcdef"
(map (fromIntegral.ord) "abcdef-")
,EncodingTest punycode "abæcdöef"
(map (fromIntegral.ord) "abcdef-qua4k")
,EncodingTest punycode "schön"
(map (fromIntegral.ord) "schn-7qa")
,EncodingTest punycode "ยจฆฟคฏข"
(map (fromIntegral.ord) "22cdfh1b8fsa")
,EncodingTest punycode ""
(map (fromIntegral.ord) "74h")
-- taken from http://tools.ietf.org/html/rfc3492#section-7
-- Arabic (Egyptian)
,punyTest "ليهمابتكلموشعربي؟"
"egbpdaj6bu4bxfgehfvwxn"
-- Chinese (simplified)
,punyTest "他们为什么不说中文"
"ihqwcrb4cv8a8dqg056pqjye"
-- Chinese (traditional)
,punyTest "他們爲什麽不說中文"
"ihqwctvzc91f659drss3x8bo0yb"
-- Czech
,punyTest "Pročprostěnemluvíčesky"
"Proprostnemluvesky-uyb24dma41a"
-- Hebrew
,punyTest "למההםפשוטלאמדבריםעברית"
"4dbcagdahymbxekheh6e0a7fei0b"
-- Hindi (Devanagari)
,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902"
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
-- Japanese (kanji and hiragana)
,punyTest "なぜみんな日本語を話してくれないのか"
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
-- Korean (Hangul syllables)
,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까"
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
-- Russian (Cyrillic)
,punyTest "почемужеонинеговорятпорусски"
"b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there
-- Spanish
,punyTest "PorquénopuedensimplementehablarenEspañol"
"PorqunopuedensimplementehablarenEspaol-fmd56a"
-- Vietnamese
,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt"
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
{-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\
\bar\xE2\x80\x8B\xE2\x81\xA0\
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
"foobarbaz"-}
]
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
isoTests :: Test
isoTests = TestList $ map test $