From 8727ac25a5e552d9ec9de3b7385dfe6459519f4f Mon Sep 17 00:00:00 2001 From: Scott Sedgwick Date: Fri, 28 Jul 2017 14:17:39 +1000 Subject: [PATCH] Made package stack compatible Created and added stack.yaml and .gitignore files. Relaxed the version dependency on 'binary' package in cabal file. Is that OK? Also brought the minimum cabal version to >=1.8, so I could add a test target that pulls in the library. Changed all tabs to spaces - I don't know when the Haskell compiler started giving warnings about that. --- .gitignore | 22 ++ Data/Encoding.hs | 6 +- Data/Encoding/BootString.hs | 160 ++++++------ Data/Encoding/Exception.hs | 18 +- Data/Encoding/KOI8R.hs | 44 ++-- Data/Encoding/KOI8U.hs | 44 ++-- Data/Encoding/Preprocessor/XMLMapping.hs | 12 +- Data/Encoding/UTF16.hs | 6 +- Data/Encoding/UTF32.hs | 8 +- System/IO/Encoding.hs | 46 +++- encoding.cabal | 37 ++- stack.yaml | 66 +++++ tests/Main.hs | 22 +- tests/Test/Tester.hs | 16 +- tests/Test/Tests.hs | 316 +++++++++++------------ 15 files changed, 478 insertions(+), 345 deletions(-) create mode 100644 .gitignore create mode 100644 stack.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1f628fe --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +### Haskell ### +dist +dist-* +cabal-dev +*.o +*.hi +*.chi +*.chs.h +*.dyn_o +*.dyn_hi +.hpc +.hsenv +.cabal-sandbox/ +cabal.sandbox.config +*.prof +*.aux +*.hp +*.eventlog +.stack-work/ +cabal.project.local +.HTF/ + diff --git a/Data/Encoding.hs b/Data/Encoding.hs index 447088c..78c086d 100644 --- a/Data/Encoding.hs +++ b/Data/Encoding.hs @@ -359,6 +359,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of -- | Takes the name of an encoding and creates a dynamic encoding from it. encodingFromString :: String -> DynEncoding encodingFromString str = maybe - (error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str) - id - (encodingFromStringExplicit str) + (error $ "Data.Encoding.encodingFromString: Unknown encoding: "++show str) + id + (encodingFromStringExplicit str) diff --git a/Data/Encoding/BootString.hs b/Data/Encoding/BootString.hs index 8e86971..b7f0cd4 100644 --- a/Data/Encoding/BootString.hs +++ b/Data/Encoding/BootString.hs @@ -2,8 +2,8 @@ {- | This implements BootString en- and decoding, the foundation of Punycode -} module Data.Encoding.BootString - (BootString(..) - ,punycode) where + (BootString(..) + ,punycode) where import Data.Encoding.Base import Data.Encoding.Exception @@ -17,77 +17,77 @@ import Data.Typeable import Control.Monad (when) data BootString = BootString - {base :: Int - ,tmin :: Int - ,tmax :: Int - ,skew :: Int - ,damp :: Int - ,init_bias :: Int - ,init_n :: Int - } - deriving (Show,Eq,Typeable) + {base :: Int + ,tmin :: Int + ,tmax :: Int + ,skew :: Int + ,damp :: Int + ,init_bias :: Int + ,init_n :: Int + } + deriving (Show,Eq,Typeable) punycode :: BootString punycode = BootString - {base = 36 - ,tmin = 1 - ,tmax = 26 - ,skew = 38 - ,damp = 700 - ,init_bias = 72 - ,init_n = 0x80 - } + {base = 36 + ,tmin = 1 + ,tmax = 26 + ,skew = 38 + ,damp = 700 + ,init_bias = 72 + ,init_n = 0x80 + } punyValue :: ByteSource m => Word8 -> m Int punyValue c - | n < 0x30 = norep - | n <= 0x39 = return $ n-0x30+26 - | n < 0x41 = norep - | n <= 0x5A = return $ n-0x41 - | n < 0x61 = norep - | n <= 0x7A = return $ n-0x61 - | otherwise = norep - where - n = fromIntegral c - norep = throwException (IllegalCharacter c) + | n < 0x30 = norep + | n <= 0x39 = return $ n-0x30+26 + | n < 0x41 = norep + | n <= 0x5A = return $ n-0x41 + | n < 0x61 = norep + | n <= 0x7A = return $ n-0x61 + | otherwise = norep + where + n = fromIntegral c + norep = throwException (IllegalCharacter c) punyChar :: ByteSink m => Int -> m Word8 punyChar c - | c < 0 = norep - | c < 26 = return $ fromIntegral $ 0x61+c - | c < 36 = return $ fromIntegral $ 0x30+c-26 - | otherwise = norep - where - norep = throwException (HasNoRepresentation (chr c)) + | c < 0 = norep + | c < 26 = return $ fromIntegral $ 0x61+c + | c < 36 = return $ fromIntegral $ 0x30+c-26 + | otherwise = norep + where + norep = throwException (HasNoRepresentation (chr c)) getT :: BootString -> Int -> Int -> Int getT bs k bias - | k <= bias + (tmin bs) = tmin bs - | k >= bias + (tmax bs) = tmax bs - | otherwise = k-bias + | k <= bias + (tmin bs) = tmin bs + | k >= bias + (tmax bs) = tmax bs + | otherwise = k-bias adapt :: BootString -> Int -> Int -> Bool -> Int adapt bs delta numpoints firsttime = let - delta1 = if firsttime - then delta `div` (damp bs) - else delta `div` 2 - delta2 = delta1 + (delta1 `div` numpoints) - (rd,rk) = head - $ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst) - $ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0) - in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs)) + delta1 = if firsttime + then delta `div` (damp bs) + else delta `div` 2 + delta2 = delta1 + (delta1 `div` numpoints) + (rd,rk) = head + $ filter ((<=((base bs - tmin bs) * (tmax bs)) `div` 2).fst) + $ iterate (\(d,k) -> (d `div` (base bs - tmin bs),k+(base bs))) (delta2,0) + in rk + (((base bs - tmin bs +1) * rd) `div` (rd + skew bs)) decodeValue :: ByteSource m => BootString -> Int -> Int -> Int -> Int -> [Int] -> m (Int,[Int]) decodeValue bs bias i k w (x:xs) - | x >= base bs = throwException OutOfRange - | x > (maxBound - i) `div` w = throwException OutOfRange - | x < t = return (ni,xs) - | w > maxBound `div` (base bs - t) = throwException OutOfRange + | x >= base bs = throwException OutOfRange + | x > (maxBound - i) `div` w = throwException OutOfRange + | x < t = return (ni,xs) + | w > maxBound `div` (base bs - t) = throwException OutOfRange | null xs = throwException OutOfRange - | otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs - where - ni = i + x*w - t = getT bs k bias + | otherwise = decodeValue bs bias ni (k+base bs) (w*(base bs - t)) xs + where + ni = i + x*w + t = getT bs k bias decodeValues :: ByteSource m => BootString -> Int -> [Int] -> m [(Char,Int)] decodeValues bs len xs = decodeValues' bs (init_n bs) 0 (init_bias bs) len xs @@ -108,8 +108,8 @@ decodeValues' bs n i bias len xs = do insertDeltas :: [(a,Int)] -> [a] -> [a] insertDeltas [] str = str insertDeltas ((c,p):xs) str = let - (l,r) = splitAt p str - in insertDeltas xs (l++[c]++r) + (l,r) = splitAt p str + in insertDeltas xs (l++[c]++r) punyDecode :: ByteSource m => [Word8] -> [Word8] -> m String punyDecode base ext = do @@ -119,32 +119,32 @@ punyDecode base ext = do encodeValue :: BootString -> Int -> Int -> Int -> Int -> [Int] encodeValue bs bias delta n c = unfoldr (\(q,k,out) -> let - t = getT bs k bias - (nq,dc) = (q-t) `divMod` (base bs - t) - in if out - then Nothing - else (if q < t - then Just (q,(q,k+base bs,True)) - else Just (t + dc,(nq,k+base bs,False))) - ) (delta,base bs,False) + t = getT bs k bias + (nq,dc) = (q-t) `divMod` (base bs - t) + in if out + then Nothing + else (if q < t + then Just (q,(q,k+base bs,True)) + else Just (t + dc,(nq,k+base bs,False))) + ) (delta,base bs,False) encodeValues' :: BootString -> Int -> Int -> Int -> Int -> Int -> [Int] -> (Int,Int,Int,[Int]) encodeValues' _ _ h bias delta _ [] = (delta,h,bias,[]) encodeValues' bs b h bias delta n (c:cs) = case compare c n of - LT -> encodeValues' bs b h bias (delta+1) n cs - GT -> encodeValues' bs b h bias delta n cs - EQ -> let - (ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs - xs = encodeValue bs bias delta n c - in (ndelta,nh,nbias,xs++rest) + LT -> encodeValues' bs b h bias (delta+1) n cs + GT -> encodeValues' bs b h bias delta n cs + EQ -> let + (ndelta,nh,nbias,rest) = encodeValues' bs b (h+1) (adapt bs delta (h+1) (h==b)) 0 n cs + xs = encodeValue bs bias delta n c + in (ndelta,nh,nbias,xs++rest) encodeValues :: BootString -> Int -> Int -> Int -> Int -> Int -> Int -> [Int] -> [Int] encodeValues bs b l h bias delta n cps - | h == l = [] - | otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps - where - m = minimum (filter (>=n) cps) - (ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps + | h == l = [] + | otherwise = outp++encodeValues bs b l nh nbias (ndelta+1) (m+1) cps + where + m = minimum (filter (>=n) cps) + (ndelta,nh,nbias,outp) = encodeValues' bs b h bias (delta + (m - n)*(h + 1)) m cps breakLast :: (a -> Bool) -> [a] -> Maybe ([a],[a]) breakLast p xs = do @@ -166,7 +166,7 @@ instance Encoding BootString where encodeChar _ c = error "Data.Encoding.BootString.encodeChar: Please use 'encode' for encoding BootStrings" decodeChar _ = error "Data.Encoding.BootString.decodeChar: Please use 'decode' for decoding BootStrings" encode bs str = let (base,nbase) = partition (\c -> ord c < init_n bs) str - b = length base + b = length base in do res <- mapM punyChar $ encodeValues bs b (length str) b (init_bias bs) 0 (init_n bs) (map ord str) when (not $ null base) $ do @@ -178,8 +178,8 @@ instance Encoding BootString where let m = fromIntegral $ ord '-' case breakLast (==m) wrds of Just ([],_) -> throwException (IllegalCharacter m) - Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of - Nothing -> punyDecode base nbase - Just ww -> throwException (IllegalCharacter ww) - Nothing -> punyDecode [] wrds + Just (base,_:nbase) -> case find (\w -> fromIntegral w > init_n bs) base of + Nothing -> punyDecode base nbase + Just ww -> throwException (IllegalCharacter ww) + Nothing -> punyDecode [] wrds encodeable bs c = True -- XXX: hm, really? diff --git a/Data/Encoding/Exception.hs b/Data/Encoding/Exception.hs index cdbe8ec..6c52556 100644 --- a/Data/Encoding/Exception.hs +++ b/Data/Encoding/Exception.hs @@ -9,8 +9,8 @@ import Control.Monad.Identity -- | This exception type is thrown whenever something went wrong during the -- encoding-process. data EncodingException - = HasNoRepresentation Char -- ^ Thrown if a specific character - -- is not representable in an encoding. + = HasNoRepresentation Char -- ^ Thrown if a specific character + -- is not representable in an encoding. deriving (Eq,Ord,Show,Read,Typeable) instance Exception EncodingException @@ -18,13 +18,13 @@ instance Exception EncodingException -- | This exception type is thrown whenever something went wrong during the -- decoding-process. data DecodingException - = IllegalCharacter Word8 -- ^ The sequence contained an illegal - -- byte that couldn't be decoded. - | UnexpectedEnd -- ^ more bytes were needed to allow a - -- successfull decoding. - | OutOfRange -- ^ the decoded value was out of the unicode range - | IllegalRepresentation [Word8] -- ^ The character sequence encodes a - -- character, but is illegal. + = IllegalCharacter Word8 -- ^ The sequence contained an illegal + -- byte that couldn't be decoded. + | UnexpectedEnd -- ^ more bytes were needed to allow a + -- successfull decoding. + | OutOfRange -- ^ the decoded value was out of the unicode range + | IllegalRepresentation [Word8] -- ^ The character sequence encodes a + -- character, but is illegal. deriving (Eq,Ord,Show,Read,Typeable) instance Exception DecodingException diff --git a/Data/Encoding/KOI8R.hs b/Data/Encoding/KOI8R.hs index 9201591..5f3391d 100644 --- a/Data/Encoding/KOI8R.hs +++ b/Data/Encoding/KOI8R.hs @@ -3,7 +3,7 @@ See for more information. -} module Data.Encoding.KOI8R - (KOI8R(..)) where + (KOI8R(..)) where import Control.Throws import Data.Array.Unboxed @@ -28,23 +28,23 @@ koi8rMap = fromList (zip koi8rList [128..]) koi8rList :: [Char] koi8rList = - ['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524' - ,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590' - ,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248' - ,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7' - ,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556' - ,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e' - ,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565' - ,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9' - ,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433' - ,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e' - ,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432' - ,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a' - ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413' - ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e' - ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412' - ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a' - ] + ['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524' + ,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590' + ,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248' + ,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7' + ,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556' + ,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e' + ,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565' + ,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9' + ,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433' + ,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e' + ,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432' + ,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a' + ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413' + ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e' + ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412' + ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a' + ] instance Encoding KOI8R where decodeChar _ = do @@ -53,8 +53,8 @@ instance Encoding KOI8R where then return $ chr $ fromIntegral w else return $ koi8rArr!w encodeChar _ ch - | ch < '\128' = pushWord8 $ fromIntegral $ ord ch - | otherwise = case lookup ch koi8rMap of - Just w -> pushWord8 w - Nothing -> throwException (HasNoRepresentation ch) + | ch < '\128' = pushWord8 $ fromIntegral $ ord ch + | otherwise = case lookup ch koi8rMap of + Just w -> pushWord8 w + Nothing -> throwException (HasNoRepresentation ch) encodeable _ c = member c koi8rMap diff --git a/Data/Encoding/KOI8U.hs b/Data/Encoding/KOI8U.hs index d80cebc..a951eb6 100644 --- a/Data/Encoding/KOI8U.hs +++ b/Data/Encoding/KOI8U.hs @@ -3,7 +3,7 @@ See for more information. -} module Data.Encoding.KOI8U - (KOI8U(..)) where + (KOI8U(..)) where import Data.Encoding.Base import Data.Encoding.ByteSource @@ -28,23 +28,23 @@ koi8uMap = fromList (zip koi8uList [128..]) koi8uList :: [Char] koi8uList = - ['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524' - ,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590' - ,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248' - ,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7' - ,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457' - ,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e' - ,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407' - ,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9' - ,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433' - ,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e' - ,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432' - ,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a' - ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413' - ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e' - ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412' - ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a' - ] + ['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524' + ,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590' + ,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248' + ,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7' + ,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457' + ,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e' + ,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407' + ,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9' + ,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433' + ,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e' + ,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432' + ,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a' + ,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413' + ,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e' + ,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412' + ,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a' + ] instance Encoding KOI8U where decodeChar _ = do @@ -53,8 +53,8 @@ instance Encoding KOI8U where then return $ chr $ fromIntegral w else return $ koi8uArr!w encodeChar _ ch - | ch < '\128' = pushWord8 $ fromIntegral $ ord ch - | otherwise = case lookup ch koi8uMap of - Just w -> pushWord8 w - Nothing -> throwException (HasNoRepresentation ch) + | ch < '\128' = pushWord8 $ fromIntegral $ ord ch + | otherwise = case lookup ch koi8uMap of + Just w -> pushWord8 w + Nothing -> throwException (HasNoRepresentation ch) encodeable _ c = member c koi8uMap diff --git a/Data/Encoding/Preprocessor/XMLMapping.hs b/Data/Encoding/Preprocessor/XMLMapping.hs index bbe64f8..eae8971 100644 --- a/Data/Encoding/Preprocessor/XMLMapping.hs +++ b/Data/Encoding/Preprocessor/XMLMapping.hs @@ -47,14 +47,14 @@ data CharacterMapping_normalization = CharacterMapping_normalization_undetermine deriving (Eq,Show) data Stateful_siso = Stateful_siso Validity Validity deriving (Eq,Show) -newtype History = History (List1 Modified) deriving (Eq,Show) +newtype History = History (List1 Modified) deriving (Eq,Show) data Modified = Modified Modified_Attrs String deriving (Eq,Show) data Modified_Attrs = Modified_Attrs { modifiedVersion :: String , modifiedDate :: String } deriving (Eq,Show) -newtype Validity = Validity (List1 State) deriving (Eq,Show) +newtype Validity = Validity (List1 State) deriving (Eq,Show) data State = State { stateType :: String , stateNext :: String @@ -112,10 +112,10 @@ data Escape = Escape { escapeSequence :: String , escapeName :: String } deriving (Eq,Show) -newtype Si = Si (List1 Designator) deriving (Eq,Show) -newtype So = So (List1 Designator) deriving (Eq,Show) -newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show) -newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show) +newtype Si = Si (List1 Designator) deriving (Eq,Show) +newtype So = So (List1 Designator) deriving (Eq,Show) +newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show) +newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show) data Designator = Designator { designatorSequence :: String , designatorName :: String diff --git a/Data/Encoding/UTF16.hs b/Data/Encoding/UTF16.hs index d24b243..086ea67 100644 --- a/Data/Encoding/UTF16.hs +++ b/Data/Encoding/UTF16.hs @@ -18,9 +18,9 @@ import Data.Typeable import Data.Word data UTF16 - = UTF16 -- ^ Decodes big and little endian, encodes big endian. - | UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian. - | UTF16LE -- ^ Little endian decoding and encoding. + = UTF16 -- ^ Decodes big and little endian, encodes big endian. + | UTF16BE -- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian. + | UTF16LE -- ^ Little endian decoding and encoding. deriving (Eq,Show,Typeable) readBOM :: ByteSource m => m (Either Char UTF16) diff --git a/Data/Encoding/UTF32.hs b/Data/Encoding/UTF32.hs index 18f6210..5c100fe 100644 --- a/Data/Encoding/UTF32.hs +++ b/Data/Encoding/UTF32.hs @@ -16,10 +16,10 @@ import Data.Typeable data UTF32 - = UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present. - | UTF32BE -- ^ Encodes and decodes using the big endian encoding. - | UTF32LE -- ^ Encodes and decodes using the little endian encoding. - deriving (Eq,Show,Typeable) + = UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present. + | UTF32BE -- ^ Encodes and decodes using the big endian encoding. + | UTF32LE -- ^ Encodes and decodes using the little endian encoding. + deriving (Eq,Show,Typeable) instance Encoding UTF32 where encodeChar UTF32LE ch = pushWord32le (fromIntegral $ ord ch) diff --git a/System/IO/Encoding.hs b/System/IO/Encoding.hs index 0e93eb7..793d135 100644 --- a/System/IO/Encoding.hs +++ b/System/IO/Encoding.hs @@ -67,17 +67,23 @@ import Control.Monad.Reader (runReaderT) -- encoding. hGetContents :: (Encoding e,?enc :: e) => Handle -> IO String hGetContents h = do - str <- LBS.hGetContents h - return $ decodeLazyByteString ?enc str + str <- LBS.hGetContents h + return $ decodeLazyByteString ?enc str +-- | Like the normal 'System.IO.getContents', but decodes the input using an +-- encoding. getContents :: (Encoding e,?enc :: e) => IO String getContents = do str <- LBS.getContents return $ decodeLazyByteString ?enc str +-- | Like the normal 'System.IO.putStr', but decodes the input using an +-- encoding. putStr :: (Encoding e,?enc :: e) => String -> IO () putStr = hPutStr stdout +-- | Like the normal 'System.IO.putStrLn', but decodes the input using an +-- encoding. putStrLn :: (Encoding e,?enc :: e) => String -> IO () putStrLn = hPutStrLn stdout @@ -86,46 +92,72 @@ putStrLn = hPutStrLn stdout hPutStr :: (Encoding e,?enc :: e) => Handle -> String -> IO () hPutStr h str = LBS.hPut h (encodeLazyByteString ?enc str) +-- | Like the normal 'System.IO.hPutStrLn', but decodes the input using an +-- encoding. hPutStrLn :: (Encoding e,?enc :: e) => Handle -> String -> IO () hPutStrLn h str = do LBS.hPut h (encodeLazyByteString ?enc str) LBS.hPut h (encodeLazyByteString ?enc "\n") +-- | Like the normal 'System.IO.print', but decodes the input using an +-- encoding. print :: (Encoding e,Show a,?enc :: e) => a -> IO () print = hPrint stdout +-- | Like the normal 'System.IO.hPrint', but decodes the input using an +-- encoding. hPrint :: (Encoding e,Show a,?enc :: e) => Handle -> a -> IO () hPrint h x = hPutStrLn h (show x) +-- | Like the normal 'System.IO.readFile', but decodes the input using an +-- encoding. readFile :: (Encoding e,?enc :: e) => FilePath -> IO String readFile fn = LBS.readFile fn >>= return.(decodeLazyByteString ?enc) +-- | Like the normal 'System.IO.writeFile', but decodes the input using an +-- encoding. writeFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO () writeFile fn str = LBS.writeFile fn $ encodeLazyByteString ?enc str +-- | Like the normal 'System.IO.appendFile', but decodes the input using an +-- encoding. appendFile :: (Encoding e,?enc :: e) => FilePath -> String -> IO () appendFile fn str = LBS.appendFile fn $ encodeLazyByteString ?enc str +-- | Like the normal 'System.IO.getChar', but decodes the input using an +-- encoding. getChar :: (Encoding e,?enc :: e) => IO Char getChar = hGetChar stdin +-- | Like the normal 'System.IO.hGetChar', but decodes the input using an +-- encoding. hGetChar :: (Encoding e,?enc :: e) => Handle -> IO Char hGetChar h = runReaderT (decodeChar ?enc) h +-- | Like the normal 'System.IO.getLine', but decodes the input using an +-- encoding. getLine :: (Encoding e,?enc :: e) => IO String getLine = hGetLine stdin +-- | Like the normal 'System.IO.hGetLine', but decodes the input using an +-- encoding. hGetLine :: (Encoding e,?enc :: e) => Handle -> IO String hGetLine h = do line <- BS.hGetLine h return $ decodeStrictByteString ?enc line +-- | Like the normal 'System.IO.putChar', but decodes the input using an +-- encoding. putChar :: (Encoding e,?enc :: e) => Char -> IO () putChar = hPutChar stdout +-- | Like the normal 'System.IO.hPutChar', but decodes the input using an +-- encoding. hPutChar :: (Encoding e,?enc :: e) => Handle -> Char -> IO () hPutChar h c = runReaderT (encodeChar ?enc c) h +-- | Like the normal 'System.IO.interact', but decodes the input using an +-- encoding. interact :: (Encoding e,?enc :: e) => (String -> String) -> IO () interact f = do line <- hGetLine stdin @@ -133,7 +165,7 @@ interact f = do #ifdef SYSTEM_ENCODING foreign import ccall "system_encoding.h get_system_encoding" - get_system_encoding :: IO CString + get_system_encoding :: IO CString #endif -- | Returns the encoding used on the current system. Currently only supported @@ -141,9 +173,9 @@ foreign import ccall "system_encoding.h get_system_encoding" getSystemEncoding :: IO DynEncoding getSystemEncoding = do #ifdef SYSTEM_ENCODING - enc <- get_system_encoding - str <- peekCString enc - return $ encodingFromString str + enc <- get_system_encoding + str <- peekCString enc + return $ encodingFromString str #else - error "getSystemEncoding is not supported on this platform" + error "getSystemEncoding is not supported on this platform" #endif diff --git a/encoding.cabal b/encoding.cabal index 12b7d26..bcd67d5 100644 --- a/encoding.cabal +++ b/encoding.cabal @@ -1,16 +1,16 @@ -Name: encoding -Version: 0.8.1 -Author: Henning Günther -Maintainer: daniel@wagner-home.com -License: BSD3 +Name: encoding +Version: 0.8.1 +Author: Henning Günther +Maintainer: daniel@wagner-home.com +License: BSD3 License-File: LICENSE -Synopsis: A library for various character encodings +Synopsis: A library for various character encodings Description: - Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that. -Category: Codec -Homepage: http://code.haskell.org/encoding/ -Cabal-Version: >=1.6 -Build-Type: Custom + Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that. +Category: Codec +Homepage: http://code.haskell.org/encoding/ +Cabal-Version: >=1.8 +Build-Type: Custom Extra-Source-Files: CHANGELOG Data/Encoding/Preprocessor/Mapping.hs @@ -45,7 +45,7 @@ Custom-Setup Library Build-Depends: array, base >=3 && <5, - binary < 0.8, + binary, bytestring, containers, extensible-exceptions, @@ -132,3 +132,16 @@ Library C-Sources: system_encoding.c CPP-Options: -DSYSTEM_ENCODING + +test-suite encoding-test + type: exitcode-stdio-1.0 + hs-source-dirs: tests + main-is: Main.hs + other-modules: Test.Tester + , Test.Tests + build-depends: base + , bytestring + , encoding + , HUnit + , QuickCheck + ghc-options: -threaded -rtsopts -with-rtsopts=-N \ No newline at end of file diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 0000000..6cbacba --- /dev/null +++ b/stack.yaml @@ -0,0 +1,66 @@ +# This file was automatically generated by 'stack init' +# +# Some commonly used options have been documented as comments in this file. +# For advanced use and comprehensive documentation of the format, please see: +# http://docs.haskellstack.org/en/stable/yaml_configuration/ + +# Resolver to choose a 'specific' stackage snapshot or a compiler version. +# A snapshot resolver dictates the compiler version and the set of packages +# to be used for project dependencies. For example: +# +# resolver: lts-3.5 +# resolver: nightly-2015-09-21 +# resolver: ghc-7.10.2 +# resolver: ghcjs-0.1.0_ghc-7.10.2 +# resolver: +# name: custom-snapshot +# location: "./custom-snapshot.yaml" +resolver: lts-8.22 + +# User packages to be built. +# Various formats can be used as shown in the example below. +# +# packages: +# - some-directory +# - https://example.com/foo/bar/baz-0.0.2.tar.gz +# - location: +# git: https://github.com/commercialhaskell/stack.git +# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# extra-dep: true +# subdirs: +# - auto-update +# - wai +# +# A package marked 'extra-dep: true' will only be built if demanded by a +# non-dependency (i.e. a user package), and its test suites and benchmarks +# will not be run. This is useful for tweaking upstream packages. +packages: +- '.' +# Dependency packages to be pulled from upstream that are not in the resolver +# (e.g., acme-missiles-0.3) +extra-deps: [] + +# Override default flag values for local packages and extra-deps +flags: {} + +# Extra package databases containing global packages +extra-package-dbs: [] + +# Control whether we use the GHC we find on the path +# system-ghc: true +# +# Require a specific version of stack, using version ranges +# require-stack-version: -any # Default +# require-stack-version: ">=1.4" +# +# Override the architecture used by stack, especially useful on Windows +# arch: i386 +# arch: x86_64 +# +# Extra directories used by stack for building +# extra-include-dirs: [/path/to/dir] +# extra-lib-dirs: [/path/to/dir] +# +# Allow a newer minor version of GHC than the snapshot specifies +# compiler-check: newer-minor \ No newline at end of file diff --git a/tests/Main.hs b/tests/Main.hs index 0b14f1c..e2d4e5d 100644 --- a/tests/Main.hs +++ b/tests/Main.hs @@ -3,16 +3,16 @@ import Test.HUnit import Test.Tests hunitTests = - [ ("utf8Tests", utf8Tests) - , ("utf16Tests", utf16Tests) - , ("punycodeTests", punycodeTests) - , ("isoTests", isoTests) - , ("jisTests", jisTests) - , ("gb18030Tests", gb18030Tests) - ] + [ ("utf8Tests", utf8Tests) + , ("utf16Tests", utf16Tests) + , ("punycodeTests", punycodeTests) + , ("isoTests", isoTests) + , ("jisTests", jisTests) + , ("gb18030Tests", gb18030Tests) + ] main = do - identityTests - forM_ hunitTests $ \(name, test) -> do - putStrLn $ "running " ++ name - runTestTT test >>= print + identityTests + forM_ hunitTests $ \(name, test) -> do + putStrLn $ "running " ++ name + runTestTT test >>= print diff --git a/tests/Test/Tester.hs b/tests/Test/Tester.hs index 8dbc85d..b853d87 100644 --- a/tests/Test/Tester.hs +++ b/tests/Test/Tester.hs @@ -13,14 +13,14 @@ import Prelude hiding (readFile) import System.IO.Encoding data EncodingTest - = forall enc. (Encoding enc,Show enc) => - EncodingTest enc String [Word8] - | forall enc. (Encoding enc,Show enc) => - EncodingFileTest enc FilePath FilePath - | forall enc. (Encoding enc,Show enc) => - DecodingError enc [Word8] DecodingException - | forall enc. (Encoding enc,Show enc) => - EncodingError enc String EncodingException + = forall enc. (Encoding enc,Show enc) => + EncodingTest enc String [Word8] + | forall enc. (Encoding enc,Show enc) => + EncodingFileTest enc FilePath FilePath + | forall enc. (Encoding enc,Show enc) => + DecodingError enc [Word8] DecodingException + | forall enc. (Encoding enc,Show enc) => + EncodingError enc String EncodingException instance Testable EncodingTest where test (EncodingTest enc src trg) diff --git a/tests/Test/Tests.hs b/tests/Test/Tests.hs index 1bf1207..9d0f530 100644 --- a/tests/Test/Tests.hs +++ b/tests/Test/Tests.hs @@ -70,173 +70,173 @@ identityTests = do utf8Tests :: Test utf8Tests = TestList $ map test $ concat [[EncodingTest enc "\x0041\x2262\x0391\x002E" - [0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E] - ,EncodingTest enc "\xD55C\xAD6D\xC5B4" - [0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4] - ,EncodingTest enc "\x65E5\x672C\x8A9E" - [0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E] - ,EncodingTest enc "\x233B4" - [0xF0,0xA3,0x8E,0xB4] - ,EncodingTest enc "" - [] - -- First possible sequence of a certain length - ,EncodingTest enc "\x0000" - [0x00] - ,EncodingTest enc "\x0080" - [0xC2,0x80] - ,EncodingTest enc "\x0800" - [0xE0,0xA0,0x80] - ,EncodingTest enc "\x10000" - [0xF0,0x90,0x80,0x80] - -- Last possible sequence of a certain length - ,EncodingTest enc "\x007F" - [0x7F] - ,EncodingTest enc "\x07FF" - [0xDF,0xBF] - ,EncodingTest enc "\xFFFF" - [0xEF,0xBF,0xBF] - -- Other boundaries - ,EncodingTest enc "\xD7FF" - [0xED,0x9F,0xBF] - ,EncodingTest enc "\xE000" - [0xEE,0x80,0x80] - ,EncodingTest enc "\xFFFD" - [0xEF,0xBF,0xBD] - -- Illegal starting characters - ,DecodingError enc - [0x65,0x55,0x85] - (IllegalCharacter 0x85) - -- Unexpected end - ,DecodingError enc - [0x41,0xE2,0x89,0xA2,0xCE] - UnexpectedEnd - ,DecodingError enc - [0x41,0xE2,0x89] - UnexpectedEnd - ,DecodingError enc - [0x41,0xE2] - UnexpectedEnd + [0x41,0xE2,0x89,0xA2,0xCE,0x91,0x2E] + ,EncodingTest enc "\xD55C\xAD6D\xC5B4" + [0xED,0x95,0x9C,0xEA,0xB5,0xAD,0xEC,0x96,0xB4] + ,EncodingTest enc "\x65E5\x672C\x8A9E" + [0xE6,0x97,0xA5,0xE6,0x9C,0xAC,0xE8,0xAA,0x9E] + ,EncodingTest enc "\x233B4" + [0xF0,0xA3,0x8E,0xB4] + ,EncodingTest enc "" + [] + -- First possible sequence of a certain length + ,EncodingTest enc "\x0000" + [0x00] + ,EncodingTest enc "\x0080" + [0xC2,0x80] + ,EncodingTest enc "\x0800" + [0xE0,0xA0,0x80] + ,EncodingTest enc "\x10000" + [0xF0,0x90,0x80,0x80] + -- Last possible sequence of a certain length + ,EncodingTest enc "\x007F" + [0x7F] + ,EncodingTest enc "\x07FF" + [0xDF,0xBF] + ,EncodingTest enc "\xFFFF" + [0xEF,0xBF,0xBF] + -- Other boundaries + ,EncodingTest enc "\xD7FF" + [0xED,0x9F,0xBF] + ,EncodingTest enc "\xE000" + [0xEE,0x80,0x80] + ,EncodingTest enc "\xFFFD" + [0xEF,0xBF,0xBD] + -- Illegal starting characters + ,DecodingError enc + [0x65,0x55,0x85] + (IllegalCharacter 0x85) + -- Unexpected end + ,DecodingError enc + [0x41,0xE2,0x89,0xA2,0xCE] + UnexpectedEnd + ,DecodingError enc + [0x41,0xE2,0x89] + UnexpectedEnd + ,DecodingError enc + [0x41,0xE2] + UnexpectedEnd ] | enc <- [UTF8,UTF8Strict] ]++ - [DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE) - ,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF) - -- Overlong representations of '/' - ,DecodingError UTF8Strict [0xC0,0xAF] - (IllegalRepresentation [0xC0,0xAF]) - ,DecodingError UTF8Strict [0xE0,0x80,0xAF] - (IllegalRepresentation [0xE0,0x80,0xAF]) - ,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF] - (IllegalRepresentation [0xF0,0x80,0x80,0xAF]) - -- Maximum overlong sequences - ,DecodingError UTF8Strict [0xC1,0xBF] - (IllegalRepresentation [0xC1,0xBF]) - ,DecodingError UTF8Strict [0xE0,0x9F,0xBF] - (IllegalRepresentation [0xE0,0x9F,0xBF]) - ,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF] - (IllegalRepresentation [0xF0,0x8F,0xBF,0xBF]) - -- Overlong represenations of '\NUL' - ,DecodingError UTF8Strict [0xC0,0x80] - (IllegalRepresentation [0xC0,0x80]) - ,DecodingError UTF8Strict [0xE0,0x80,0x80] - (IllegalRepresentation [0xE0,0x80,0x80]) - ,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80] - (IllegalRepresentation [0xF0,0x80,0x80,0x80]) - -- Invalid extends - -- 2 of 2 - ,DecodingError UTF8Strict [0xCC,0x1C,0xE0] - (IllegalCharacter 0x1C) - -- 2 of 3 - ,DecodingError UTF8Strict [0xE3,0x6C,0xB3] - (IllegalCharacter 0x6C) - -- 3 of 3 - ,DecodingError UTF8Strict [0xE3,0xB4,0x6D] - (IllegalCharacter 0x6D) - -- 2 of 4 - ,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3] - (IllegalCharacter 0x6C) - -- 3 of 4 - ,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3] - (IllegalCharacter 0x6C) - -- 4 of 4 - ,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C] - (IllegalCharacter 0x6C) - ] + [DecodingError UTF8 [0xFE] (IllegalCharacter 0xFE) + ,DecodingError UTF8 [0xFF] (IllegalCharacter 0xFF) + -- Overlong representations of '/' + ,DecodingError UTF8Strict [0xC0,0xAF] + (IllegalRepresentation [0xC0,0xAF]) + ,DecodingError UTF8Strict [0xE0,0x80,0xAF] + (IllegalRepresentation [0xE0,0x80,0xAF]) + ,DecodingError UTF8Strict [0xF0,0x80,0x80,0xAF] + (IllegalRepresentation [0xF0,0x80,0x80,0xAF]) + -- Maximum overlong sequences + ,DecodingError UTF8Strict [0xC1,0xBF] + (IllegalRepresentation [0xC1,0xBF]) + ,DecodingError UTF8Strict [0xE0,0x9F,0xBF] + (IllegalRepresentation [0xE0,0x9F,0xBF]) + ,DecodingError UTF8Strict [0xF0,0x8F,0xBF,0xBF] + (IllegalRepresentation [0xF0,0x8F,0xBF,0xBF]) + -- Overlong represenations of '\NUL' + ,DecodingError UTF8Strict [0xC0,0x80] + (IllegalRepresentation [0xC0,0x80]) + ,DecodingError UTF8Strict [0xE0,0x80,0x80] + (IllegalRepresentation [0xE0,0x80,0x80]) + ,DecodingError UTF8Strict [0xF0,0x80,0x80,0x80] + (IllegalRepresentation [0xF0,0x80,0x80,0x80]) + -- Invalid extends + -- 2 of 2 + ,DecodingError UTF8Strict [0xCC,0x1C,0xE0] + (IllegalCharacter 0x1C) + -- 2 of 3 + ,DecodingError UTF8Strict [0xE3,0x6C,0xB3] + (IllegalCharacter 0x6C) + -- 3 of 3 + ,DecodingError UTF8Strict [0xE3,0xB4,0x6D] + (IllegalCharacter 0x6D) + -- 2 of 4 + ,DecodingError UTF8Strict [0xF2,0x6C,0xB3,0xB3] + (IllegalCharacter 0x6C) + -- 3 of 4 + ,DecodingError UTF8Strict [0xF2,0xB3,0x6C,0xB3] + (IllegalCharacter 0x6C) + -- 4 of 4 + ,DecodingError UTF8Strict [0xF2,0xB3,0xB3,0x6C] + (IllegalCharacter 0x6C) + ] utf16Tests :: Test utf16Tests = TestList $ map test $ - [EncodingTest UTF16BE "z" - [0x00,0x7A] - ,EncodingTest UTF16BE "\x6C34" - [0x6C,0x34] - ,EncodingTest UTF16BE "\x1D11E" - [0xD8,0x34,0xDD,0x1E] - ,EncodingTest UTF16 "\x6C34z\x1D11E" - [0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E] - ,EncodingTest UTF16BE "˨" - [0x02,0xE8] - ,DecodingError UTF16LE [0x65,0xDC] - (IllegalCharacter 0xDC) - ,DecodingError UTF16BE [0xDC,0x33] - (IllegalCharacter 0xDC) - ,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33] - (IllegalCharacter 0xDA) - ,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66] - (IllegalCharacter 0xDA) - ] + [EncodingTest UTF16BE "z" + [0x00,0x7A] + ,EncodingTest UTF16BE "\x6C34" + [0x6C,0x34] + ,EncodingTest UTF16BE "\x1D11E" + [0xD8,0x34,0xDD,0x1E] + ,EncodingTest UTF16 "\x6C34z\x1D11E" + [0xFE,0xFF,0x6C,0x34,0x00,0x7A,0xD8,0x34,0xDD,0x1E] + ,EncodingTest UTF16BE "˨" + [0x02,0xE8] + ,DecodingError UTF16LE [0x65,0xDC] + (IllegalCharacter 0xDC) + ,DecodingError UTF16BE [0xDC,0x33] + (IllegalCharacter 0xDC) + ,DecodingError UTF16BE [0xD9,0x78,0xDA,0x33] + (IllegalCharacter 0xDA) + ,DecodingError UTF16BE [0xD9,0x78,0xDA,0x66] + (IllegalCharacter 0xDA) + ] punycodeTests :: Test punycodeTests = TestList $ map test $ - [EncodingTest punycode "abcdef" - (map (fromIntegral.ord) "abcdef-") - ,EncodingTest punycode "abæcdöef" - (map (fromIntegral.ord) "abcdef-qua4k") - ,EncodingTest punycode "schön" - (map (fromIntegral.ord) "schn-7qa") - ,EncodingTest punycode "ยจฆฟคฏข" - (map (fromIntegral.ord) "22cdfh1b8fsa") - ,EncodingTest punycode "☺" - (map (fromIntegral.ord) "74h") - -- taken from http://tools.ietf.org/html/rfc3492#section-7 - -- Arabic (Egyptian) - ,punyTest "ليهمابتكلموشعربي؟" - "egbpdaj6bu4bxfgehfvwxn" - -- Chinese (simplified) - ,punyTest "他们为什么不说中文" - "ihqwcrb4cv8a8dqg056pqjye" - -- Chinese (traditional) - ,punyTest "他們爲什麽不說中文" - "ihqwctvzc91f659drss3x8bo0yb" - -- Czech - ,punyTest "Pročprostěnemluvíčesky" - "Proprostnemluvesky-uyb24dma41a" - -- Hebrew - ,punyTest "למההםפשוטלאמדבריםעברית" - "4dbcagdahymbxekheh6e0a7fei0b" - -- Hindi (Devanagari) - ,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902" - "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" - -- Japanese (kanji and hiragana) - ,punyTest "なぜみんな日本語を話してくれないのか" - "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" - -- Korean (Hangul syllables) - ,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까" - "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" - -- Russian (Cyrillic) - ,punyTest "почемужеонинеговорятпорусски" - "b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there - -- Spanish - ,punyTest "PorquénopuedensimplementehablarenEspañol" - "PorqunopuedensimplementehablarenEspaol-fmd56a" - -- Vietnamese - ,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt" - "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" - {-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\ - \bar\xE2\x80\x8B\xE2\x81\xA0\ - \baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF" - "foobarbaz"-} - ] - where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp) + [EncodingTest punycode "abcdef" + (map (fromIntegral.ord) "abcdef-") + ,EncodingTest punycode "abæcdöef" + (map (fromIntegral.ord) "abcdef-qua4k") + ,EncodingTest punycode "schön" + (map (fromIntegral.ord) "schn-7qa") + ,EncodingTest punycode "ยจฆฟคฏข" + (map (fromIntegral.ord) "22cdfh1b8fsa") + ,EncodingTest punycode "☺" + (map (fromIntegral.ord) "74h") + -- taken from http://tools.ietf.org/html/rfc3492#section-7 + -- Arabic (Egyptian) + ,punyTest "ليهمابتكلموشعربي؟" + "egbpdaj6bu4bxfgehfvwxn" + -- Chinese (simplified) + ,punyTest "他们为什么不说中文" + "ihqwcrb4cv8a8dqg056pqjye" + -- Chinese (traditional) + ,punyTest "他們爲什麽不說中文" + "ihqwctvzc91f659drss3x8bo0yb" + -- Czech + ,punyTest "Pročprostěnemluvíčesky" + "Proprostnemluvesky-uyb24dma41a" + -- Hebrew + ,punyTest "למההםפשוטלאמדבריםעברית" + "4dbcagdahymbxekheh6e0a7fei0b" + -- Hindi (Devanagari) + ,punyTest "\x92F\x939\x932\x94B\x917\x939\x93F\x928\x94D\x926\x940\x915\x94D\x92F\x94B\x902\x928\x939\x940\x902\x92C\x94B\x932\x938\x915\x924\x947\x939\x948\x902" + "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" + -- Japanese (kanji and hiragana) + ,punyTest "なぜみんな日本語を話してくれないのか" + "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" + -- Korean (Hangul syllables) + ,punyTest "세계의모든사람들이한국어를이해한다면얼마나좋을까" + "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" + -- Russian (Cyrillic) + ,punyTest "почемужеонинеговорятпорусски" + "b1abfaaepdrnnbgefbadotcwatmq2g4l" -- I think the ietf made a mistake there + -- Spanish + ,punyTest "PorquénopuedensimplementehablarenEspañol" + "PorqunopuedensimplementehablarenEspaol-fmd56a" + -- Vietnamese + ,punyTest "TạisaohọkhôngthểchỉnóitiếngViệt" + "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" + {-,punyTest "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B\ + \bar\xE2\x80\x8B\xE2\x81\xA0\ + \baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF" + "foobarbaz"-} + ] + where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp) isoTests :: Test isoTests = TestList $ map test $