From b95bfe9be40d7cc0b9a2e2a947e4f635422bbc30 Mon Sep 17 00:00:00 2001 From: Henning Guenther Date: Mon, 23 Feb 2009 10:24:59 -0800 Subject: [PATCH] JIS X 0208 encoding darcs-hash:20090223182459-a4fee-98ced8f8b7bac594dc6510eeecb6bea8c51a6090 --- Data/Encoding.hs | 3 + Data/Encoding/Base.hs | 15 ++++ Data/Encoding/Helper/Template.hs | 127 +++++++++++++++++++++---------- Data/Encoding/JISX0208.hs | 6 ++ Test/Tests.hs | 15 +++- encoding.cabal | 1 + 6 files changed, 126 insertions(+), 41 deletions(-) create mode 100644 Data/Encoding/JISX0208.hs diff --git a/Data/Encoding.hs b/Data/Encoding.hs index fa292b4..92bf0ce 100644 --- a/Data/Encoding.hs +++ b/Data/Encoding.hs @@ -73,6 +73,7 @@ import Data.Encoding.KOI8U import Data.Encoding.GB18030 import Data.Encoding.MacOSRoman import Data.Encoding.JISX0201 +import Data.Encoding.JISX0208 import Data.Char import Text.Regex @@ -312,6 +313,8 @@ encodingFromStringMaybe codeName = case (normalizeEncoding codeName) of "macintosh" -> Just $ DynEncoding MacOSRoman -- JIS X 0201 "jis_x_0201" -> Just $ DynEncoding JISX0201 + -- JIS x 0208 + "jis_x_0208" -> Just $ DynEncoding JISX0208 -- defaults to nothing _ -> Nothing where diff --git a/Data/Encoding/Base.hs b/Data/Encoding/Base.hs index b0fab52..9dc3312 100644 --- a/Data/Encoding/Base.hs +++ b/Data/Encoding/Base.hs @@ -37,9 +37,24 @@ encodeWithMap mp c = case Map.lookup c mp of Nothing -> throwException $ HasNoRepresentation c Just v -> pushWord8 v +encodeWithMap2 :: ByteSink m => Map Char (Word8,Word8) -> Char -> m () +encodeWithMap2 mp c = case Map.lookup c mp of + Nothing -> throwException $ HasNoRepresentation c + Just (w1,w2) -> do + pushWord8 w1 + pushWord8 w2 + decodeWithArray :: ByteSource m => Array Word8 (Maybe Char) -> m Char decodeWithArray arr = do w <- fetchWord8 case arr!w of Nothing -> throwException $ IllegalCharacter w + Just c -> return c + +decodeWithArray2 :: ByteSource m => Array (Word8,Word8) (Maybe Char) -> m Char +decodeWithArray2 arr = do + w1 <- fetchWord8 + w2 <- fetchWord8 + case arr!(w1,w2) of + Nothing -> throwException $ IllegalCharacter w1 Just c -> return c \ No newline at end of file diff --git a/Data/Encoding/Helper/Template.hs b/Data/Encoding/Helper/Template.hs index a92270a..fa55977 100644 --- a/Data/Encoding/Helper/Template.hs +++ b/Data/Encoding/Helper/Template.hs @@ -2,6 +2,7 @@ module Data.Encoding.Helper.Template where import Data.Encoding.Base +import Data.Bits import Data.Char import Data.Maybe (mapMaybe) import Data.Map as Map (fromList,lookup) @@ -10,66 +11,112 @@ import Language.Haskell.TH makeISOInstance :: String -> FilePath -> Q [Dec] makeISOInstance name file = do - let rname = mkName name trans <- runIO (readTranslation file) mp <- encodingMap (validTranslations trans) - arr <- decodingArray (fillTranslations trans) - return [ DataD [] rname [] [NormalC rname []] [''Show] - , InstanceD [] (AppT (ConT ''Encoding) (ConT rname)) - [FunD 'encodeChar - [Clause [WildP] (NormalB $ AppE (VarE 'encodeWithMap) (VarE $ mkName "mp")) - [ValD (VarP $ mkName "mp") (NormalB mp) []] - ] - ,FunD 'decodeChar - [Clause [WildP] (NormalB $ AppE (VarE 'decodeWithArray) (VarE $ mkName "arr")) - [ValD (VarP $ mkName "arr") (NormalB arr) []] - ] - ] - ] - + arr <- decodingArray (fillTranslations 0 255 trans) + return $ encodingInstance 'encodeWithMap 'decodeWithArray name mp arr + +makeJISInstance :: String -> FilePath -> Q [Dec] +makeJISInstance name file = do + trans <- runIO (readJISTranslation file) + mp <- encodingMap2 (validTranslations trans) + arr <- decodingArray2 (fillTranslations (0x21,0x21) (0x7E,0x7E) trans) + return $ encodingInstance 'encodeWithMap2 'decodeWithArray2 name mp arr + +encodingInstance :: Name -> Name -> String -> Exp -> Exp -> [Dec] +encodingInstance enc dec name mp arr + = [ DataD [] rname [] [NormalC rname []] [''Show] + , InstanceD [] (AppT (ConT ''Encoding) (ConT rname)) + [FunD 'encodeChar + [Clause [WildP] (NormalB $ AppE (VarE enc) (VarE rmp)) + [ValD (VarP rmp) (NormalB mp) []] + ] + ,FunD 'decodeChar + [Clause [WildP] (NormalB $ AppE (VarE dec) (VarE rarr)) + [ValD (VarP rarr) (NormalB arr) []] + ] + ] + ] + where + rname = mkName name + rarr = mkName "arr" + rmp = mkName "mp" + createCharArray :: [(Integer,Maybe Char)] -> Integer -> Integer -> Q Exp -createCharArray lst = createArray (map (\(x,y) -> (x,case y of - Nothing -> ConE 'Nothing - Just c -> AppE (ConE 'Just) (LitE $ CharL c)) - ) lst) +createCharArray lst f t = createArray (map (\(x,y) -> + (LitE $ IntegerL x,mbCharToExp y) + ) lst) (LitE $ IntegerL f) (LitE $ IntegerL t) +createCharArray2 :: [((Integer,Integer),Maybe Char)] -> (Integer,Integer) -> (Integer,Integer) -> Q Exp +createCharArray2 lst (f1,f2) (t1,t2) + = createArray (map (\((x1,x2),y) -> + (TupE [integerExp x1,integerExp x2],mbCharToExp y) + ) lst) + (TupE [integerExp f1,integerExp f2]) + (TupE [integerExp t1,integerExp t2]) -createArray :: [(Integer,Exp)] -> Integer -> Integer -> Q Exp -createArray lst from to = return $ AppE - (AppE - (VarE 'array) - (TupE [LitE $ IntegerL from,LitE $ IntegerL to])) - (ListE [ TupE [LitE $ IntegerL x,y] - | (x,y) <- lst ]) +integerExp :: Integer -> Exp +integerExp i = LitE $ IntegerL i + +mbCharToExp :: Maybe Char -> Exp +mbCharToExp Nothing = ConE 'Nothing +mbCharToExp (Just c) = AppE (ConE 'Just) (LitE $ CharL c) + +createArray :: [(Exp,Exp)] -> Exp -> Exp -> Q Exp +createArray lst from to + = return $ AppE + (AppE + (VarE 'array) + (TupE [from,to])) + (ListE [TupE [x,y] | (x,y) <- lst]) decodingArray :: [(Integer,Maybe Char)] -> Q Exp decodingArray trans = createCharArray trans 0 255 +decodingArray2 :: [((Integer,Integer),Maybe Char)] -> Q Exp +decodingArray2 trans = createCharArray2 trans (0x21,0x21) (0x7E,0x7E) + encodingMap :: [(Integer,Char)] -> Q Exp encodingMap trans = return $ AppE (VarE 'fromList) (ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from] | (from,to) <- trans]) +encodingMap2 :: [((Integer,Integer),Char)] -> Q Exp +encodingMap2 trans = return $ AppE + (VarE 'fromList) + (ListE [ TupE [LitE $ CharL to,TupE [integerExp f1,integerExp f2]] + | ((f1,f2),to) <- trans]) + readTranslation :: FilePath -> IO [(Integer,Maybe Char)] readTranslation file = do - cont <- readFile file - return $ mapMaybe (\ln -> case ln of - [] -> Nothing - ('#':xs) -> Nothing - _ -> case words ln of - (src:"#UNDEFINED":_) -> Just (read src,Nothing) - (src:trg:_) -> Just (read src,Just $ chr $ read trg) - _ -> Nothing - ) (lines cont) + cont <- readFile file + return $ mapMaybe (\ln -> case ln of + [src] -> Just (src,Nothing) + [src,trg] -> Just (src,Just $ chr $ fromIntegral trg) + _ -> Nothing) (parseTranslationTable cont) -fillTranslations :: [(Integer,Maybe Char)] -> [(Integer,Maybe Char)] -fillTranslations = fillTranslations' (-1) +readJISTranslation :: FilePath -> IO [((Integer,Integer),Maybe Char)] +readJISTranslation file = do + cont <- readFile file + return $ mapMaybe (\ln -> case ln of + [_,src] -> Just ((src `shiftR` 8,src .&. 0xFF),Nothing) + [_,src,trg] -> Just ((src `shiftR` 8,src .&. 0xFF),Just $ chr $ fromIntegral trg) + _ -> Nothing) (parseTranslationTable cont) + +parseTranslationTable :: String -> [[Integer]] +parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont)) + +fillTranslations :: Ix a => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)] +fillTranslations f t = merge (range (f,t)) where - fillTranslations' n ((n',c):cs) = (map (\i -> (i,Nothing)) [n+1..n'-1])++((n',c):fillTranslations' n' cs) - fillTranslations' n [] = map (\i -> (i,Nothing)) [n+1..255] + merge xs [] = map (\x -> (x,Nothing)) xs + merge [] _ = error "Data.Encoding.Helper.Template.fillTranslations: Character translations out of range" + merge (x:xs) (y:ys) = if x < fst y + then (x,Nothing):(merge xs (y:ys)) + else y:(merge xs ys) -validTranslations :: [(Integer,Maybe Char)] -> [(Integer,Char)] +validTranslations :: [(a,Maybe Char)] -> [(a,Char)] validTranslations = mapMaybe (\(n,mc) -> case mc of Nothing -> Nothing Just c -> Just (n,c)) \ No newline at end of file diff --git a/Data/Encoding/JISX0208.hs b/Data/Encoding/JISX0208.hs new file mode 100644 index 0000000..ec9c371 --- /dev/null +++ b/Data/Encoding/JISX0208.hs @@ -0,0 +1,6 @@ +{-# LANGUAGE TemplateHaskell #-} +module Data.Encoding.JISX0208 where + +import Data.Encoding.Helper.Template (makeJISInstance) + +$( makeJISInstance "JISX0208" "jis0208.txt" ) \ No newline at end of file diff --git a/Test/Tests.hs b/Test/Tests.hs index 87e2e3d..3e50283 100644 --- a/Test/Tests.hs +++ b/Test/Tests.hs @@ -17,6 +17,7 @@ import Data.Encoding.ISO885910 import Data.Encoding.ISO885911 import Data.Encoding.ISO885913 import Data.Encoding.ISO885914 +import Data.Encoding.JISX0208 import Data.Encoding.BootString import Test.HUnit import Test.QuickCheck hiding (test) @@ -227,4 +228,16 @@ punycodeTests = TestList $ map test $ \baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF" "foobarbaz"-} ] - where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp) \ No newline at end of file + where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp) + +isoTests :: Test +isoTests = TestList $ map test $ + [EncodingTest ISO88592 "\x104\x2D8\x141\xA4\x13D\x15A\xA7\xA8\x160\x15E\x164\x179\xAD\x17D\x17B\xB0\x105\x2DB\x142\xB4\x13E\x15B\x2C7\xB8\x161\x15F" + [0xA1..0xBA] + ] + +jisTests :: Test +jisTests = TestList $ map test $ + [EncodingTest JISX0208 "\x4E9C" + [0x30,0x21] + ] \ No newline at end of file diff --git a/encoding.cabal b/encoding.cabal index 7a6beb4..3add12b 100644 --- a/encoding.cabal +++ b/encoding.cabal @@ -61,4 +61,5 @@ Library Data.Encoding.BootString Data.Encoding.MacOSRoman Data.Encoding.JISX0201 + Data.Encoding.JISX0208 System.IO.Encoding \ No newline at end of file