JIS X 0208 encoding

darcs-hash:20090223182459-a4fee-98ced8f8b7bac594dc6510eeecb6bea8c51a6090
This commit is contained in:
Henning Guenther 2009-02-23 10:24:59 -08:00
parent 0398f66695
commit b95bfe9be4
6 changed files with 126 additions and 41 deletions

View File

@ -73,6 +73,7 @@ import Data.Encoding.KOI8U
import Data.Encoding.GB18030 import Data.Encoding.GB18030
import Data.Encoding.MacOSRoman import Data.Encoding.MacOSRoman
import Data.Encoding.JISX0201 import Data.Encoding.JISX0201
import Data.Encoding.JISX0208
import Data.Char import Data.Char
import Text.Regex import Text.Regex
@ -312,6 +313,8 @@ encodingFromStringMaybe codeName = case (normalizeEncoding codeName) of
"macintosh" -> Just $ DynEncoding MacOSRoman "macintosh" -> Just $ DynEncoding MacOSRoman
-- JIS X 0201 -- JIS X 0201
"jis_x_0201" -> Just $ DynEncoding JISX0201 "jis_x_0201" -> Just $ DynEncoding JISX0201
-- JIS x 0208
"jis_x_0208" -> Just $ DynEncoding JISX0208
-- defaults to nothing -- defaults to nothing
_ -> Nothing _ -> Nothing
where where

View File

@ -37,9 +37,24 @@ encodeWithMap mp c = case Map.lookup c mp of
Nothing -> throwException $ HasNoRepresentation c Nothing -> throwException $ HasNoRepresentation c
Just v -> pushWord8 v Just v -> pushWord8 v
encodeWithMap2 :: ByteSink m => Map Char (Word8,Word8) -> Char -> m ()
encodeWithMap2 mp c = case Map.lookup c mp of
Nothing -> throwException $ HasNoRepresentation c
Just (w1,w2) -> do
pushWord8 w1
pushWord8 w2
decodeWithArray :: ByteSource m => Array Word8 (Maybe Char) -> m Char decodeWithArray :: ByteSource m => Array Word8 (Maybe Char) -> m Char
decodeWithArray arr = do decodeWithArray arr = do
w <- fetchWord8 w <- fetchWord8
case arr!w of case arr!w of
Nothing -> throwException $ IllegalCharacter w Nothing -> throwException $ IllegalCharacter w
Just c -> return c
decodeWithArray2 :: ByteSource m => Array (Word8,Word8) (Maybe Char) -> m Char
decodeWithArray2 arr = do
w1 <- fetchWord8
w2 <- fetchWord8
case arr!(w1,w2) of
Nothing -> throwException $ IllegalCharacter w1
Just c -> return c Just c -> return c

View File

@ -2,6 +2,7 @@
module Data.Encoding.Helper.Template where module Data.Encoding.Helper.Template where
import Data.Encoding.Base import Data.Encoding.Base
import Data.Bits
import Data.Char import Data.Char
import Data.Maybe (mapMaybe) import Data.Maybe (mapMaybe)
import Data.Map as Map (fromList,lookup) import Data.Map as Map (fromList,lookup)
@ -10,66 +11,112 @@ import Language.Haskell.TH
makeISOInstance :: String -> FilePath -> Q [Dec] makeISOInstance :: String -> FilePath -> Q [Dec]
makeISOInstance name file = do makeISOInstance name file = do
let rname = mkName name
trans <- runIO (readTranslation file) trans <- runIO (readTranslation file)
mp <- encodingMap (validTranslations trans) mp <- encodingMap (validTranslations trans)
arr <- decodingArray (fillTranslations trans) arr <- decodingArray (fillTranslations 0 255 trans)
return [ DataD [] rname [] [NormalC rname []] [''Show] return $ encodingInstance 'encodeWithMap 'decodeWithArray name mp arr
, InstanceD [] (AppT (ConT ''Encoding) (ConT rname))
[FunD 'encodeChar makeJISInstance :: String -> FilePath -> Q [Dec]
[Clause [WildP] (NormalB $ AppE (VarE 'encodeWithMap) (VarE $ mkName "mp")) makeJISInstance name file = do
[ValD (VarP $ mkName "mp") (NormalB mp) []] trans <- runIO (readJISTranslation file)
] mp <- encodingMap2 (validTranslations trans)
,FunD 'decodeChar arr <- decodingArray2 (fillTranslations (0x21,0x21) (0x7E,0x7E) trans)
[Clause [WildP] (NormalB $ AppE (VarE 'decodeWithArray) (VarE $ mkName "arr")) return $ encodingInstance 'encodeWithMap2 'decodeWithArray2 name mp arr
[ValD (VarP $ mkName "arr") (NormalB arr) []]
] encodingInstance :: Name -> Name -> String -> Exp -> Exp -> [Dec]
] encodingInstance enc dec name mp arr
] = [ DataD [] rname [] [NormalC rname []] [''Show]
, InstanceD [] (AppT (ConT ''Encoding) (ConT rname))
[FunD 'encodeChar
[Clause [WildP] (NormalB $ AppE (VarE enc) (VarE rmp))
[ValD (VarP rmp) (NormalB mp) []]
]
,FunD 'decodeChar
[Clause [WildP] (NormalB $ AppE (VarE dec) (VarE rarr))
[ValD (VarP rarr) (NormalB arr) []]
]
]
]
where
rname = mkName name
rarr = mkName "arr"
rmp = mkName "mp"
createCharArray :: [(Integer,Maybe Char)] -> Integer -> Integer -> Q Exp createCharArray :: [(Integer,Maybe Char)] -> Integer -> Integer -> Q Exp
createCharArray lst = createArray (map (\(x,y) -> (x,case y of createCharArray lst f t = createArray (map (\(x,y) ->
Nothing -> ConE 'Nothing (LitE $ IntegerL x,mbCharToExp y)
Just c -> AppE (ConE 'Just) (LitE $ CharL c)) ) lst) (LitE $ IntegerL f) (LitE $ IntegerL t)
) lst)
createCharArray2 :: [((Integer,Integer),Maybe Char)] -> (Integer,Integer) -> (Integer,Integer) -> Q Exp
createCharArray2 lst (f1,f2) (t1,t2)
= createArray (map (\((x1,x2),y) ->
(TupE [integerExp x1,integerExp x2],mbCharToExp y)
) lst)
(TupE [integerExp f1,integerExp f2])
(TupE [integerExp t1,integerExp t2])
createArray :: [(Integer,Exp)] -> Integer -> Integer -> Q Exp integerExp :: Integer -> Exp
createArray lst from to = return $ AppE integerExp i = LitE $ IntegerL i
(AppE
(VarE 'array) mbCharToExp :: Maybe Char -> Exp
(TupE [LitE $ IntegerL from,LitE $ IntegerL to])) mbCharToExp Nothing = ConE 'Nothing
(ListE [ TupE [LitE $ IntegerL x,y] mbCharToExp (Just c) = AppE (ConE 'Just) (LitE $ CharL c)
| (x,y) <- lst ])
createArray :: [(Exp,Exp)] -> Exp -> Exp -> Q Exp
createArray lst from to
= return $ AppE
(AppE
(VarE 'array)
(TupE [from,to]))
(ListE [TupE [x,y] | (x,y) <- lst])
decodingArray :: [(Integer,Maybe Char)] -> Q Exp decodingArray :: [(Integer,Maybe Char)] -> Q Exp
decodingArray trans = createCharArray trans 0 255 decodingArray trans = createCharArray trans 0 255
decodingArray2 :: [((Integer,Integer),Maybe Char)] -> Q Exp
decodingArray2 trans = createCharArray2 trans (0x21,0x21) (0x7E,0x7E)
encodingMap :: [(Integer,Char)] -> Q Exp encodingMap :: [(Integer,Char)] -> Q Exp
encodingMap trans = return $ AppE encodingMap trans = return $ AppE
(VarE 'fromList) (VarE 'fromList)
(ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from] (ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from]
| (from,to) <- trans]) | (from,to) <- trans])
encodingMap2 :: [((Integer,Integer),Char)] -> Q Exp
encodingMap2 trans = return $ AppE
(VarE 'fromList)
(ListE [ TupE [LitE $ CharL to,TupE [integerExp f1,integerExp f2]]
| ((f1,f2),to) <- trans])
readTranslation :: FilePath -> IO [(Integer,Maybe Char)] readTranslation :: FilePath -> IO [(Integer,Maybe Char)]
readTranslation file = do readTranslation file = do
cont <- readFile file cont <- readFile file
return $ mapMaybe (\ln -> case ln of return $ mapMaybe (\ln -> case ln of
[] -> Nothing [src] -> Just (src,Nothing)
('#':xs) -> Nothing [src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
_ -> case words ln of _ -> Nothing) (parseTranslationTable cont)
(src:"#UNDEFINED":_) -> Just (read src,Nothing)
(src:trg:_) -> Just (read src,Just $ chr $ read trg)
_ -> Nothing
) (lines cont)
fillTranslations :: [(Integer,Maybe Char)] -> [(Integer,Maybe Char)] readJISTranslation :: FilePath -> IO [((Integer,Integer),Maybe Char)]
fillTranslations = fillTranslations' (-1) readJISTranslation file = do
cont <- readFile file
return $ mapMaybe (\ln -> case ln of
[_,src] -> Just ((src `shiftR` 8,src .&. 0xFF),Nothing)
[_,src,trg] -> Just ((src `shiftR` 8,src .&. 0xFF),Just $ chr $ fromIntegral trg)
_ -> Nothing) (parseTranslationTable cont)
parseTranslationTable :: String -> [[Integer]]
parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont))
fillTranslations :: Ix a => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)]
fillTranslations f t = merge (range (f,t))
where where
fillTranslations' n ((n',c):cs) = (map (\i -> (i,Nothing)) [n+1..n'-1])++((n',c):fillTranslations' n' cs) merge xs [] = map (\x -> (x,Nothing)) xs
fillTranslations' n [] = map (\i -> (i,Nothing)) [n+1..255] merge [] _ = error "Data.Encoding.Helper.Template.fillTranslations: Character translations out of range"
merge (x:xs) (y:ys) = if x < fst y
then (x,Nothing):(merge xs (y:ys))
else y:(merge xs ys)
validTranslations :: [(Integer,Maybe Char)] -> [(Integer,Char)] validTranslations :: [(a,Maybe Char)] -> [(a,Char)]
validTranslations = mapMaybe (\(n,mc) -> case mc of validTranslations = mapMaybe (\(n,mc) -> case mc of
Nothing -> Nothing Nothing -> Nothing
Just c -> Just (n,c)) Just c -> Just (n,c))

View File

@ -0,0 +1,6 @@
{-# LANGUAGE TemplateHaskell #-}
module Data.Encoding.JISX0208 where
import Data.Encoding.Helper.Template (makeJISInstance)
$( makeJISInstance "JISX0208" "jis0208.txt" )

View File

@ -17,6 +17,7 @@ import Data.Encoding.ISO885910
import Data.Encoding.ISO885911 import Data.Encoding.ISO885911
import Data.Encoding.ISO885913 import Data.Encoding.ISO885913
import Data.Encoding.ISO885914 import Data.Encoding.ISO885914
import Data.Encoding.JISX0208
import Data.Encoding.BootString import Data.Encoding.BootString
import Test.HUnit import Test.HUnit
import Test.QuickCheck hiding (test) import Test.QuickCheck hiding (test)
@ -227,4 +228,16 @@ punycodeTests = TestList $ map test $
\baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF" \baz\xEF\xB8\x80\xEF\xB8\x88\xEF\xB8\x8F\xEF\xBB\xBF"
"foobarbaz"-} "foobarbaz"-}
] ]
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp) where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
isoTests :: Test
isoTests = TestList $ map test $
[EncodingTest ISO88592 "\x104\x2D8\x141\xA4\x13D\x15A\xA7\xA8\x160\x15E\x164\x179\xAD\x17D\x17B\xB0\x105\x2DB\x142\xB4\x13E\x15B\x2C7\xB8\x161\x15F"
[0xA1..0xBA]
]
jisTests :: Test
jisTests = TestList $ map test $
[EncodingTest JISX0208 "\x4E9C"
[0x30,0x21]
]

View File

@ -61,4 +61,5 @@ Library
Data.Encoding.BootString Data.Encoding.BootString
Data.Encoding.MacOSRoman Data.Encoding.MacOSRoman
Data.Encoding.JISX0201 Data.Encoding.JISX0201
Data.Encoding.JISX0208
System.IO.Encoding System.IO.Encoding