JIS X 0208 encoding
darcs-hash:20090223182459-a4fee-98ced8f8b7bac594dc6510eeecb6bea8c51a6090
This commit is contained in:
parent
0398f66695
commit
b95bfe9be4
@ -73,6 +73,7 @@ import Data.Encoding.KOI8U
|
|||||||
import Data.Encoding.GB18030
|
import Data.Encoding.GB18030
|
||||||
import Data.Encoding.MacOSRoman
|
import Data.Encoding.MacOSRoman
|
||||||
import Data.Encoding.JISX0201
|
import Data.Encoding.JISX0201
|
||||||
|
import Data.Encoding.JISX0208
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Text.Regex
|
import Text.Regex
|
||||||
|
|
||||||
@ -312,6 +313,8 @@ encodingFromStringMaybe codeName = case (normalizeEncoding codeName) of
|
|||||||
"macintosh" -> Just $ DynEncoding MacOSRoman
|
"macintosh" -> Just $ DynEncoding MacOSRoman
|
||||||
-- JIS X 0201
|
-- JIS X 0201
|
||||||
"jis_x_0201" -> Just $ DynEncoding JISX0201
|
"jis_x_0201" -> Just $ DynEncoding JISX0201
|
||||||
|
-- JIS x 0208
|
||||||
|
"jis_x_0208" -> Just $ DynEncoding JISX0208
|
||||||
-- defaults to nothing
|
-- defaults to nothing
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
where
|
where
|
||||||
|
|||||||
@ -37,9 +37,24 @@ encodeWithMap mp c = case Map.lookup c mp of
|
|||||||
Nothing -> throwException $ HasNoRepresentation c
|
Nothing -> throwException $ HasNoRepresentation c
|
||||||
Just v -> pushWord8 v
|
Just v -> pushWord8 v
|
||||||
|
|
||||||
|
encodeWithMap2 :: ByteSink m => Map Char (Word8,Word8) -> Char -> m ()
|
||||||
|
encodeWithMap2 mp c = case Map.lookup c mp of
|
||||||
|
Nothing -> throwException $ HasNoRepresentation c
|
||||||
|
Just (w1,w2) -> do
|
||||||
|
pushWord8 w1
|
||||||
|
pushWord8 w2
|
||||||
|
|
||||||
decodeWithArray :: ByteSource m => Array Word8 (Maybe Char) -> m Char
|
decodeWithArray :: ByteSource m => Array Word8 (Maybe Char) -> m Char
|
||||||
decodeWithArray arr = do
|
decodeWithArray arr = do
|
||||||
w <- fetchWord8
|
w <- fetchWord8
|
||||||
case arr!w of
|
case arr!w of
|
||||||
Nothing -> throwException $ IllegalCharacter w
|
Nothing -> throwException $ IllegalCharacter w
|
||||||
Just c -> return c
|
Just c -> return c
|
||||||
|
|
||||||
|
decodeWithArray2 :: ByteSource m => Array (Word8,Word8) (Maybe Char) -> m Char
|
||||||
|
decodeWithArray2 arr = do
|
||||||
|
w1 <- fetchWord8
|
||||||
|
w2 <- fetchWord8
|
||||||
|
case arr!(w1,w2) of
|
||||||
|
Nothing -> throwException $ IllegalCharacter w1
|
||||||
|
Just c -> return c
|
||||||
@ -2,6 +2,7 @@
|
|||||||
module Data.Encoding.Helper.Template where
|
module Data.Encoding.Helper.Template where
|
||||||
|
|
||||||
import Data.Encoding.Base
|
import Data.Encoding.Base
|
||||||
|
import Data.Bits
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Maybe (mapMaybe)
|
import Data.Maybe (mapMaybe)
|
||||||
import Data.Map as Map (fromList,lookup)
|
import Data.Map as Map (fromList,lookup)
|
||||||
@ -10,66 +11,112 @@ import Language.Haskell.TH
|
|||||||
|
|
||||||
makeISOInstance :: String -> FilePath -> Q [Dec]
|
makeISOInstance :: String -> FilePath -> Q [Dec]
|
||||||
makeISOInstance name file = do
|
makeISOInstance name file = do
|
||||||
let rname = mkName name
|
|
||||||
trans <- runIO (readTranslation file)
|
trans <- runIO (readTranslation file)
|
||||||
mp <- encodingMap (validTranslations trans)
|
mp <- encodingMap (validTranslations trans)
|
||||||
arr <- decodingArray (fillTranslations trans)
|
arr <- decodingArray (fillTranslations 0 255 trans)
|
||||||
return [ DataD [] rname [] [NormalC rname []] [''Show]
|
return $ encodingInstance 'encodeWithMap 'decodeWithArray name mp arr
|
||||||
, InstanceD [] (AppT (ConT ''Encoding) (ConT rname))
|
|
||||||
[FunD 'encodeChar
|
makeJISInstance :: String -> FilePath -> Q [Dec]
|
||||||
[Clause [WildP] (NormalB $ AppE (VarE 'encodeWithMap) (VarE $ mkName "mp"))
|
makeJISInstance name file = do
|
||||||
[ValD (VarP $ mkName "mp") (NormalB mp) []]
|
trans <- runIO (readJISTranslation file)
|
||||||
]
|
mp <- encodingMap2 (validTranslations trans)
|
||||||
,FunD 'decodeChar
|
arr <- decodingArray2 (fillTranslations (0x21,0x21) (0x7E,0x7E) trans)
|
||||||
[Clause [WildP] (NormalB $ AppE (VarE 'decodeWithArray) (VarE $ mkName "arr"))
|
return $ encodingInstance 'encodeWithMap2 'decodeWithArray2 name mp arr
|
||||||
[ValD (VarP $ mkName "arr") (NormalB arr) []]
|
|
||||||
]
|
encodingInstance :: Name -> Name -> String -> Exp -> Exp -> [Dec]
|
||||||
]
|
encodingInstance enc dec name mp arr
|
||||||
]
|
= [ DataD [] rname [] [NormalC rname []] [''Show]
|
||||||
|
, InstanceD [] (AppT (ConT ''Encoding) (ConT rname))
|
||||||
|
[FunD 'encodeChar
|
||||||
|
[Clause [WildP] (NormalB $ AppE (VarE enc) (VarE rmp))
|
||||||
|
[ValD (VarP rmp) (NormalB mp) []]
|
||||||
|
]
|
||||||
|
,FunD 'decodeChar
|
||||||
|
[Clause [WildP] (NormalB $ AppE (VarE dec) (VarE rarr))
|
||||||
|
[ValD (VarP rarr) (NormalB arr) []]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
where
|
||||||
|
rname = mkName name
|
||||||
|
rarr = mkName "arr"
|
||||||
|
rmp = mkName "mp"
|
||||||
|
|
||||||
createCharArray :: [(Integer,Maybe Char)] -> Integer -> Integer -> Q Exp
|
createCharArray :: [(Integer,Maybe Char)] -> Integer -> Integer -> Q Exp
|
||||||
createCharArray lst = createArray (map (\(x,y) -> (x,case y of
|
createCharArray lst f t = createArray (map (\(x,y) ->
|
||||||
Nothing -> ConE 'Nothing
|
(LitE $ IntegerL x,mbCharToExp y)
|
||||||
Just c -> AppE (ConE 'Just) (LitE $ CharL c))
|
) lst) (LitE $ IntegerL f) (LitE $ IntegerL t)
|
||||||
) lst)
|
|
||||||
|
|
||||||
|
createCharArray2 :: [((Integer,Integer),Maybe Char)] -> (Integer,Integer) -> (Integer,Integer) -> Q Exp
|
||||||
|
createCharArray2 lst (f1,f2) (t1,t2)
|
||||||
|
= createArray (map (\((x1,x2),y) ->
|
||||||
|
(TupE [integerExp x1,integerExp x2],mbCharToExp y)
|
||||||
|
) lst)
|
||||||
|
(TupE [integerExp f1,integerExp f2])
|
||||||
|
(TupE [integerExp t1,integerExp t2])
|
||||||
|
|
||||||
createArray :: [(Integer,Exp)] -> Integer -> Integer -> Q Exp
|
integerExp :: Integer -> Exp
|
||||||
createArray lst from to = return $ AppE
|
integerExp i = LitE $ IntegerL i
|
||||||
(AppE
|
|
||||||
(VarE 'array)
|
mbCharToExp :: Maybe Char -> Exp
|
||||||
(TupE [LitE $ IntegerL from,LitE $ IntegerL to]))
|
mbCharToExp Nothing = ConE 'Nothing
|
||||||
(ListE [ TupE [LitE $ IntegerL x,y]
|
mbCharToExp (Just c) = AppE (ConE 'Just) (LitE $ CharL c)
|
||||||
| (x,y) <- lst ])
|
|
||||||
|
createArray :: [(Exp,Exp)] -> Exp -> Exp -> Q Exp
|
||||||
|
createArray lst from to
|
||||||
|
= return $ AppE
|
||||||
|
(AppE
|
||||||
|
(VarE 'array)
|
||||||
|
(TupE [from,to]))
|
||||||
|
(ListE [TupE [x,y] | (x,y) <- lst])
|
||||||
|
|
||||||
decodingArray :: [(Integer,Maybe Char)] -> Q Exp
|
decodingArray :: [(Integer,Maybe Char)] -> Q Exp
|
||||||
decodingArray trans = createCharArray trans 0 255
|
decodingArray trans = createCharArray trans 0 255
|
||||||
|
|
||||||
|
decodingArray2 :: [((Integer,Integer),Maybe Char)] -> Q Exp
|
||||||
|
decodingArray2 trans = createCharArray2 trans (0x21,0x21) (0x7E,0x7E)
|
||||||
|
|
||||||
encodingMap :: [(Integer,Char)] -> Q Exp
|
encodingMap :: [(Integer,Char)] -> Q Exp
|
||||||
encodingMap trans = return $ AppE
|
encodingMap trans = return $ AppE
|
||||||
(VarE 'fromList)
|
(VarE 'fromList)
|
||||||
(ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from]
|
(ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from]
|
||||||
| (from,to) <- trans])
|
| (from,to) <- trans])
|
||||||
|
|
||||||
|
encodingMap2 :: [((Integer,Integer),Char)] -> Q Exp
|
||||||
|
encodingMap2 trans = return $ AppE
|
||||||
|
(VarE 'fromList)
|
||||||
|
(ListE [ TupE [LitE $ CharL to,TupE [integerExp f1,integerExp f2]]
|
||||||
|
| ((f1,f2),to) <- trans])
|
||||||
|
|
||||||
readTranslation :: FilePath -> IO [(Integer,Maybe Char)]
|
readTranslation :: FilePath -> IO [(Integer,Maybe Char)]
|
||||||
readTranslation file = do
|
readTranslation file = do
|
||||||
cont <- readFile file
|
cont <- readFile file
|
||||||
return $ mapMaybe (\ln -> case ln of
|
return $ mapMaybe (\ln -> case ln of
|
||||||
[] -> Nothing
|
[src] -> Just (src,Nothing)
|
||||||
('#':xs) -> Nothing
|
[src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
|
||||||
_ -> case words ln of
|
_ -> Nothing) (parseTranslationTable cont)
|
||||||
(src:"#UNDEFINED":_) -> Just (read src,Nothing)
|
|
||||||
(src:trg:_) -> Just (read src,Just $ chr $ read trg)
|
|
||||||
_ -> Nothing
|
|
||||||
) (lines cont)
|
|
||||||
|
|
||||||
fillTranslations :: [(Integer,Maybe Char)] -> [(Integer,Maybe Char)]
|
readJISTranslation :: FilePath -> IO [((Integer,Integer),Maybe Char)]
|
||||||
fillTranslations = fillTranslations' (-1)
|
readJISTranslation file = do
|
||||||
|
cont <- readFile file
|
||||||
|
return $ mapMaybe (\ln -> case ln of
|
||||||
|
[_,src] -> Just ((src `shiftR` 8,src .&. 0xFF),Nothing)
|
||||||
|
[_,src,trg] -> Just ((src `shiftR` 8,src .&. 0xFF),Just $ chr $ fromIntegral trg)
|
||||||
|
_ -> Nothing) (parseTranslationTable cont)
|
||||||
|
|
||||||
|
parseTranslationTable :: String -> [[Integer]]
|
||||||
|
parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont))
|
||||||
|
|
||||||
|
fillTranslations :: Ix a => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)]
|
||||||
|
fillTranslations f t = merge (range (f,t))
|
||||||
where
|
where
|
||||||
fillTranslations' n ((n',c):cs) = (map (\i -> (i,Nothing)) [n+1..n'-1])++((n',c):fillTranslations' n' cs)
|
merge xs [] = map (\x -> (x,Nothing)) xs
|
||||||
fillTranslations' n [] = map (\i -> (i,Nothing)) [n+1..255]
|
merge [] _ = error "Data.Encoding.Helper.Template.fillTranslations: Character translations out of range"
|
||||||
|
merge (x:xs) (y:ys) = if x < fst y
|
||||||
|
then (x,Nothing):(merge xs (y:ys))
|
||||||
|
else y:(merge xs ys)
|
||||||
|
|
||||||
validTranslations :: [(Integer,Maybe Char)] -> [(Integer,Char)]
|
validTranslations :: [(a,Maybe Char)] -> [(a,Char)]
|
||||||
validTranslations = mapMaybe (\(n,mc) -> case mc of
|
validTranslations = mapMaybe (\(n,mc) -> case mc of
|
||||||
Nothing -> Nothing
|
Nothing -> Nothing
|
||||||
Just c -> Just (n,c))
|
Just c -> Just (n,c))
|
||||||
6
Data/Encoding/JISX0208.hs
Normal file
6
Data/Encoding/JISX0208.hs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{-# LANGUAGE TemplateHaskell #-}
|
||||||
|
module Data.Encoding.JISX0208 where
|
||||||
|
|
||||||
|
import Data.Encoding.Helper.Template (makeJISInstance)
|
||||||
|
|
||||||
|
$( makeJISInstance "JISX0208" "jis0208.txt" )
|
||||||
@ -17,6 +17,7 @@ import Data.Encoding.ISO885910
|
|||||||
import Data.Encoding.ISO885911
|
import Data.Encoding.ISO885911
|
||||||
import Data.Encoding.ISO885913
|
import Data.Encoding.ISO885913
|
||||||
import Data.Encoding.ISO885914
|
import Data.Encoding.ISO885914
|
||||||
|
import Data.Encoding.JISX0208
|
||||||
import Data.Encoding.BootString
|
import Data.Encoding.BootString
|
||||||
import Test.HUnit
|
import Test.HUnit
|
||||||
import Test.QuickCheck hiding (test)
|
import Test.QuickCheck hiding (test)
|
||||||
@ -228,3 +229,15 @@ punycodeTests = TestList $ map test $
|
|||||||
"foobarbaz"-}
|
"foobarbaz"-}
|
||||||
]
|
]
|
||||||
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
where punyTest str outp = EncodingTest punycode str (map (fromIntegral.ord) outp)
|
||||||
|
|
||||||
|
isoTests :: Test
|
||||||
|
isoTests = TestList $ map test $
|
||||||
|
[EncodingTest ISO88592 "\x104\x2D8\x141\xA4\x13D\x15A\xA7\xA8\x160\x15E\x164\x179\xAD\x17D\x17B\xB0\x105\x2DB\x142\xB4\x13E\x15B\x2C7\xB8\x161\x15F"
|
||||||
|
[0xA1..0xBA]
|
||||||
|
]
|
||||||
|
|
||||||
|
jisTests :: Test
|
||||||
|
jisTests = TestList $ map test $
|
||||||
|
[EncodingTest JISX0208 "\x4E9C"
|
||||||
|
[0x30,0x21]
|
||||||
|
]
|
||||||
@ -61,4 +61,5 @@ Library
|
|||||||
Data.Encoding.BootString
|
Data.Encoding.BootString
|
||||||
Data.Encoding.MacOSRoman
|
Data.Encoding.MacOSRoman
|
||||||
Data.Encoding.JISX0201
|
Data.Encoding.JISX0201
|
||||||
|
Data.Encoding.JISX0208
|
||||||
System.IO.Encoding
|
System.IO.Encoding
|
||||||
Loading…
Reference in New Issue
Block a user