Implemented UTF32BE and LE
darcs-hash:20080115233419-a4fee-391b20ca239a6e59a6146f00ae084d0ff94ee908
This commit is contained in:
parent
e849ef1404
commit
5a0a697cb8
@ -12,18 +12,45 @@ import Data.Encoding.Base
|
||||
import Data.Word
|
||||
import Control.Exception (throwDyn)
|
||||
import Data.Typeable
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.ByteString.Lazy as LBS
|
||||
|
||||
data UTF32 = UTF32 deriving (Eq,Show,Typeable)
|
||||
data UTF32
|
||||
= UTF32 -- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
|
||||
| UTF32BE -- ^ Encodes and decodes using the big endian encoding.
|
||||
| UTF32LE -- ^ Encodes and decodes using the little endian encoding.
|
||||
deriving (Eq,Show,Typeable)
|
||||
|
||||
bom :: Char
|
||||
bom = '\xFEFF'
|
||||
|
||||
instance Encoding UTF32 where
|
||||
encode _ = encodeMultibyte encodeUTF32
|
||||
encodeLazy _ = encodeMultibyteLazy encodeUTF32
|
||||
encode UTF32 str = encodeMultibyte encodeUTF32be (bom:str)
|
||||
encode UTF32LE str = encodeMultibyte encodeUTF32le str
|
||||
encode UTF32BE str = encodeMultibyte encodeUTF32be str
|
||||
encodeLazy UTF32 str = encodeMultibyteLazy encodeUTF32be (bom:str)
|
||||
encodeLazy UTF32LE str = encodeMultibyteLazy encodeUTF32le str
|
||||
encodeLazy UTF32BE str = encodeMultibyteLazy encodeUTF32be str
|
||||
encodable _ c = ord c < 0x0010FFFF
|
||||
decode _ = decodeMultibyte decodeUTF32
|
||||
decodeLazy _ = decodeMultibyteLazy decodeUTF32
|
||||
decode UTF32 str = let
|
||||
(start,rest) = BS.splitAt 4 str
|
||||
in case BS.unpack start of
|
||||
[0x00,0x00,0xFE,0xFF] -> decode UTF32BE rest
|
||||
[0xFE,0xFF,0x00,0x00] -> decode UTF32LE rest
|
||||
_ -> decode UTF32BE str
|
||||
decode UTF32LE str = decodeMultibyte decodeUTF32le str
|
||||
decode UTF32BE str = decodeMultibyte decodeUTF32be str
|
||||
decodeLazy UTF32 str = let
|
||||
(start,rest) = LBS.splitAt 4 str
|
||||
in case LBS.unpack start of
|
||||
[0x00,0x00,0xFE,0xFF] -> decodeLazy UTF32BE rest
|
||||
[0xFE,0xFF,0x00,0x00] -> decodeLazy UTF32LE rest
|
||||
_ -> decodeLazy UTF32BE str
|
||||
decodeLazy UTF32LE str = decodeMultibyteLazy decodeUTF32le str
|
||||
decodeLazy UTF32BE str = decodeMultibyteLazy decodeUTF32be str
|
||||
|
||||
encodeUTF32 :: Char -> (Word8,EncodeState)
|
||||
encodeUTF32 ch = let
|
||||
encodeUTF32be :: Char -> (Word8,EncodeState)
|
||||
encodeUTF32be ch = let
|
||||
w = ord ch
|
||||
w1 = fromIntegral $ w `shiftR` 24
|
||||
w2 = fromIntegral $ w `shiftR` 16
|
||||
@ -31,11 +58,33 @@ encodeUTF32 ch = let
|
||||
w4 = fromIntegral $ w
|
||||
in (w1,Put3 w2 w3 w4)
|
||||
|
||||
decodeUTF32 :: [Word8] -> (Char,[Word8])
|
||||
decodeUTF32 (w1:w2:w3:w4:rest) = (chr $
|
||||
(fromIntegral w1 `shiftL` 24) .|.
|
||||
(fromIntegral w2 `shiftL` 16) .|.
|
||||
(fromIntegral w3 `shiftL` 8) .|.
|
||||
(fromIntegral w4),rest)
|
||||
decodeUTF32 _ = throwDyn UnexpectedEnd
|
||||
encodeUTF32le :: Char -> (Word8,EncodeState)
|
||||
encodeUTF32le ch = let
|
||||
w = ord ch
|
||||
w1 = fromIntegral $ w `shiftR` 24
|
||||
w2 = fromIntegral $ w `shiftR` 16
|
||||
w3 = fromIntegral $ w `shiftR` 8
|
||||
w4 = fromIntegral $ w
|
||||
in (w4,Put3 w3 w2 w1)
|
||||
|
||||
decodeUTF32be :: [Word8] -> (Char,[Word8])
|
||||
decodeUTF32be (w1:w2:w3:w4:rest) = let
|
||||
v = (fromIntegral w1 `shiftL` 24) .|.
|
||||
(fromIntegral w2 `shiftL` 16) .|.
|
||||
(fromIntegral w3 `shiftL` 8) .|.
|
||||
(fromIntegral w4)
|
||||
in if v < 0x0010FFFF
|
||||
then (chr v,rest)
|
||||
else throwDyn (IllegalRepresentation [w1,w2,w3,w4])
|
||||
decodeUTF32be _ = throwDyn UnexpectedEnd
|
||||
|
||||
decodeUTF32le :: [Word8] -> (Char,[Word8])
|
||||
decodeUTF32le (w1:w2:w3:w4:rest) = let
|
||||
v = (fromIntegral w4 `shiftL` 24) .|.
|
||||
(fromIntegral w3 `shiftL` 16) .|.
|
||||
(fromIntegral w2 `shiftL` 8) .|.
|
||||
(fromIntegral w1)
|
||||
in if v < 0x0010FFFF
|
||||
then (chr v,rest)
|
||||
else throwDyn (IllegalRepresentation [w1,w2,w3,w4])
|
||||
decodeUTF32le _ = throwDyn UnexpectedEnd
|
||||
|
||||
Loading…
Reference in New Issue
Block a user