Added KOI8U encoding
darcs-hash:20080120213322-a4fee-752cc3f3ec4e7a01e5bbf93bad08a88ca3cb3a5b
This commit is contained in:
parent
c201774307
commit
6c5011aaa3
@ -44,6 +44,7 @@ import Data.Encoding.CP1256
|
||||
import Data.Encoding.CP1257
|
||||
import Data.Encoding.CP1258
|
||||
import Data.Encoding.KOI8R
|
||||
import Data.Encoding.KOI8U
|
||||
import Data.Encoding.GB18030
|
||||
|
||||
-- | An untyped encoding. Used in 'System.IO.Encoding.getSystemEncoding'.
|
||||
@ -80,6 +81,7 @@ encodingFromStringMaybe "UTF-8" = Just $ DynEncoding UTF8
|
||||
encodingFromStringMaybe "UTF-16" = Just $ DynEncoding UTF16
|
||||
encodingFromStringMaybe "UTF-32" = Just $ DynEncoding UTF32
|
||||
encodingFromStringMaybe "KOI8-R" = Just $ DynEncoding KOI8R
|
||||
encodingFromStringMaybe "KOI8-U" = Just $ DynEncoding KOI8U
|
||||
encodingFromStringMaybe "ISO-8859-1" = Just $ DynEncoding ISO88591
|
||||
encodingFromStringMaybe "ISO-8859-2" = Just $ DynEncoding ISO88592
|
||||
encodingFromStringMaybe "ISO-8859-3" = Just $ DynEncoding ISO88593
|
||||
|
||||
@ -41,7 +41,8 @@ koi8rList =
|
||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a']
|
||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||
]
|
||||
|
||||
koi8rDecode :: Word8 -> Char
|
||||
koi8rDecode ch
|
||||
|
||||
64
Data/Encoding/KOI8U.hs
Normal file
64
Data/Encoding/KOI8U.hs
Normal file
@ -0,0 +1,64 @@
|
||||
{-# LANGUAGE DeriveDataTypeable #-}
|
||||
{- | This module implements KOI8-U encoding which covers the ukrainian alphabet.
|
||||
See <http://en.wikipedia.org/wiki/KOI8-U> for more information.
|
||||
-}
|
||||
module Data.Encoding.KOI8U
|
||||
(KOI8U(..)) where
|
||||
|
||||
import Control.Exception (throwDyn)
|
||||
import Data.Word
|
||||
import Data.Array.Unboxed
|
||||
import Data.Encoding.Base
|
||||
import Data.Char (chr,ord)
|
||||
import Data.Map (Map,fromList,lookup,member)
|
||||
import qualified Data.ByteString.Lazy as Lazy
|
||||
import Prelude hiding (lookup)
|
||||
import Data.Typeable
|
||||
|
||||
data KOI8U = KOI8U deriving (Eq,Show,Typeable)
|
||||
|
||||
koi8uArr :: UArray Word8 Char
|
||||
koi8uArr = listArray (128,255) koi8uList
|
||||
|
||||
koi8uMap :: Map Char Word8
|
||||
koi8uMap = fromList (zip koi8uList [0..])
|
||||
|
||||
koi8uList :: [Char]
|
||||
koi8uList =
|
||||
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
|
||||
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
|
||||
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
|
||||
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
|
||||
,'\x2550','\x2551','\x2552','\x0451','\x0454','\x2554','\x0456','\x0457'
|
||||
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x0491','\x255d','\x255e'
|
||||
,'\x255f','\x2560','\x2561','\x0401','\x0403','\x2563','\x0406','\x0407'
|
||||
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x0490','\x256c','\x00a9'
|
||||
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
|
||||
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
|
||||
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
|
||||
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
|
||||
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
|
||||
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
|
||||
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
|
||||
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
|
||||
]
|
||||
|
||||
koi8uDecode :: Word8 -> Char
|
||||
koi8uDecode ch
|
||||
| ch < 128 = chr $ fromIntegral ch
|
||||
| otherwise = koi8uArr!ch
|
||||
|
||||
koi8uEncode :: Char -> Word8
|
||||
koi8uEncode ch
|
||||
| ch < '\128' = fromIntegral $ ord ch
|
||||
| otherwise = case lookup ch koi8uMap of
|
||||
Just w -> w
|
||||
Nothing -> throwDyn (HasNoRepresentation ch)
|
||||
|
||||
instance Encoding KOI8U where
|
||||
encode _ = encodeSinglebyte koi8uEncode
|
||||
encodeLazy _ = encodeSinglebyteLazy koi8uEncode
|
||||
encodable _ c = (c < '\128') || (member c koi8uMap)
|
||||
decode _ = decodeSinglebyte koi8uDecode
|
||||
decodeLazy _ str = concatMap (decodeSinglebyte koi8uDecode) (Lazy.toChunks str)
|
||||
decodable _ = const True
|
||||
@ -83,6 +83,7 @@ Library
|
||||
Data.Encoding.CP1257
|
||||
Data.Encoding.CP1258
|
||||
Data.Encoding.KOI8R
|
||||
Data.Encoding.KOI8U
|
||||
Data.Encoding.GB18030
|
||||
System.IO.Encoding
|
||||
Other-Modules:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user