encoding/Data/Encoding/KOI8R.hs
Henning Guenther 6c5011aaa3 Added KOI8U encoding
darcs-hash:20080120213322-a4fee-752cc3f3ec4e7a01e5bbf93bad08a88ca3cb3a5b
2008-01-20 13:33:22 -08:00

66 lines
2.5 KiB
Haskell

{-# LANGUAGE DeriveDataTypeable #-}
{- | This module implements KOI8-R encoding which covers the russian and bulgarian alphabet.
See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
-}
module Data.Encoding.KOI8R
(KOI8R(..)) where
import Control.Exception (throwDyn)
import Data.Array.Unboxed
import Data.Char (ord,chr)
import qualified Data.ByteString.Lazy as Lazy
import Data.Map hiding (map,(!))
import Data.Word
import Prelude hiding (lookup)
import Data.Typeable
import Data.Encoding.Base
data KOI8R = KOI8R deriving (Eq,Show,Typeable)
koi8rArr :: UArray Word8 Char
koi8rArr = listArray (128,255) koi8rList
koi8rMap :: Map Char Word8
koi8rMap = fromList (zip koi8rList [0..])
koi8rList :: [Char]
koi8rList =
['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
]
koi8rDecode :: Word8 -> Char
koi8rDecode ch
| ch < 128 = chr $ fromIntegral ch
| otherwise = koi8rArr!ch
koi8rEncode :: Char -> Word8
koi8rEncode ch
| ch < '\128' = fromIntegral $ ord ch
| otherwise = case lookup ch koi8rMap of
Just w -> w
Nothing -> throwDyn (HasNoRepresentation ch)
instance Encoding KOI8R where
encode _ = encodeSinglebyte koi8rEncode
encodeLazy _ = encodeSinglebyteLazy koi8rEncode
encodable _ c = (c < '\128') || (member c koi8rMap)
decode _ = decodeSinglebyte koi8rDecode
decodeLazy _ str = concatMap (decodeSinglebyte koi8rDecode) (Lazy.toChunks str)
decodable _ = const True