Switch from Template-based code generating to text-based
Ignore-this: f58ceb5e1068be132b0a67a851b096f4 This has two advantages: 1. TemplateHaskell is painfully slow. There, I said it. 2. TemplateHaskell doesn't yet support some extensions that can be usefull for this library. Specifically the MagicHash extension. darcs-hash:20090813023321-a4fee-0da13d0da6454f6ba3bd111ed6b80268d9e1b45c
This commit is contained in:
parent
1ad8755a80
commit
6101ee16ae
15
Data/Array/Static.hs
Normal file
15
Data/Array/Static.hs
Normal file
@ -0,0 +1,15 @@
|
||||
{-# LANGUAGE MagicHash #-}
|
||||
module Data.Array.Static where
|
||||
|
||||
import Data.Static
|
||||
import GHC.Exts
|
||||
import Data.Ix
|
||||
|
||||
data StaticArray i e = StaticArray i i Addr#
|
||||
|
||||
bounds :: Ix i => StaticArray i e -> (i,i)
|
||||
bounds (StaticArray s e _) = (s,e)
|
||||
|
||||
(!) :: (StaticElement e,Ix i) => StaticArray i e -> i -> e
|
||||
(!) (StaticArray s e addr) i = let (I# ri) = index (s,e) i
|
||||
in extract addr ri
|
||||
12
Data/Array/Static/Builder.hs
Normal file
12
Data/Array/Static/Builder.hs
Normal file
@ -0,0 +1,12 @@
|
||||
{-# LANGUAGE MagicHash #-}
|
||||
module Data.Array.Static.Builder where
|
||||
|
||||
import Data.Static
|
||||
|
||||
buildStaticArray :: (StaticElement e,Show i) => (i,i) -> [e] -> String
|
||||
buildStaticArray (s,e) els = "StaticArray ("++show s++") ("++show e++") \""
|
||||
++concat (map (\w -> '\\':show w) (concat (map gen els)))
|
||||
++"\"#"
|
||||
|
||||
buildStaticArray' :: (StaticElement e) => [e] -> String
|
||||
buildStaticArray' els = buildStaticArray (0,length els-1) els
|
||||
73
Data/CharMap.hs
Normal file
73
Data/CharMap.hs
Normal file
@ -0,0 +1,73 @@
|
||||
module Data.CharMap where
|
||||
|
||||
import Data.Map.Static
|
||||
import Data.Encoding.ByteSink
|
||||
import Data.Encoding.Exception
|
||||
|
||||
import Control.Throws
|
||||
import Data.Word
|
||||
import Data.Char
|
||||
import Prelude hiding (lookup)
|
||||
|
||||
data CharMap
|
||||
= Node !Char !CharMap !CharMap
|
||||
| DeadEnd
|
||||
| LeafRange1 !Int !Word8
|
||||
| LeafRange2 !Int !Word8 !Word8 !Word8
|
||||
| LeafRange3 !Int !Word8 !Word8 !Word8 !Word8 !Word8
|
||||
| LeafRange4 !Int !Word8 !Word8 !Word8 !Word8 !Word8 !Word8 !Word8
|
||||
| LeafMap1 (StaticMap Char Word8)
|
||||
| LeafMap2 (StaticMap Char Word16)
|
||||
| LeafMap4 (StaticMap Char Word32)
|
||||
|
||||
mapEncode :: ByteSink m => Char -> CharMap -> m ()
|
||||
mapEncode ch (Node rch l r)
|
||||
| ch < rch = mapEncode ch l
|
||||
| otherwise = mapEncode ch r
|
||||
mapEncode ch DeadEnd = throwException (HasNoRepresentation ch)
|
||||
mapEncode ch (LeafRange1 bch st)
|
||||
= pushWord8 $ st + (fromIntegral ((ord ch) - bch))
|
||||
mapEncode ch (LeafRange2 bch min1 min2 r2)
|
||||
= let v = (ord ch) - bch
|
||||
(w1,w2) = v `divMod` (fromIntegral r2)
|
||||
in do
|
||||
pushWord8 (fromIntegral w1 + min1)
|
||||
pushWord8 (fromIntegral w2 + min2)
|
||||
mapEncode ch (LeafRange3 bch min1 min2 r2 min3 r3)
|
||||
= let v = (ord ch) - bch
|
||||
(v1,w3) = v `divMod` (fromIntegral r3)
|
||||
(w1,w2) = v1 `divMod` (fromIntegral r2)
|
||||
in do
|
||||
pushWord8 (fromIntegral w1 + min1)
|
||||
pushWord8 (fromIntegral w2 + min2)
|
||||
pushWord8 (fromIntegral w3 + min3)
|
||||
mapEncode ch (LeafRange4 bch min1 min2 r2 min3 r3 min4 r4)
|
||||
= let v = (ord ch) - bch
|
||||
(v1,w4) = v `divMod` (fromIntegral r4)
|
||||
(v2,w3) = v1 `divMod` (fromIntegral r3)
|
||||
(w1,w2) = v2 `divMod` (fromIntegral r2)
|
||||
in do
|
||||
pushWord8 (fromIntegral w1 + min1)
|
||||
pushWord8 (fromIntegral w2 + min2)
|
||||
pushWord8 (fromIntegral w3 + min3)
|
||||
pushWord8 (fromIntegral w4 + min4)
|
||||
mapEncode ch (LeafMap1 mp) = case lookup ch mp of
|
||||
Nothing -> throwException (HasNoRepresentation ch)
|
||||
Just v -> pushWord8 v
|
||||
mapEncode ch (LeafMap2 mp) = case lookup ch mp of
|
||||
Nothing -> throwException (HasNoRepresentation ch)
|
||||
Just v -> pushWord16be v
|
||||
mapEncode ch (LeafMap4 mp) = case lookup ch mp of
|
||||
Nothing -> throwException (HasNoRepresentation ch)
|
||||
Just v -> pushWord32be v
|
||||
|
||||
|
||||
mapMember :: Char -> CharMap -> Bool
|
||||
mapMember c (Node rc l r)
|
||||
| c < rc = mapMember c l
|
||||
| otherwise = mapMember c r
|
||||
mapMember c DeadEnd = False
|
||||
mapMember c (LeafMap1 mp) = member c mp
|
||||
mapMember c (LeafMap2 mp) = member c mp
|
||||
mapMember c (LeafMap4 mp) = member c mp
|
||||
mapMember c _ = True
|
||||
64
Data/CharMap/Builder.hs
Normal file
64
Data/CharMap/Builder.hs
Normal file
@ -0,0 +1,64 @@
|
||||
module Data.CharMap.Builder where
|
||||
|
||||
import Data.Map.Static.Builder
|
||||
|
||||
import Data.List
|
||||
import Data.Ord
|
||||
import Data.Char
|
||||
import Data.Bits
|
||||
import Data.Word
|
||||
|
||||
data BuildingBlock
|
||||
= SingleMapping Char [Word8]
|
||||
| RangeMapping Char Char Int [(Word8,Word8)]
|
||||
|
||||
charRange :: BuildingBlock -> (Char,Char)
|
||||
charRange (SingleMapping c _) = (c,c)
|
||||
charRange (RangeMapping s e _ _) = (s,e)
|
||||
|
||||
mappingLength :: BuildingBlock -> Int
|
||||
mappingLength (SingleMapping _ w) = length w
|
||||
mappingLength (RangeMapping _ _ _ ws) = length ws
|
||||
|
||||
isRange :: BuildingBlock -> Bool
|
||||
isRange (SingleMapping _ _) = False
|
||||
isRange (RangeMapping _ _ _ _) = True
|
||||
|
||||
buildCharMap :: [BuildingBlock] -> String
|
||||
buildCharMap lst = let slst = sortBy (comparing (fst.charRange)) lst
|
||||
grps = groupBy (\x y -> (not (isRange x || isRange y))
|
||||
&& mappingLength x == mappingLength y
|
||||
) slst
|
||||
|
||||
split' xs = splitAt (length xs `div` 2) xs
|
||||
|
||||
build' [] _ _ = "DeadEnd"
|
||||
build' [[RangeMapping st end off (x:xs)]] bl br
|
||||
= let e1 = if bl < st
|
||||
then "Node ("++show st++") DeadEnd ("++e2++")"
|
||||
else e2
|
||||
e2 = if br>end
|
||||
then "Node ("++show end++") ("++e3++") DeadEnd"
|
||||
else e3
|
||||
e3 = "LeafRange"++show (length xs+1)++" ("++show (ord st - off)++") "
|
||||
++show (fst x)++concat (map (\(w,r) -> " "++show w++" "++show r) xs)
|
||||
in e1
|
||||
build' [mps@((SingleMapping _ w):_)] bl br
|
||||
= "LeafMap"++show (length w)++" ("
|
||||
++(case length w of
|
||||
1 -> buildStaticMap (map (\(SingleMapping c [w]) -> (c,w)) mps)
|
||||
2 -> buildStaticMap $ map (\(SingleMapping c [w1,w2])
|
||||
-> (c,((fromIntegral w1) `shiftL` 8) .|. (fromIntegral w2)::Word16)
|
||||
) mps
|
||||
4 -> buildStaticMap $ map (\(SingleMapping c [w1,w2,w3,w4])
|
||||
-> (c,((fromIntegral w1) `shiftL` 24)
|
||||
.|. ((fromIntegral w2) `shiftL` 16)
|
||||
.|. ((fromIntegral w3) `shiftL` 8)
|
||||
.|. (fromIntegral w4)::Word32)
|
||||
) mps)++")"
|
||||
build' mps bl br = let (l,r@((spl:_):_)) = split' mps
|
||||
(el,_) = charRange spl
|
||||
in "Node ("++show el++") ("++build' l bl (pred el)++") ("++
|
||||
build' r el br++")"
|
||||
|
||||
in build' grps minBound maxBound
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1250 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1250" "CP1250.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1251 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1251" "CP1251.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1252 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1252" "CP1252.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1253 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1253" "CP1253.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1254 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1254" "CP1254.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1255 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1255" "CP1255.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1256 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1256" "CP1256.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1257 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1257" "CP1257.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.CP1258 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "CP1258" "CP1258.TXT" )
|
||||
@ -1,237 +0,0 @@
|
||||
{-# LANGUAGE CPP,DeriveDataTypeable #-}
|
||||
{- | GB18030 is a chinese character encoding that is mandatory in china (if you
|
||||
- don\'t implement it, you\'re not allowed to sell your software there).
|
||||
-}
|
||||
|
||||
module Data.Encoding.GB18030
|
||||
(GB18030(..))
|
||||
where
|
||||
|
||||
import Control.Throws
|
||||
import Data.Char (chr,ord)
|
||||
import Data.Word
|
||||
import Data.Bits
|
||||
import Data.Encoding.Base
|
||||
import Data.Encoding.ByteSource
|
||||
import Data.Encoding.ByteSink
|
||||
import Data.Encoding.Exception
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString as BS
|
||||
import Data.Typeable
|
||||
|
||||
#if __GLASGOW_HASKELL__>=608
|
||||
import Data.ByteString.Unsafe (unsafeIndex)
|
||||
#else
|
||||
import Data.ByteString.Base (unsafeIndex)
|
||||
#endif
|
||||
|
||||
import Data.Encoding.GB18030Data
|
||||
|
||||
data GB18030 = GB18030 deriving (Eq,Show,Typeable)
|
||||
|
||||
instance Encoding GB18030 where
|
||||
decodeChar _ = do
|
||||
w1 <- fetchWord8
|
||||
case () of
|
||||
_
|
||||
| w1 <= 0x80 -> return (chr $ fromIntegral w1) -- it's ascii
|
||||
| w1 <= 0xFE -> do
|
||||
w2 <- fetchWord8
|
||||
case () of
|
||||
_
|
||||
| w2 < 0x30 -> throwException (IllegalCharacter w2)
|
||||
| w2 <= 0x39 -> do
|
||||
w3 <- fetchWord8
|
||||
case () of
|
||||
_
|
||||
| w3 < 0x81 -> throwException (IllegalCharacter w3)
|
||||
| w3 <= 0xFE -> do
|
||||
w4 <- fetchWord8
|
||||
case () of
|
||||
_
|
||||
| w4 < 0x30 -> throwException (IllegalCharacter w4)
|
||||
| w4 <= 0x39 -> decodeGBFour $ linear w1 w2 w3 w4
|
||||
| otherwise -> throwException (IllegalCharacter w4)
|
||||
| otherwise -> throwException (IllegalCharacter w3)
|
||||
| w2 <= 0x7E -> return $ decodeGBTwo $ linear2 w1 w2
|
||||
| w2 == 0x7F -> throwException (IllegalCharacter w2)
|
||||
| w2 <= 0xFE -> return $ decodeGBTwo $ linear2 w1 w2
|
||||
| otherwise -> throwException (IllegalCharacter w2)
|
||||
| otherwise -> throwException (IllegalCharacter w1)
|
||||
{- How this works: The nested if-structures form an binary tree over the
|
||||
- encoding range.
|
||||
-}
|
||||
encodeChar _ ch = if ch<='\x4946' -- 1
|
||||
then (if ch<='\x4055' -- 2
|
||||
then (if ch<='\x2E80' -- 3
|
||||
then (if ch<='\x200F' -- 4
|
||||
then (if ch<'\x0452'
|
||||
then arr 0x0000 arr1
|
||||
else range range1)
|
||||
else (if ch<'\x2643'
|
||||
then arr 0x2010 arr2
|
||||
else range range2))
|
||||
else (if ch<='\x3917' -- 4
|
||||
then (if ch<'\x361B'
|
||||
then arr 0x2E81 arr3
|
||||
else range range3)
|
||||
else (if ch<'\x3CE1'
|
||||
then arr 0x3918 arr4
|
||||
else range range4)))
|
||||
else (if ch<='\x464B' -- 3
|
||||
then (if ch<='\x4336' -- 4
|
||||
then (if ch<'\x4160'
|
||||
then arr 0x4056 arr5
|
||||
else range range5)
|
||||
else (if ch<'\x44D7'
|
||||
then arr 0x4337 arr6
|
||||
else range range6))
|
||||
else (if ch<'\x478E'
|
||||
then arr 0x464C arr7
|
||||
else range range7)))
|
||||
else (if ch<='\xF92B' -- 2
|
||||
then (if ch<='\xD7FF' -- 3
|
||||
then (if ch<='\x4C76' -- 4
|
||||
then (if ch<'\x49B8'
|
||||
then arr 0x4947 arr8
|
||||
else range range8)
|
||||
else (if ch<'\x9FA6'
|
||||
then arr 0x4C77 arr9
|
||||
else range range9))
|
||||
else (if ch<'\xE865'
|
||||
then arr 0xD800 arr10
|
||||
else range range10))
|
||||
else (if ch<='\xFFFF' -- 3
|
||||
then (if ch<='\xFE2F' -- 4
|
||||
then (if ch<'\xFA2A'
|
||||
then arr 0xF92C arr11
|
||||
else range range11)
|
||||
else (if ch<'\xFFE6'
|
||||
then arr 0xFE30 arr12
|
||||
else range range12))
|
||||
else (if ch<='\x10FFFF' -- 4
|
||||
then range range13
|
||||
else throwException (HasNoRepresentation ch))))
|
||||
where
|
||||
range r = let (w1,w2,w3,w4) = delinear (ord ch + r)
|
||||
in pushWord8 w1 >> pushWord8 w2 >> pushWord8 w3 >> pushWord8 w4
|
||||
arr off a = let ind = (ord ch - off)*5
|
||||
w1 = unsafeIndex a (ind+1)
|
||||
w2 = unsafeIndex a (ind+2)
|
||||
w3 = unsafeIndex a (ind+3)
|
||||
w4 = unsafeIndex a (ind+4)
|
||||
in do
|
||||
pushWord8 w1
|
||||
case unsafeIndex a ind of
|
||||
1 -> return ()
|
||||
2 -> pushWord8 w2
|
||||
3 -> pushWord8 w2 >> pushWord8 w3
|
||||
4 -> pushWord8 w2 >> pushWord8 w3 >> pushWord8 w4
|
||||
encodeable _ c = c <= '\x10FFFF'
|
||||
|
||||
linear :: Word8 -> Word8 -> Word8 -> Word8 -> Int
|
||||
linear w1 w2 w3 w4
|
||||
= (fromIntegral (w4-0x30))
|
||||
+ (fromIntegral (w3-0x81))*10
|
||||
+ (fromIntegral (w2-0x30))*1260
|
||||
+ (fromIntegral (w1-0x81))*12600
|
||||
|
||||
linear2 :: Word8 -> Word8 -> Int
|
||||
linear2 w1 w2 = (fromIntegral (w2 - (if w2<=0x7E
|
||||
then 0x40
|
||||
else 0x41)))
|
||||
+ (fromIntegral (w1-0x81))*190
|
||||
|
||||
delinear :: Int -> (Word8,Word8,Word8,Word8)
|
||||
delinear n = let
|
||||
(w1,n1) = n `divMod` 12600
|
||||
(w2,n2) = n1 `divMod` 1260
|
||||
(w3,n3) = n2 `divMod` 10
|
||||
w4 = n3
|
||||
in (fromIntegral w1+0x81
|
||||
,fromIntegral w2+0x30
|
||||
,fromIntegral w3+0x81
|
||||
,fromIntegral w4+0x30)
|
||||
|
||||
decodeGBTwo :: Int -> Char
|
||||
decodeGBTwo n = let
|
||||
rn = n*2
|
||||
w1 = unsafeIndex rrarr rn
|
||||
w2 = unsafeIndex rrarr (rn+1)
|
||||
in chr $ ((fromIntegral w1) `shiftL` 8) .|. (fromIntegral w2)
|
||||
|
||||
decodeGBFour :: ByteSource m => Int -> m Char
|
||||
decodeGBFour v = if v<=17858 -- 1
|
||||
then (if v<=15582 -- 2
|
||||
then (if v<=11328 -- 3
|
||||
then (if v<=7921 -- 4
|
||||
then (if v<820
|
||||
then arr 0 rarr1
|
||||
else range range1)
|
||||
else (if v<9219
|
||||
then arr 7922 rarr2
|
||||
else range range2))
|
||||
else (if v<=13737 -- 4
|
||||
then (if v<12973
|
||||
then arr 11329 rarr3
|
||||
else range range3)
|
||||
else (if v<14698
|
||||
then arr 13738 rarr4
|
||||
else range range4)))
|
||||
else (if v<=17101 -- 3
|
||||
then (if v<=16317 -- 4
|
||||
then (if v<15847
|
||||
then arr 15583 rarr5
|
||||
else range range5)
|
||||
else (if v<16729
|
||||
then arr 16318 rarr6
|
||||
else range range6))
|
||||
else (if v<17418
|
||||
then arr 17102 rarr7
|
||||
else range range7)))
|
||||
else (if v<=37844 -- 2
|
||||
then (if v<=33468 -- 3
|
||||
then (if v<=18663 -- 4
|
||||
then (if v<17961
|
||||
then arr 17859 rarr8
|
||||
else range range8)
|
||||
else (if v<19043
|
||||
then arr 18664 rarr9
|
||||
else range range9))
|
||||
else (if v<33550
|
||||
then arr 33469 rarr10
|
||||
else range range10))
|
||||
else (if v<=39419 -- 3
|
||||
then (if v<=39107 -- 4
|
||||
then (if v<38078
|
||||
then arr 37845 rarr11
|
||||
else range range11)
|
||||
else (if v<39394
|
||||
then arr 39108 rarr12
|
||||
else range range12))
|
||||
else (if v<=1237575 && v>=189000
|
||||
then range range13
|
||||
else throwException OutOfRange)))
|
||||
where
|
||||
arr off a = let
|
||||
v' = (v-off)*2
|
||||
w1 = unsafeIndex a v'
|
||||
w2 = unsafeIndex a (v'+1)
|
||||
in return $ chr $ ((fromIntegral w1) `shiftL` 8)
|
||||
.|. (fromIntegral w2)
|
||||
range r = return $ chr (v-r)
|
||||
|
||||
range1,range2,range3,range4,range5,range6,range7,range8,range9,range10,range11,range12,range13 :: Int
|
||||
range1 = -286
|
||||
range2 = -576
|
||||
range3 = -878
|
||||
range4 = -887
|
||||
range5 = -889
|
||||
range6 = -894
|
||||
range7 = -900
|
||||
range8 = -911
|
||||
range9 = -21827
|
||||
range10 = -25943
|
||||
range11 = -25964
|
||||
range12 = -26116
|
||||
range13 = 123464
|
||||
@ -1,6 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE characterMapping SYSTEM "http://www.unicode.org/unicode/reports/tr22/CharacterMapping.dtd">
|
||||
<characterMapping id="gb-18030-2000" version="3">
|
||||
<characterMapping
|
||||
id="gb-18030-2000" version="3"
|
||||
description="GB18030 is a chinese character encoding that is mandatory in china (if you - don't implement it, you're not allowed to sell your software there).">
|
||||
<history>
|
||||
<modified version="3" date="2001-02-21">
|
||||
0x80 appears to be a valid (and unassigned) single-byte code, added to the validity.
|
||||
File diff suppressed because one or more lines are too long
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO885910 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO885910" "8859-10.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO885911 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO885911" "8859-11.TXT" )
|
||||
@ -294,4 +294,4 @@
|
||||
0xF8 0x0E58 # THAI DIGIT EIGHT
|
||||
0xF9 0x0E59 # THAI DIGIT NINE
|
||||
0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU
|
||||
0xFB 0x0E5B # THAI CHARACTER KHOMUT
|
||||
0xFB 0x0E5B # THAI CHARACTER KHOMUT
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO885913 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO885913" "8859-13.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO885914 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO885914" "8859-14.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO885915 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO885915" "8859-15.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO885916 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO885916" "8859-16.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88592 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88592" "8859-2.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88593 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88593" "8859-3.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88594 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88594" "8859-4.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88595 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88595" "8859-5.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88596 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88596" "8859-6.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88597 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88597" "8859-7.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88598 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88598" "8859-8.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.ISO88599 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "ISO88599" "8859-9.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.JISX0201 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "JISX0201" "JIS0201.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.JISX0208 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeJISInstance)
|
||||
|
||||
$( makeJISInstance 1 "JISX0208" "JIS0208.TXT" )
|
||||
6951
Data/Encoding/JISX0208.mapping2
Normal file
6951
Data/Encoding/JISX0208.mapping2
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.JISX0212 where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeJISInstance)
|
||||
|
||||
$( makeJISInstance 0 "JISX0212" "JIS0212.TXT" )
|
||||
@ -1,6 +0,0 @@
|
||||
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
|
||||
module Data.Encoding.MacOSRoman where
|
||||
|
||||
import Data.Encoding.Helper.Template (makeISOInstance)
|
||||
|
||||
$( makeISOInstance "MacOSRoman" "ROMAN.TXT" )
|
||||
28
Data/Map/Static.hs
Normal file
28
Data/Map/Static.hs
Normal file
@ -0,0 +1,28 @@
|
||||
module Data.Map.Static where
|
||||
|
||||
import Data.Static
|
||||
import Data.Array.Static
|
||||
|
||||
import GHC.Exts
|
||||
|
||||
data StaticMap i e = StaticMap (StaticArray Int i) (StaticArray Int e)
|
||||
|
||||
lookup :: (StaticElement i,StaticElement e,Ord i) => i -> StaticMap i e -> Maybe e
|
||||
lookup ind (StaticMap idx els) = lookup' 1
|
||||
where
|
||||
lookup' n = if n > snd (bounds idx)
|
||||
then Nothing
|
||||
else case compare ind (idx!n) of
|
||||
LT -> lookup' (n * 2)
|
||||
GT -> lookup' ((n * 2) + 1)
|
||||
EQ -> Just $ els!n
|
||||
|
||||
member :: (StaticElement i,StaticElement e,Ord i) => i -> StaticMap i e -> Bool
|
||||
member ind (StaticMap idx _) = lookup' 1
|
||||
where
|
||||
lookup' n = if n > snd (bounds idx)
|
||||
then False
|
||||
else case compare ind (idx!n) of
|
||||
LT -> lookup' (n * 2)
|
||||
GT -> lookup' ((n * 2) + 1)
|
||||
EQ -> True
|
||||
42
Data/Map/Static/Builder.hs
Normal file
42
Data/Map/Static/Builder.hs
Normal file
@ -0,0 +1,42 @@
|
||||
module Data.Map.Static.Builder where
|
||||
|
||||
import Data.Static
|
||||
import Data.Array.Static.Builder
|
||||
|
||||
import Data.List
|
||||
import Data.Ord
|
||||
import Data.Bits
|
||||
|
||||
buildStaticMap :: (StaticElement i,StaticElement e,Ord i) => [(i,e)] -> String
|
||||
buildStaticMap lst = let step :: Int -> [(i,e)] -> [(Int,(i,e))]
|
||||
step n chunk = let ss = findSplitSize (length chunk)
|
||||
(h,d:t) = splitAt ss chunk
|
||||
in if null chunk
|
||||
then []
|
||||
else (n,d):((step (n*2) h)++(step (n*2+1) t))
|
||||
checkHeap n [] = []
|
||||
checkHeap n ((c,x):xs) = if c == n
|
||||
then x:checkHeap (n+1) xs
|
||||
else error $ "Heap is not consistent: Should be "++show n++" but is "++show c
|
||||
uheap = sortBy (comparing fst) (step 1 slst)
|
||||
slst = sortBy (comparing fst) lst
|
||||
heap = checkHeap 1 $ sortBy (comparing fst) (step 1 slst)
|
||||
len = length heap
|
||||
in "StaticMap ("++buildStaticArray (1,len) (map fst heap)++") ("++buildStaticArray (1,len) (map snd heap)++")"
|
||||
|
||||
maxSize :: Int -> Int
|
||||
maxSize d = (1 `shiftL` d) - 1
|
||||
|
||||
treeDepth :: Int -> Int
|
||||
treeDepth sz = find' [0..]
|
||||
where
|
||||
find' (x:xs) = if 1 `shiftL` x > sz
|
||||
then x
|
||||
else find' xs
|
||||
|
||||
findSplitSize :: Int -> Int
|
||||
findSplitSize len = let depth = treeDepth len
|
||||
free = (maxSize depth) - len
|
||||
in if free <= (1 `shiftL` (depth - 2))
|
||||
then maxSize (depth - 1)
|
||||
else len - (maxSize (depth - 2)) - 1
|
||||
6952
JIS0208.TXT
6952
JIS0208.TXT
File diff suppressed because it is too large
Load Diff
@ -12,42 +12,20 @@ Cabal-Version: >=1.2
|
||||
Build-Type: Simple
|
||||
Extra-Source-Files:
|
||||
NEWS
|
||||
8859-2.TXT
|
||||
8859-3.TXT
|
||||
8859-4.TXT
|
||||
8859-5.TXT
|
||||
8859-6.TXT
|
||||
8859-7.TXT
|
||||
8859-8.TXT
|
||||
8859-9.TXT
|
||||
8859-10.TXT
|
||||
8859-11.TXT
|
||||
8859-13.TXT
|
||||
8859-14.TXT
|
||||
8859-15.TXT
|
||||
8859-16.TXT
|
||||
CP1250.TXT
|
||||
CP1251.TXT
|
||||
CP1252.TXT
|
||||
CP1253.TXT
|
||||
CP1254.TXT
|
||||
CP1255.TXT
|
||||
CP1256.TXT
|
||||
CP1257.TXT
|
||||
CP1258.TXT
|
||||
ROMAN.TXT
|
||||
JIS0201.TXT
|
||||
JIS0208.TXT
|
||||
JIS0212.TXT
|
||||
gb-18030-2000.xml
|
||||
create_gb18030_data.sh
|
||||
|
||||
Flag splitBase
|
||||
description: Choose the new smaller, split-up base package.
|
||||
Flag newGHC
|
||||
description: Use ghc version > 6.10
|
||||
|
||||
Library
|
||||
if flag(splitBase)
|
||||
Build-Depends: bytestring, base >= 3, binary, mtl, containers, extensible-exceptions, array, template-haskell, regex-compat
|
||||
if flag(newGHC)
|
||||
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, template-haskell, regex-compat, ghc-prim, ghc >= 6.10
|
||||
else
|
||||
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, template-haskell, regex-compat, ghc < 6.10
|
||||
else
|
||||
Build-Depends: base < 3, binary, extensible-exceptions, template-haskell
|
||||
|
||||
@ -98,8 +76,10 @@ Library
|
||||
System.IO.Encoding
|
||||
Other-Modules:
|
||||
Data.Encoding.Base
|
||||
Data.Encoding.Helper.Template
|
||||
Data.Encoding.GB18030Data
|
||||
Data.Array.Static
|
||||
Data.Map.Static
|
||||
Data.Static
|
||||
Data.CharMap
|
||||
Includes:
|
||||
system_encoding.h
|
||||
Install-Includes:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user