Switch from Template-based code generating to text-based

Ignore-this: f58ceb5e1068be132b0a67a851b096f4
This has two advantages:
1. TemplateHaskell is painfully slow. There, I said it.
2. TemplateHaskell doesn't yet support some extensions that can be usefull for this library. Specifically the MagicHash extension.

darcs-hash:20090813023321-a4fee-0da13d0da6454f6ba3bd111ed6b80268d9e1b45c
This commit is contained in:
Henning Guenther 2009-08-12 19:33:21 -07:00
parent 1ad8755a80
commit 6101ee16ae
65 changed files with 7199 additions and 7471 deletions

15
Data/Array/Static.hs Normal file
View File

@ -0,0 +1,15 @@
{-# LANGUAGE MagicHash #-}
module Data.Array.Static where
import Data.Static
import GHC.Exts
import Data.Ix
data StaticArray i e = StaticArray i i Addr#
bounds :: Ix i => StaticArray i e -> (i,i)
bounds (StaticArray s e _) = (s,e)
(!) :: (StaticElement e,Ix i) => StaticArray i e -> i -> e
(!) (StaticArray s e addr) i = let (I# ri) = index (s,e) i
in extract addr ri

View File

@ -0,0 +1,12 @@
{-# LANGUAGE MagicHash #-}
module Data.Array.Static.Builder where
import Data.Static
buildStaticArray :: (StaticElement e,Show i) => (i,i) -> [e] -> String
buildStaticArray (s,e) els = "StaticArray ("++show s++") ("++show e++") \""
++concat (map (\w -> '\\':show w) (concat (map gen els)))
++"\"#"
buildStaticArray' :: (StaticElement e) => [e] -> String
buildStaticArray' els = buildStaticArray (0,length els-1) els

73
Data/CharMap.hs Normal file
View File

@ -0,0 +1,73 @@
module Data.CharMap where
import Data.Map.Static
import Data.Encoding.ByteSink
import Data.Encoding.Exception
import Control.Throws
import Data.Word
import Data.Char
import Prelude hiding (lookup)
data CharMap
= Node !Char !CharMap !CharMap
| DeadEnd
| LeafRange1 !Int !Word8
| LeafRange2 !Int !Word8 !Word8 !Word8
| LeafRange3 !Int !Word8 !Word8 !Word8 !Word8 !Word8
| LeafRange4 !Int !Word8 !Word8 !Word8 !Word8 !Word8 !Word8 !Word8
| LeafMap1 (StaticMap Char Word8)
| LeafMap2 (StaticMap Char Word16)
| LeafMap4 (StaticMap Char Word32)
mapEncode :: ByteSink m => Char -> CharMap -> m ()
mapEncode ch (Node rch l r)
| ch < rch = mapEncode ch l
| otherwise = mapEncode ch r
mapEncode ch DeadEnd = throwException (HasNoRepresentation ch)
mapEncode ch (LeafRange1 bch st)
= pushWord8 $ st + (fromIntegral ((ord ch) - bch))
mapEncode ch (LeafRange2 bch min1 min2 r2)
= let v = (ord ch) - bch
(w1,w2) = v `divMod` (fromIntegral r2)
in do
pushWord8 (fromIntegral w1 + min1)
pushWord8 (fromIntegral w2 + min2)
mapEncode ch (LeafRange3 bch min1 min2 r2 min3 r3)
= let v = (ord ch) - bch
(v1,w3) = v `divMod` (fromIntegral r3)
(w1,w2) = v1 `divMod` (fromIntegral r2)
in do
pushWord8 (fromIntegral w1 + min1)
pushWord8 (fromIntegral w2 + min2)
pushWord8 (fromIntegral w3 + min3)
mapEncode ch (LeafRange4 bch min1 min2 r2 min3 r3 min4 r4)
= let v = (ord ch) - bch
(v1,w4) = v `divMod` (fromIntegral r4)
(v2,w3) = v1 `divMod` (fromIntegral r3)
(w1,w2) = v2 `divMod` (fromIntegral r2)
in do
pushWord8 (fromIntegral w1 + min1)
pushWord8 (fromIntegral w2 + min2)
pushWord8 (fromIntegral w3 + min3)
pushWord8 (fromIntegral w4 + min4)
mapEncode ch (LeafMap1 mp) = case lookup ch mp of
Nothing -> throwException (HasNoRepresentation ch)
Just v -> pushWord8 v
mapEncode ch (LeafMap2 mp) = case lookup ch mp of
Nothing -> throwException (HasNoRepresentation ch)
Just v -> pushWord16be v
mapEncode ch (LeafMap4 mp) = case lookup ch mp of
Nothing -> throwException (HasNoRepresentation ch)
Just v -> pushWord32be v
mapMember :: Char -> CharMap -> Bool
mapMember c (Node rc l r)
| c < rc = mapMember c l
| otherwise = mapMember c r
mapMember c DeadEnd = False
mapMember c (LeafMap1 mp) = member c mp
mapMember c (LeafMap2 mp) = member c mp
mapMember c (LeafMap4 mp) = member c mp
mapMember c _ = True

64
Data/CharMap/Builder.hs Normal file
View File

@ -0,0 +1,64 @@
module Data.CharMap.Builder where
import Data.Map.Static.Builder
import Data.List
import Data.Ord
import Data.Char
import Data.Bits
import Data.Word
data BuildingBlock
= SingleMapping Char [Word8]
| RangeMapping Char Char Int [(Word8,Word8)]
charRange :: BuildingBlock -> (Char,Char)
charRange (SingleMapping c _) = (c,c)
charRange (RangeMapping s e _ _) = (s,e)
mappingLength :: BuildingBlock -> Int
mappingLength (SingleMapping _ w) = length w
mappingLength (RangeMapping _ _ _ ws) = length ws
isRange :: BuildingBlock -> Bool
isRange (SingleMapping _ _) = False
isRange (RangeMapping _ _ _ _) = True
buildCharMap :: [BuildingBlock] -> String
buildCharMap lst = let slst = sortBy (comparing (fst.charRange)) lst
grps = groupBy (\x y -> (not (isRange x || isRange y))
&& mappingLength x == mappingLength y
) slst
split' xs = splitAt (length xs `div` 2) xs
build' [] _ _ = "DeadEnd"
build' [[RangeMapping st end off (x:xs)]] bl br
= let e1 = if bl < st
then "Node ("++show st++") DeadEnd ("++e2++")"
else e2
e2 = if br>end
then "Node ("++show end++") ("++e3++") DeadEnd"
else e3
e3 = "LeafRange"++show (length xs+1)++" ("++show (ord st - off)++") "
++show (fst x)++concat (map (\(w,r) -> " "++show w++" "++show r) xs)
in e1
build' [mps@((SingleMapping _ w):_)] bl br
= "LeafMap"++show (length w)++" ("
++(case length w of
1 -> buildStaticMap (map (\(SingleMapping c [w]) -> (c,w)) mps)
2 -> buildStaticMap $ map (\(SingleMapping c [w1,w2])
-> (c,((fromIntegral w1) `shiftL` 8) .|. (fromIntegral w2)::Word16)
) mps
4 -> buildStaticMap $ map (\(SingleMapping c [w1,w2,w3,w4])
-> (c,((fromIntegral w1) `shiftL` 24)
.|. ((fromIntegral w2) `shiftL` 16)
.|. ((fromIntegral w3) `shiftL` 8)
.|. (fromIntegral w4)::Word32)
) mps)++")"
build' mps bl br = let (l,r@((spl:_):_)) = split' mps
(el,_) = charRange spl
in "Node ("++show el++") ("++build' l bl (pred el)++") ("++
build' r el br++")"
in build' grps minBound maxBound

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1250 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1250" "CP1250.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1251 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1251" "CP1251.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1252 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1252" "CP1252.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1253 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1253" "CP1253.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1254 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1254" "CP1254.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1255 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1255" "CP1255.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1256 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1256" "CP1256.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1257 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1257" "CP1257.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.CP1258 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "CP1258" "CP1258.TXT" )

View File

@ -1,237 +0,0 @@
{-# LANGUAGE CPP,DeriveDataTypeable #-}
{- | GB18030 is a chinese character encoding that is mandatory in china (if you
- don\'t implement it, you\'re not allowed to sell your software there).
-}
module Data.Encoding.GB18030
(GB18030(..))
where
import Control.Throws
import Data.Char (chr,ord)
import Data.Word
import Data.Bits
import Data.Encoding.Base
import Data.Encoding.ByteSource
import Data.Encoding.ByteSink
import Data.Encoding.Exception
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Typeable
#if __GLASGOW_HASKELL__>=608
import Data.ByteString.Unsafe (unsafeIndex)
#else
import Data.ByteString.Base (unsafeIndex)
#endif
import Data.Encoding.GB18030Data
data GB18030 = GB18030 deriving (Eq,Show,Typeable)
instance Encoding GB18030 where
decodeChar _ = do
w1 <- fetchWord8
case () of
_
| w1 <= 0x80 -> return (chr $ fromIntegral w1) -- it's ascii
| w1 <= 0xFE -> do
w2 <- fetchWord8
case () of
_
| w2 < 0x30 -> throwException (IllegalCharacter w2)
| w2 <= 0x39 -> do
w3 <- fetchWord8
case () of
_
| w3 < 0x81 -> throwException (IllegalCharacter w3)
| w3 <= 0xFE -> do
w4 <- fetchWord8
case () of
_
| w4 < 0x30 -> throwException (IllegalCharacter w4)
| w4 <= 0x39 -> decodeGBFour $ linear w1 w2 w3 w4
| otherwise -> throwException (IllegalCharacter w4)
| otherwise -> throwException (IllegalCharacter w3)
| w2 <= 0x7E -> return $ decodeGBTwo $ linear2 w1 w2
| w2 == 0x7F -> throwException (IllegalCharacter w2)
| w2 <= 0xFE -> return $ decodeGBTwo $ linear2 w1 w2
| otherwise -> throwException (IllegalCharacter w2)
| otherwise -> throwException (IllegalCharacter w1)
{- How this works: The nested if-structures form an binary tree over the
- encoding range.
-}
encodeChar _ ch = if ch<='\x4946' -- 1
then (if ch<='\x4055' -- 2
then (if ch<='\x2E80' -- 3
then (if ch<='\x200F' -- 4
then (if ch<'\x0452'
then arr 0x0000 arr1
else range range1)
else (if ch<'\x2643'
then arr 0x2010 arr2
else range range2))
else (if ch<='\x3917' -- 4
then (if ch<'\x361B'
then arr 0x2E81 arr3
else range range3)
else (if ch<'\x3CE1'
then arr 0x3918 arr4
else range range4)))
else (if ch<='\x464B' -- 3
then (if ch<='\x4336' -- 4
then (if ch<'\x4160'
then arr 0x4056 arr5
else range range5)
else (if ch<'\x44D7'
then arr 0x4337 arr6
else range range6))
else (if ch<'\x478E'
then arr 0x464C arr7
else range range7)))
else (if ch<='\xF92B' -- 2
then (if ch<='\xD7FF' -- 3
then (if ch<='\x4C76' -- 4
then (if ch<'\x49B8'
then arr 0x4947 arr8
else range range8)
else (if ch<'\x9FA6'
then arr 0x4C77 arr9
else range range9))
else (if ch<'\xE865'
then arr 0xD800 arr10
else range range10))
else (if ch<='\xFFFF' -- 3
then (if ch<='\xFE2F' -- 4
then (if ch<'\xFA2A'
then arr 0xF92C arr11
else range range11)
else (if ch<'\xFFE6'
then arr 0xFE30 arr12
else range range12))
else (if ch<='\x10FFFF' -- 4
then range range13
else throwException (HasNoRepresentation ch))))
where
range r = let (w1,w2,w3,w4) = delinear (ord ch + r)
in pushWord8 w1 >> pushWord8 w2 >> pushWord8 w3 >> pushWord8 w4
arr off a = let ind = (ord ch - off)*5
w1 = unsafeIndex a (ind+1)
w2 = unsafeIndex a (ind+2)
w3 = unsafeIndex a (ind+3)
w4 = unsafeIndex a (ind+4)
in do
pushWord8 w1
case unsafeIndex a ind of
1 -> return ()
2 -> pushWord8 w2
3 -> pushWord8 w2 >> pushWord8 w3
4 -> pushWord8 w2 >> pushWord8 w3 >> pushWord8 w4
encodeable _ c = c <= '\x10FFFF'
linear :: Word8 -> Word8 -> Word8 -> Word8 -> Int
linear w1 w2 w3 w4
= (fromIntegral (w4-0x30))
+ (fromIntegral (w3-0x81))*10
+ (fromIntegral (w2-0x30))*1260
+ (fromIntegral (w1-0x81))*12600
linear2 :: Word8 -> Word8 -> Int
linear2 w1 w2 = (fromIntegral (w2 - (if w2<=0x7E
then 0x40
else 0x41)))
+ (fromIntegral (w1-0x81))*190
delinear :: Int -> (Word8,Word8,Word8,Word8)
delinear n = let
(w1,n1) = n `divMod` 12600
(w2,n2) = n1 `divMod` 1260
(w3,n3) = n2 `divMod` 10
w4 = n3
in (fromIntegral w1+0x81
,fromIntegral w2+0x30
,fromIntegral w3+0x81
,fromIntegral w4+0x30)
decodeGBTwo :: Int -> Char
decodeGBTwo n = let
rn = n*2
w1 = unsafeIndex rrarr rn
w2 = unsafeIndex rrarr (rn+1)
in chr $ ((fromIntegral w1) `shiftL` 8) .|. (fromIntegral w2)
decodeGBFour :: ByteSource m => Int -> m Char
decodeGBFour v = if v<=17858 -- 1
then (if v<=15582 -- 2
then (if v<=11328 -- 3
then (if v<=7921 -- 4
then (if v<820
then arr 0 rarr1
else range range1)
else (if v<9219
then arr 7922 rarr2
else range range2))
else (if v<=13737 -- 4
then (if v<12973
then arr 11329 rarr3
else range range3)
else (if v<14698
then arr 13738 rarr4
else range range4)))
else (if v<=17101 -- 3
then (if v<=16317 -- 4
then (if v<15847
then arr 15583 rarr5
else range range5)
else (if v<16729
then arr 16318 rarr6
else range range6))
else (if v<17418
then arr 17102 rarr7
else range range7)))
else (if v<=37844 -- 2
then (if v<=33468 -- 3
then (if v<=18663 -- 4
then (if v<17961
then arr 17859 rarr8
else range range8)
else (if v<19043
then arr 18664 rarr9
else range range9))
else (if v<33550
then arr 33469 rarr10
else range range10))
else (if v<=39419 -- 3
then (if v<=39107 -- 4
then (if v<38078
then arr 37845 rarr11
else range range11)
else (if v<39394
then arr 39108 rarr12
else range range12))
else (if v<=1237575 && v>=189000
then range range13
else throwException OutOfRange)))
where
arr off a = let
v' = (v-off)*2
w1 = unsafeIndex a v'
w2 = unsafeIndex a (v'+1)
in return $ chr $ ((fromIntegral w1) `shiftL` 8)
.|. (fromIntegral w2)
range r = return $ chr (v-r)
range1,range2,range3,range4,range5,range6,range7,range8,range9,range10,range11,range12,range13 :: Int
range1 = -286
range2 = -576
range3 = -878
range4 = -887
range5 = -889
range6 = -894
range7 = -900
range8 = -911
range9 = -21827
range10 = -25943
range11 = -25964
range12 = -26116
range13 = 123464

View File

@ -1,6 +1,8 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE characterMapping SYSTEM "http://www.unicode.org/unicode/reports/tr22/CharacterMapping.dtd">
<characterMapping id="gb-18030-2000" version="3">
<characterMapping
id="gb-18030-2000" version="3"
description="GB18030 is a chinese character encoding that is mandatory in china (if you - don't implement it, you're not allowed to sell your software there).">
<history>
<modified version="3" date="2001-02-21">
0x80 appears to be a valid (and unassigned) single-byte code, added to the validity.

File diff suppressed because one or more lines are too long

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO885910 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO885910" "8859-10.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO885911 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO885911" "8859-11.TXT" )

View File

@ -294,4 +294,4 @@
0xF8 0x0E58 # THAI DIGIT EIGHT
0xF9 0x0E59 # THAI DIGIT NINE
0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU
0xFB 0x0E5B # THAI CHARACTER KHOMUT
0xFB 0x0E5B # THAI CHARACTER KHOMUT

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO885913 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO885913" "8859-13.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO885914 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO885914" "8859-14.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO885915 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO885915" "8859-15.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO885916 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO885916" "8859-16.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88592 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88592" "8859-2.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88593 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88593" "8859-3.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88594 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88594" "8859-4.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88595 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88595" "8859-5.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88596 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88596" "8859-6.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88597 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88597" "8859-7.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88598 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88598" "8859-8.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.ISO88599 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "ISO88599" "8859-9.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.JISX0201 where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "JISX0201" "JIS0201.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.JISX0208 where
import Data.Encoding.Helper.Template (makeJISInstance)
$( makeJISInstance 1 "JISX0208" "JIS0208.TXT" )

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.JISX0212 where
import Data.Encoding.Helper.Template (makeJISInstance)
$( makeJISInstance 0 "JISX0212" "JIS0212.TXT" )

View File

@ -1,6 +0,0 @@
{-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-}
module Data.Encoding.MacOSRoman where
import Data.Encoding.Helper.Template (makeISOInstance)
$( makeISOInstance "MacOSRoman" "ROMAN.TXT" )

28
Data/Map/Static.hs Normal file
View File

@ -0,0 +1,28 @@
module Data.Map.Static where
import Data.Static
import Data.Array.Static
import GHC.Exts
data StaticMap i e = StaticMap (StaticArray Int i) (StaticArray Int e)
lookup :: (StaticElement i,StaticElement e,Ord i) => i -> StaticMap i e -> Maybe e
lookup ind (StaticMap idx els) = lookup' 1
where
lookup' n = if n > snd (bounds idx)
then Nothing
else case compare ind (idx!n) of
LT -> lookup' (n * 2)
GT -> lookup' ((n * 2) + 1)
EQ -> Just $ els!n
member :: (StaticElement i,StaticElement e,Ord i) => i -> StaticMap i e -> Bool
member ind (StaticMap idx _) = lookup' 1
where
lookup' n = if n > snd (bounds idx)
then False
else case compare ind (idx!n) of
LT -> lookup' (n * 2)
GT -> lookup' ((n * 2) + 1)
EQ -> True

View File

@ -0,0 +1,42 @@
module Data.Map.Static.Builder where
import Data.Static
import Data.Array.Static.Builder
import Data.List
import Data.Ord
import Data.Bits
buildStaticMap :: (StaticElement i,StaticElement e,Ord i) => [(i,e)] -> String
buildStaticMap lst = let step :: Int -> [(i,e)] -> [(Int,(i,e))]
step n chunk = let ss = findSplitSize (length chunk)
(h,d:t) = splitAt ss chunk
in if null chunk
then []
else (n,d):((step (n*2) h)++(step (n*2+1) t))
checkHeap n [] = []
checkHeap n ((c,x):xs) = if c == n
then x:checkHeap (n+1) xs
else error $ "Heap is not consistent: Should be "++show n++" but is "++show c
uheap = sortBy (comparing fst) (step 1 slst)
slst = sortBy (comparing fst) lst
heap = checkHeap 1 $ sortBy (comparing fst) (step 1 slst)
len = length heap
in "StaticMap ("++buildStaticArray (1,len) (map fst heap)++") ("++buildStaticArray (1,len) (map snd heap)++")"
maxSize :: Int -> Int
maxSize d = (1 `shiftL` d) - 1
treeDepth :: Int -> Int
treeDepth sz = find' [0..]
where
find' (x:xs) = if 1 `shiftL` x > sz
then x
else find' xs
findSplitSize :: Int -> Int
findSplitSize len = let depth = treeDepth len
free = (maxSize depth) - len
in if free <= (1 `shiftL` (depth - 2))
then maxSize (depth - 1)
else len - (maxSize (depth - 2)) - 1

File diff suppressed because it is too large Load Diff

View File

@ -12,42 +12,20 @@ Cabal-Version: >=1.2
Build-Type: Simple
Extra-Source-Files:
NEWS
8859-2.TXT
8859-3.TXT
8859-4.TXT
8859-5.TXT
8859-6.TXT
8859-7.TXT
8859-8.TXT
8859-9.TXT
8859-10.TXT
8859-11.TXT
8859-13.TXT
8859-14.TXT
8859-15.TXT
8859-16.TXT
CP1250.TXT
CP1251.TXT
CP1252.TXT
CP1253.TXT
CP1254.TXT
CP1255.TXT
CP1256.TXT
CP1257.TXT
CP1258.TXT
ROMAN.TXT
JIS0201.TXT
JIS0208.TXT
JIS0212.TXT
gb-18030-2000.xml
create_gb18030_data.sh
Flag splitBase
description: Choose the new smaller, split-up base package.
Flag newGHC
description: Use ghc version > 6.10
Library
if flag(splitBase)
Build-Depends: bytestring, base >= 3, binary, mtl, containers, extensible-exceptions, array, template-haskell, regex-compat
if flag(newGHC)
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, template-haskell, regex-compat, ghc-prim, ghc >= 6.10
else
Build-Depends: bytestring, base >= 3 && < 5, binary, mtl, containers, extensible-exceptions, array, template-haskell, regex-compat, ghc < 6.10
else
Build-Depends: base < 3, binary, extensible-exceptions, template-haskell
@ -98,8 +76,10 @@ Library
System.IO.Encoding
Other-Modules:
Data.Encoding.Base
Data.Encoding.Helper.Template
Data.Encoding.GB18030Data
Data.Array.Static
Data.Map.Static
Data.Static
Data.CharMap
Includes:
system_encoding.h
Install-Includes: