Added Preprocessor for XML mappings and normal mappings

Ignore-this: dc0902f526ceb99db528e14c9e3ad563

darcs-hash:20090813024109-a4fee-447c0ff194c227ed919d6eef0f7824e63276183e
This commit is contained in:
Henning Guenther 2009-08-12 19:41:09 -07:00
parent 6101ee16ae
commit 39af34b0a3
4 changed files with 1046 additions and 2 deletions

View File

@ -0,0 +1,141 @@
module Data.Encoding.Preprocessor.Mapping where
import Distribution.Simple.PreProcess
import Distribution.Simple.Utils
import System.IO
import System.FilePath
import Data.List (intersperse,unfoldr)
import Data.Maybe
import Data.Char
import Data.Word
import Data.Ix
import Data.Bits
import Data.Encoding.Preprocessor.Addr
import Data.Array.Static.Builder
import Data.CharMap.Builder
data MappingType
= ISOMapping
| JISMapping
deriving (Eq,Ord,Show,Read)
readTranslation :: Int -> FilePath -> IO [(Integer,Maybe Char)]
readTranslation offset file = do
cont <- readFile file
return $ mapMaybe (\ln -> case drop offset ln of
[src] -> Just (src,Nothing)
[src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
_ -> Nothing) (parseTranslationTable cont)
parseTranslationTable :: String -> [[Integer]]
parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont))
{-fillTranslations :: (Ix a,Show a) => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)]
fillTranslations f t = merge (range (f,t))
where
merge xs [] = map (\x -> (x,Nothing)) xs
merge [] cs = error $ "Data.Encoding.Helper.Template.fillTranslations: Character translations out of range: " ++ show cs
merge (x:xs) (y:ys) = if x < fst y
then (x,Nothing):(merge xs (y:ys))
else y:(merge xs ys)-}
fillTranslations :: (Enum a,Eq a) => [(a,Maybe Char)] -> (a,a,[Maybe Char])
fillTranslations [] = error "fillTranslations: zero elements"
fillTranslations ((s,c):rest) = let (e,r) = fill' s rest
fill' cur [] = (cur,[])
fill' cur all@((n,c):rest2) = if succ cur == n
then (let (e',res) = fill' n rest2
in (e',c:res))
else (let (e',res) = fill' (succ cur) all
in (e',Nothing:res))
in (s,e,c:r)
validTranslations :: [(a,Maybe Char)] -> [(a,Char)]
validTranslations = mapMaybe (\(n,mc) -> case mc of
Nothing -> Nothing
Just c -> Just (n,c))
mappingPreprocessor :: PreProcessor
mappingPreprocessor = PreProcessor
{platformIndependent = True
,runPreProcessor = \(sbase,sfile) (tbase,tfile) verb -> do
let (dir,fn) = splitFileName sfile
let (bname,ext) = splitExtensions fn
let dirs = splitDirectories dir
let tp = case ext of
".mapping" -> ISOMapping
".mapping2" -> JISMapping
info verb (tfile++" generated from mapping "++sfile)
preprocessMapping tp (sbase </> sfile) (tbase </> tfile) dirs bname
}
preprocessMapping :: MappingType -> FilePath -> FilePath -> [String] -> String -> IO ()
preprocessMapping tp src trg mods name = do
trans <- readTranslation 0 src
let mod = concat $ intersperse "." (mods++[name])
let wsize = case tp of
ISOMapping -> 1
JISMapping -> 2
let bsize = show (wsize*8) ++ (if wsize > 1 then "be" else "")
--let (larr,off,arr) = staticArray32 trans
let (sarr,earr,els) = fillTranslations trans
{-let (lmp,idx,val) = staticMap wsize trans-}
let arrname = "decoding_array_"++name
let mpname = "encoding_map_"++name
let bcheck exp = if sarr/=0 || earr/=255
then ("(if "++
concat (intersperse "||" $ (if sarr/=0 then ["w<"++show sarr] else [])++(if earr/=255 then ["w>"++show earr] else []))++
" then throwException $ IllegalCharacter $ fromIntegral w else "++exp++")"
) else exp
let mp = buildCharMap (mapMaybe (\(i,c) -> do
rc <- c
return $ SingleMapping
rc
(reverse $ unfoldr (\(w,n) -> if n == 0
then Nothing
else Just (fromIntegral w,(w `shiftR` 8,n-1))) (i,wsize))
) trans
)
{-let mp = case wsize of
1 -> buildStaticMap (mapMaybe (\(i,c) -> case c of
Nothing -> Nothing
Just rc -> Just (rc,fromIntegral i::Word8)) trans)
2 -> buildStaticMap (mapMaybe (\(i,c) -> case c of
Nothing -> Nothing
Just rc -> Just (rc,fromIntegral i::Word16)) trans)-}
writeFile trg $ unlines $
["{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"
,"module "++mod++"("++name++"(..)) where"
,""
,"import Data.Encoding.Base"
,"import Data.Encoding.ByteSource"
,"import Data.Encoding.ByteSink"
,"import Data.Encoding.Exception"
,"import Data.CharMap"
,"import Data.Array.Static"
,"import Data.Map.Static"
,"import Control.Throws"
,"import Prelude hiding (lookup)"
,"import Data.Word"
,""
,"import Data.Typeable"
,""
,"data "++name++" = "++name
," deriving (Show,Eq,Typeable)"
,""
,arrname++" = "++buildStaticArray (sarr,earr) els
,""
,mpname++" :: CharMap"
,mpname++" = "++mp
,""
,"instance Encoding "++name++" where"
," decodeChar _ = do"
," w <- fetchWord"++bsize
," "++bcheck "return ()"
," case "++arrname++"!w of"
," Nothing -> throwException $ IllegalCharacter $ fromIntegral w"
," Just c -> return c"
," encodeChar _ c = mapEncode c "++mpname
," encodeable _ c = mapMember c "++mpname
]

View File

@ -0,0 +1,581 @@
module Data.Encoding.Preprocessor.XMLMapping where
import Data.Word
import Text.Read
import Text.Show
import Numeric
import Data.List (find)
import Data.Char
import Text.XML.HaXml.XmlContent
import Text.XML.HaXml.OneOfN
testFile :: FilePath -> IO CharacterMapping
testFile fp = fReadXml fp
{-Type decls-}
data CharacterMapping = CharacterMapping CharacterMapping_Attrs
(Maybe History)
(OneOf2 Validity Stateful_siso)
Assignments
deriving (Eq,Show)
data CharacterMapping_Attrs = CharacterMapping_Attrs
{ characterMappingId :: String
, characterMappingVersion :: String
, characterMappingDescription :: (Maybe String)
, characterMappingContact :: (Maybe String)
, characterMappingRegistrationAuthority :: (Maybe String)
, characterMappingRegistrationName :: (Maybe String)
, characterMappingCopyright :: (Maybe String)
, characterMappingBidiOrder :: (Defaultable CharacterMapping_bidiOrder)
, characterMappingCombiningOrder :: (Defaultable CharacterMapping_combiningOrder)
, characterMappingNormalization :: (Defaultable CharacterMapping_normalization)
} deriving (Eq,Show)
data CharacterMapping_bidiOrder = CharacterMapping_bidiOrder_logical
| CharacterMapping_bidiOrder_RTL |
CharacterMapping_bidiOrder_LTR
deriving (Eq,Show)
data CharacterMapping_combiningOrder = CharacterMapping_combiningOrder_before
| CharacterMapping_combiningOrder_after
deriving (Eq,Show)
data CharacterMapping_normalization = CharacterMapping_normalization_undetermined
| CharacterMapping_normalization_neither |
CharacterMapping_normalization_NFC |
CharacterMapping_normalization_NFD |
CharacterMapping_normalization_NFC_NFD
deriving (Eq,Show)
data Stateful_siso = Stateful_siso Validity Validity
deriving (Eq,Show)
newtype History = History (List1 Modified) deriving (Eq,Show)
data Modified = Modified Modified_Attrs String
deriving (Eq,Show)
data Modified_Attrs = Modified_Attrs
{ modifiedVersion :: String
, modifiedDate :: String
} deriving (Eq,Show)
newtype Validity = Validity (List1 State) deriving (Eq,Show)
data State = State
{ stateType :: String
, stateNext :: String
, stateS :: ByteSequence
, stateE :: (Maybe ByteSequence)
, stateMax :: (Maybe String)
} deriving (Eq,Show)
data Assignments = Assignments Assignments_Attrs [A] [Fub] [Fbu]
[Sub1] [Range]
deriving (Eq,Show)
data Assignments_Attrs = Assignments_Attrs
{ assignmentsSub :: (Defaultable String)
, assignmentsSub1 :: (Maybe String)
} deriving (Eq,Show)
data A = A
{ aB :: ByteSequence
, aU :: CodePoints
, aC :: (Maybe String)
, aV :: (Maybe String)
} deriving (Eq,Show)
data Fub = Fub
{ fubB :: ByteSequence
, fubU :: CodePoints
, fubC :: (Maybe String)
, fubRu :: (Maybe String)
, fubRc :: (Maybe String)
, fubV :: (Maybe String)
} deriving (Eq,Show)
data Fbu = Fbu
{ fbuB :: ByteSequence
, fbuU :: CodePoints
, fbuV :: (Maybe String)
} deriving (Eq,Show)
data Sub1 = Sub1
{ sub1U :: CodePoints
, sub1C :: (Maybe String)
, sub1V :: (Maybe String)
} deriving (Eq,Show)
data Range = Range
{ rangeBFirst :: ByteSequence
, rangeBLast :: ByteSequence
, rangeUFirst :: CodePoints
, rangeULast :: CodePoints
, rangeBMin :: ByteSequence
, rangeBMax :: ByteSequence
, rangeV :: (Maybe String)
} deriving (Eq,Show)
data Iso2022 = Iso2022 (Maybe Default2022)
(List1 (OneOf5 Escape Si So Ss2 Ss3))
deriving (Eq,Show)
data Default2022 = Default2022
{ default2022Name :: String
} deriving (Eq,Show)
data Escape = Escape
{ escapeSequence :: String
, escapeName :: String
} deriving (Eq,Show)
newtype Si = Si (List1 Designator) deriving (Eq,Show)
newtype So = So (List1 Designator) deriving (Eq,Show)
newtype Ss2 = Ss2 (List1 Designator) deriving (Eq,Show)
newtype Ss3 = Ss3 (List1 Designator) deriving (Eq,Show)
data Designator = Designator
{ designatorSequence :: String
, designatorName :: String
} deriving (Eq,Show)
newtype ByteSequence = BS [Word8] deriving Eq
newtype CodePoints = CP [Char] deriving Eq
{-Instance decls-}
instance HTypeable CharacterMapping where
toHType x = Defined "characterMapping" [] []
instance XmlContent CharacterMapping where
toContents (CharacterMapping as a b c) =
[CElem (Elem "characterMapping" (toAttrs as) (maybe [] toContents a
++ toContents b
++ toContents c)) ()]
parseContents = do
{ e@(Elem _ as _) <- element ["characterMapping"]
; interior e $ return (CharacterMapping (fromAttrs as))
`apply` optional parseContents `apply` parseContents `apply` parseContents
} `adjustErr` ("in <characterMapping>, "++)
instance XmlAttributes CharacterMapping_Attrs where
fromAttrs as =
CharacterMapping_Attrs
{ characterMappingId = definiteA fromAttrToStr "characterMapping" "id" as
, characterMappingVersion = definiteA fromAttrToStr "characterMapping" "version" as
, characterMappingDescription = possibleA fromAttrToStr "description" as
, characterMappingContact = possibleA fromAttrToStr "contact" as
, characterMappingRegistrationAuthority = possibleA fromAttrToStr "registrationAuthority" as
, characterMappingRegistrationName = possibleA fromAttrToStr "registrationName" as
, characterMappingCopyright = possibleA fromAttrToStr "copyright" as
, characterMappingBidiOrder = defaultA fromAttrToTyp CharacterMapping_bidiOrder_logical "bidiOrder" as
, characterMappingCombiningOrder = defaultA fromAttrToTyp CharacterMapping_combiningOrder_after "combiningOrder" as
, characterMappingNormalization = defaultA fromAttrToTyp CharacterMapping_normalization_undetermined "normalization" as
}
toAttrs v = catMaybes
[ toAttrFrStr "id" (characterMappingId v)
, toAttrFrStr "version" (characterMappingVersion v)
, maybeToAttr toAttrFrStr "description" (characterMappingDescription v)
, maybeToAttr toAttrFrStr "contact" (characterMappingContact v)
, maybeToAttr toAttrFrStr "registrationAuthority" (characterMappingRegistrationAuthority v)
, maybeToAttr toAttrFrStr "registrationName" (characterMappingRegistrationName v)
, maybeToAttr toAttrFrStr "copyright" (characterMappingCopyright v)
, defaultToAttr toAttrFrTyp "bidiOrder" (characterMappingBidiOrder v)
, defaultToAttr toAttrFrTyp "combiningOrder" (characterMappingCombiningOrder v)
, defaultToAttr toAttrFrTyp "normalization" (characterMappingNormalization v)
]
instance XmlAttrType CharacterMapping_bidiOrder where
fromAttrToTyp n (n',v)
| n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "logical" = Just CharacterMapping_bidiOrder_logical
translate "RTL" = Just CharacterMapping_bidiOrder_RTL
translate "LTR" = Just CharacterMapping_bidiOrder_LTR
translate _ = Nothing
toAttrFrTyp n CharacterMapping_bidiOrder_logical = Just (n, str2attr "logical")
toAttrFrTyp n CharacterMapping_bidiOrder_RTL = Just (n, str2attr "RTL")
toAttrFrTyp n CharacterMapping_bidiOrder_LTR = Just (n, str2attr "LTR")
instance XmlAttrType CharacterMapping_combiningOrder where
fromAttrToTyp n (n',v)
| n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "before" = Just CharacterMapping_combiningOrder_before
translate "after" = Just CharacterMapping_combiningOrder_after
translate _ = Nothing
toAttrFrTyp n CharacterMapping_combiningOrder_before = Just (n, str2attr "before")
toAttrFrTyp n CharacterMapping_combiningOrder_after = Just (n, str2attr "after")
instance XmlAttrType CharacterMapping_normalization where
fromAttrToTyp n (n',v)
| n==n' = translate (attr2str v)
| otherwise = Nothing
where translate "undetermined" = Just CharacterMapping_normalization_undetermined
translate "neither" = Just CharacterMapping_normalization_neither
translate "NFC" = Just CharacterMapping_normalization_NFC
translate "NFD" = Just CharacterMapping_normalization_NFD
translate "NFC_NFD" = Just CharacterMapping_normalization_NFC_NFD
translate _ = Nothing
toAttrFrTyp n CharacterMapping_normalization_undetermined = Just (n, str2attr "undetermined")
toAttrFrTyp n CharacterMapping_normalization_neither = Just (n, str2attr "neither")
toAttrFrTyp n CharacterMapping_normalization_NFC = Just (n, str2attr "NFC")
toAttrFrTyp n CharacterMapping_normalization_NFD = Just (n, str2attr "NFD")
toAttrFrTyp n CharacterMapping_normalization_NFC_NFD = Just (n, str2attr "NFC_NFD")
instance XmlAttrType ByteSequence where
fromAttrToTyp n (n',v)
| n==n' = parseByteSequence (attr2str v)
| otherwise = Nothing
toAttrFrTyp n bs = Just (n, str2attr $ show bs)
parseByteSequence :: String -> Maybe ByteSequence
parseByteSequence str = do
seq <- mapM (\w -> do
(res,_) <- find (null.snd) (readHex w)
return res
) (words str)
return $ BS seq
instance Show ByteSequence where
show (BS seq) = foldl (\f w -> f . (showChar ' ') . (showHex w)) id seq ""
instance XmlAttrType CodePoints where
fromAttrToTyp n (n',v)
| n==n' = parseCodePoints (attr2str v)
| otherwise = Nothing
toAttrFrTyp n bs = Just (n, str2attr $ show bs)
parseCodePoints :: String -> Maybe CodePoints
parseCodePoints str = do
seq <- mapM (\w -> do
(res,_) <- find (null.snd) (readHex w)
return (chr res)
) (words str)
return $ CP seq
instance Show CodePoints where
show (CP seq) = foldl (\f w -> f . (showChar ' ') . (showHex (ord w))) id seq ""
instance HTypeable Stateful_siso where
toHType x = Defined "stateful_siso" [] []
instance XmlContent Stateful_siso where
toContents (Stateful_siso a b) =
[CElem (Elem "stateful_siso" [] (toContents a ++ toContents b)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["stateful_siso"]
; interior e $ return (Stateful_siso) `apply` parseContents
`apply` parseContents
} `adjustErr` ("in <stateful_siso>, "++)
instance HTypeable History where
toHType x = Defined "history" [] []
instance XmlContent History where
toContents (History a) =
[CElem (Elem "history" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["history"]
; interior e $ return (History) `apply` parseContents
} `adjustErr` ("in <history>, "++)
instance HTypeable Modified where
toHType x = Defined "modified" [] []
instance XmlContent Modified where
toContents (Modified as a) =
[CElem (Elem "modified" (toAttrs as) (toText a)) ()]
parseContents = do
{ e@(Elem _ as _) <- element ["modified"]
; interior e $ return (Modified (fromAttrs as))
`apply` (text `onFail` return "")
} `adjustErr` ("in <modified>, "++)
instance XmlAttributes Modified_Attrs where
fromAttrs as =
Modified_Attrs
{ modifiedVersion = definiteA fromAttrToStr "modified" "version" as
, modifiedDate = definiteA fromAttrToStr "modified" "date" as
}
toAttrs v = catMaybes
[ toAttrFrStr "version" (modifiedVersion v)
, toAttrFrStr "date" (modifiedDate v)
]
instance HTypeable Validity where
toHType x = Defined "validity" [] []
instance XmlContent Validity where
toContents (Validity a) =
[CElem (Elem "validity" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["validity"]
; interior e $ return (Validity) `apply` parseContents
} `adjustErr` ("in <validity>, "++)
instance HTypeable State where
toHType x = Defined "state" [] []
instance XmlContent State where
toContents as =
[CElem (Elem "state" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["state"]
; return (fromAttrs as)
} `adjustErr` ("in <state>, "++)
instance XmlAttributes State where
fromAttrs as =
State
{ stateType = definiteA fromAttrToStr "state" "type" as
, stateNext = definiteA fromAttrToStr "state" "next" as
, stateS = definiteA fromAttrToTyp "state" "s" as
, stateE = possibleA fromAttrToTyp "e" as
, stateMax = possibleA fromAttrToStr "max" as
}
toAttrs v = catMaybes
[ toAttrFrStr "type" (stateType v)
, toAttrFrStr "next" (stateNext v)
, toAttrFrTyp "s" (stateS v)
, maybeToAttr toAttrFrTyp "e" (stateE v)
, maybeToAttr toAttrFrStr "max" (stateMax v)
]
instance HTypeable Assignments where
toHType x = Defined "assignments" [] []
instance XmlContent Assignments where
toContents (Assignments as a b c d e) =
[CElem (Elem "assignments" (toAttrs as) (concatMap toContents a ++
concatMap toContents b ++ concatMap toContents c ++
concatMap toContents d ++
concatMap toContents e)) ()]
parseContents = do
{ e@(Elem _ as _) <- element ["assignments"]
; interior e $ return (Assignments (fromAttrs as))
`apply` many parseContents `apply` many parseContents
`apply` many parseContents `apply` many parseContents
`apply` many parseContents
} `adjustErr` ("in <assignments>, "++)
instance XmlAttributes Assignments_Attrs where
fromAttrs as =
Assignments_Attrs
{ assignmentsSub = defaultA fromAttrToStr "1A" "sub" as
, assignmentsSub1 = possibleA fromAttrToStr "sub1" as
}
toAttrs v = catMaybes
[ defaultToAttr toAttrFrStr "sub" (assignmentsSub v)
, maybeToAttr toAttrFrStr "sub1" (assignmentsSub1 v)
]
instance HTypeable A where
toHType x = Defined "a" [] []
instance XmlContent A where
toContents as =
[CElem (Elem "a" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["a"]
; return (fromAttrs as)
} `adjustErr` ("in <a>, "++)
instance XmlAttributes A where
fromAttrs as =
A { aB = definiteA fromAttrToTyp "a" "b" as
, aU = definiteA fromAttrToTyp "a" "u" as
, aC = possibleA fromAttrToStr "c" as
, aV = possibleA fromAttrToStr "v" as
}
toAttrs v = catMaybes
[ toAttrFrTyp "b" (aB v)
, toAttrFrTyp "u" (aU v)
, maybeToAttr toAttrFrStr "c" (aC v)
, maybeToAttr toAttrFrStr "v" (aV v)
]
instance HTypeable Fub where
toHType x = Defined "fub" [] []
instance XmlContent Fub where
toContents as =
[CElem (Elem "fub" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["fub"]
; return (fromAttrs as)
} `adjustErr` ("in <fub>, "++)
instance XmlAttributes Fub where
fromAttrs as =
Fub
{ fubB = definiteA fromAttrToTyp "fub" "b" as
, fubU = definiteA fromAttrToTyp "fub" "u" as
, fubC = possibleA fromAttrToStr "c" as
, fubRu = possibleA fromAttrToStr "ru" as
, fubRc = possibleA fromAttrToStr "rc" as
, fubV = possibleA fromAttrToStr "v" as
}
toAttrs v = catMaybes
[ toAttrFrTyp "b" (fubB v)
, toAttrFrTyp "u" (fubU v)
, maybeToAttr toAttrFrStr "c" (fubC v)
, maybeToAttr toAttrFrStr "ru" (fubRu v)
, maybeToAttr toAttrFrStr "rc" (fubRc v)
, maybeToAttr toAttrFrStr "v" (fubV v)
]
instance HTypeable Fbu where
toHType x = Defined "fbu" [] []
instance XmlContent Fbu where
toContents as =
[CElem (Elem "fbu" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["fbu"]
; return (fromAttrs as)
} `adjustErr` ("in <fbu>, "++)
instance XmlAttributes Fbu where
fromAttrs as =
Fbu
{ fbuB = definiteA fromAttrToTyp "fbu" "b" as
, fbuU = definiteA fromAttrToTyp "fbu" "u" as
, fbuV = possibleA fromAttrToStr "v" as
}
toAttrs v = catMaybes
[ toAttrFrTyp "b" (fbuB v)
, toAttrFrTyp "u" (fbuU v)
, maybeToAttr toAttrFrStr "v" (fbuV v)
]
instance HTypeable Sub1 where
toHType x = Defined "sub1" [] []
instance XmlContent Sub1 where
toContents as =
[CElem (Elem "sub1" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["sub1"]
; return (fromAttrs as)
} `adjustErr` ("in <sub1>, "++)
instance XmlAttributes Sub1 where
fromAttrs as =
Sub1
{ sub1U = definiteA fromAttrToTyp "sub1" "u" as
, sub1C = possibleA fromAttrToStr "c" as
, sub1V = possibleA fromAttrToStr "v" as
}
toAttrs v = catMaybes
[ toAttrFrTyp "u" (sub1U v)
, maybeToAttr toAttrFrStr "c" (sub1C v)
, maybeToAttr toAttrFrStr "v" (sub1V v)
]
instance HTypeable Range where
toHType x = Defined "range" [] []
instance XmlContent Range where
toContents as =
[CElem (Elem "range" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["range"]
; return (fromAttrs as)
} `adjustErr` ("in <range>, "++)
instance XmlAttributes Range where
fromAttrs as =
Range
{ rangeBFirst = definiteA fromAttrToTyp "range" "bFirst" as
, rangeBLast = definiteA fromAttrToTyp "range" "bLast" as
, rangeUFirst = definiteA fromAttrToTyp "range" "uFirst" as
, rangeULast = definiteA fromAttrToTyp "range" "uLast" as
, rangeBMin = definiteA fromAttrToTyp "range" "bMin" as
, rangeBMax = definiteA fromAttrToTyp "range" "bMax" as
, rangeV = possibleA fromAttrToStr "v" as
}
toAttrs v = catMaybes
[ toAttrFrTyp "bFirst" (rangeBFirst v)
, toAttrFrTyp "bLast" (rangeBLast v)
, toAttrFrTyp "uFirst" (rangeUFirst v)
, toAttrFrTyp "uLast" (rangeULast v)
, toAttrFrTyp "bMin" (rangeBMin v)
, toAttrFrTyp "bMax" (rangeBMax v)
, maybeToAttr toAttrFrStr "v" (rangeV v)
]
instance HTypeable Iso2022 where
toHType x = Defined "iso2022" [] []
instance XmlContent Iso2022 where
toContents (Iso2022 a b) =
[CElem (Elem "iso2022" [] (maybe [] toContents a ++
toContents b)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["iso2022"]
; interior e $ return (Iso2022) `apply` optional parseContents
`apply` parseContents
} `adjustErr` ("in <iso2022>, "++)
instance HTypeable Default2022 where
toHType x = Defined "default2022" [] []
instance XmlContent Default2022 where
toContents as =
[CElem (Elem "default2022" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["default2022"]
; return (fromAttrs as)
} `adjustErr` ("in <default2022>, "++)
instance XmlAttributes Default2022 where
fromAttrs as =
Default2022
{ default2022Name = definiteA fromAttrToStr "default2022" "name" as
}
toAttrs v = catMaybes
[ toAttrFrStr "name" (default2022Name v)
]
instance HTypeable Escape where
toHType x = Defined "escape" [] []
instance XmlContent Escape where
toContents as =
[CElem (Elem "escape" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["escape"]
; return (fromAttrs as)
} `adjustErr` ("in <escape>, "++)
instance XmlAttributes Escape where
fromAttrs as =
Escape
{ escapeSequence = definiteA fromAttrToStr "escape" "sequence" as
, escapeName = definiteA fromAttrToStr "escape" "name" as
}
toAttrs v = catMaybes
[ toAttrFrStr "sequence" (escapeSequence v)
, toAttrFrStr "name" (escapeName v)
]
instance HTypeable Si where
toHType x = Defined "si" [] []
instance XmlContent Si where
toContents (Si a) =
[CElem (Elem "si" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["si"]
; interior e $ return (Si) `apply` parseContents
} `adjustErr` ("in <si>, "++)
instance HTypeable So where
toHType x = Defined "so" [] []
instance XmlContent So where
toContents (So a) =
[CElem (Elem "so" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["so"]
; interior e $ return (So) `apply` parseContents
} `adjustErr` ("in <so>, "++)
instance HTypeable Ss2 where
toHType x = Defined "ss2" [] []
instance XmlContent Ss2 where
toContents (Ss2 a) =
[CElem (Elem "ss2" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["ss2"]
; interior e $ return (Ss2) `apply` parseContents
} `adjustErr` ("in <ss2>, "++)
instance HTypeable Ss3 where
toHType x = Defined "ss3" [] []
instance XmlContent Ss3 where
toContents (Ss3 a) =
[CElem (Elem "ss3" [] (toContents a)) ()]
parseContents = do
{ e@(Elem _ [] _) <- element ["ss3"]
; interior e $ return (Ss3) `apply` parseContents
} `adjustErr` ("in <ss3>, "++)
instance HTypeable Designator where
toHType x = Defined "designator" [] []
instance XmlContent Designator where
toContents as =
[CElem (Elem "designator" (toAttrs as) []) ()]
parseContents = do
{ (Elem _ as []) <- element ["designator"]
; return (fromAttrs as)
} `adjustErr` ("in <designator>, "++)
instance XmlAttributes Designator where
fromAttrs as =
Designator
{ designatorSequence = definiteA fromAttrToStr "designator" "sequence" as
, designatorName = definiteA fromAttrToStr "designator" "name" as
}
toAttrs v = catMaybes
[ toAttrFrStr "sequence" (designatorSequence v)
, toAttrFrStr "name" (designatorName v)
]
{-Done-}

View File

@ -0,0 +1,315 @@
{-# LANGUAGE ParallelListComp #-}
module Data.Encoding.Preprocessor.XMLMappingBuilder where
import Data.Word
import Data.List
import Data.Ord
import Data.Char
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Array.Static.Builder
import Data.CharMap.Builder
import Data.Encoding.Preprocessor.XMLMapping
import Distribution.Simple.PreProcess
import System.FilePath
import Text.XML.HaXml.OneOfN
import Text.XML.HaXml.XmlContent
import Control.Exception (assert)
xmlPreprocessor :: PreProcessor
xmlPreprocessor = PreProcessor
{ platformIndependent = True
, runPreProcessor = \src trg verb -> do
createModuleFromFile src trg
}
description :: CharacterMapping -> Maybe String
description (CharacterMapping attrs _ _ _) = characterMappingDescription attrs
createModuleFromFile (sbase,sfile) (tbase,tfile) = do
xml <- testFile (sbase </> sfile)
let (dir,fn) = splitFileName sfile
let (bname,ext) = splitExtensions fn
let dirs = splitDirectories dir
let body = buildDecisionTree minBound maxBound "ch" (encodingElements xml)
let body2 = createDecoding (states xml) (decodingElements xml)
let mpname = "encoding_map_"++bname
let mp = buildCharMap $ [SingleMapping c w | (c,w) <- assignments xml]
++[ RangeMapping
st end
(foldl (\v (w,mi,ma) -> v*((fromIntegral $ ma-mi)+1) + (fromIntegral (w-mi))) 0 (zip3 bfirst bmin bmax))
[(min,max-min+1) | min <- bmin | max <- bmax]
| (st,end,bfirst,blast,bmin,bmax) <- ranges xml ]
writeFile (tbase</>tfile) $ unlines $
["{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"]++
(case description xml of
Nothing -> []
Just str -> ["{- | "++str++" -}"]) ++
["module "++concat (intersperse "." (dirs++[bname]))
," ("++bname++"("++bname++"))"
," where"
,""
,"import Control.Throws"
,"import Data.Encoding.Base"
,"import Data.Encoding.ByteSink"
,"import Data.Encoding.ByteSource"
,"import Data.Encoding.Exception"
,"import Data.Array.Static"
,"import Data.Map.Static"
,"import Data.CharMap"
,"import Data.Char"
,"import Data.Word"
,"import Data.Typeable"
,""
,"data "++bname++" = "++bname
," deriving (Eq,Show,Typeable)"
,""
,mpname++" :: CharMap"
,mpname++" = "++mp
,""
,"instance Encoding "++bname++" where"
," encodeChar _ ch = mapEncode ch "++mpname
," decodeChar _ = "++body2
," encodeable _ ch = mapMember ch "++mpname
]
decodingValueRange :: [(Word8,Word8)] -> DecodingElement -> (Int,Int)
decodingValueRange path (DecodingElement c ws)
= let v = foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo)+1) + (fromIntegral $ w - lo)) 0 (zip ws path)
in (v,v)
decodingValueRange path (DecodingRange first last bfirst blast bmin bmax)
= assert (zip bmin bmax == path) $
(decodingValue path bfirst
,decodingValue path blast)
decodingValue :: [(Word8,Word8)] -> [Word8] -> Int
decodingValue path ws
= foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo) + 1) + (fromIntegral $ w - lo))
0 (zip ws path)
type StateMachine = Map String [(Word8,Word8,String)]
createDecoding :: StateMachine -> [DecodingElement] -> String
createDecoding sm els = create' els [] 0 "FIRST"
where
create' els path n st = let trans = sortBy (\(s1,e1,st1) (s2,e2,st2) -> compare s1 s2) $ sm Map.! st
in "(fetchWord8 >>= \\w"++show n++" -> " ++ tree' n path els trans 0 255++")"
tree' :: Int -> [(Word8,Word8)] -> [DecodingElement] -> [(Word8,Word8,String)] -> Word8 -> Word8 -> String
tree' n path els [] _ _ = illWord $ "w"++show n
tree' n path els [(s,e,nst)] bl br
= let e1 = if s > bl
then "(if w"++show n++" < "++show s++" then "++illWord ("w"++show n)++" else "++e2++")"
else e2
e2 = if e < br
then "(if w"++show n++" > "++show e++" then "++illWord ("w"++show n)++" else "++e3++")"
else e3
e3 = if nst == "VALID"
then array' rpath sels
else "{- for "++nst++"-}" ++ create' nels npath (n+1) nst
npath = (s,e):path
rpath = reverse npath
sels = sortBy (comparing (decodingValueRange rpath)) nels
nels = filter (\el -> let (ll,lr) = (decodingLimits el)!!n in ll>=s && lr <= e) els
in e1
tree' n path els trans bl br
= let (left,right@((b,_,_):_)) = splitAt (length trans `div` 2) trans
(eleft,eright) = partition (\el -> fst ((decodingLimits el)!!n) < b) els
in "(if w"++show n++" < "++show b++" then "++tree' n path eleft left bl (b-1)
++" else "++tree' n path eright right b br++")"
array' path els = let grps = groupBy (\e1 e2 -> case e1 of
DecodingRange _ _ _ _ _ _ -> False
_ -> case e2 of
DecodingRange _ _ _ _ _ _ -> False
_ -> True
) els
ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path
val = foldl (\expr (r,n,m) -> "("++expr++"*"++show r++"+(fromIntegral w"++show n++"-"++show m++"))")
"0"
(zip3 ranges [0..] (map fst path))
offset = (product ranges)-1
in "(let val = " ++ val ++ " in "++array'' path grps 0 offset++")"
array'' path [] _ _ = "throwException (IllegalRepresentation ["++concat (intersperse "," (zipWith (\n _ -> "w"++show n) [0..] path))++"])"
array'' path [grp] lo up
= case grp of
[DecodingRange first end bfirst bend bmin bmax] ->
let ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path
off = foldl (\v (r,c,m) -> v*r+(fromIntegral $ c-m)) 0 (zip3 ranges bfirst bmin)
equalranges = and $ zipWith (==) path (zip bmin bmax)
in if equalranges
then "(return (chr (val + ("++show (ord first - off)++"))))"
else error "Can't have a range that has a different range..."
_ -> let chars = fillRange lo $ map (\el@(DecodingElement c _) -> (c,fst $ decodingValueRange path el)) grp
in "(return (("++buildStaticArray (lo,up) chars++")!val))"
array'' path grps lo up = let (left,right@(brk:_)) = splitAt (length grps `div` 2) grps
(off,_) = decodingValueRange path (head brk)
in "(if val < "++show off++" then "++array'' path left lo (off-1)
++" else "++array'' path right off up++")"
fillRange :: Int -> [(Char,Int)] -> [Char]
fillRange s [] = []
fillRange s all@((c,i):cs) = case compare i s of
GT -> '\0':fillRange (s+1) all
LT -> error $ "Char out of range "++show (take 10 all)
EQ -> c:fillRange (s+1) cs
states :: CharacterMapping -> StateMachine
states (CharacterMapping attrs hist val ass)
= case val of
OneOf2 (Validity (NonEmpty lst)) -> Map.fromListWith (++) $
map (\st -> let BS [start] = stateS st
end = case stateE st of
Nothing -> start
Just (BS [rend]) -> rend
in (stateType st,[(start,end,stateNext st)])) lst
_ -> error "Mapping doesn't contain validity section"
decodingElements :: CharacterMapping -> [DecodingElement]
decodingElements mp = map (\(c,ws) -> DecodingElement c ws) (assignments mp)
++ map (\(fi,la,bfi,bla,bmi,bma) -> DecodingRange fi la bfi bla bmi bma) (ranges mp)
illWord :: String -> String
illWord n = "throwException (IllegalCharacter "++n++")"
decodingLimits :: DecodingElement -> [(Word8,Word8)]
decodingLimits (DecodingElement _ ws) = map (\w -> (w,w)) ws
decodingLimits (DecodingRange _ _ bfirst blast bmin bmax) = lim' False (zip4 bfirst blast bmin bmax)
where
lim' dec [] = []
lim' dec ((fi,la,mi,ma):xs) = if dec
then (mi,ma):(lim' dec xs)
else (fi,la):(lim' (fi/=la) xs)
decodingLength :: DecodingElement -> Int
decodingLength (DecodingRange _ _ first _ _ _) = length first
decodingLength (DecodingElement _ ws) = length ws
decodingElementCount :: DecodingElement -> Int
decodingElementCount (DecodingRange s e _ _ _ _) = ord e - ord s
decodingElementCount (DecodingElement _ _) = 1
data DecodingElement
= DecodingRange Char Char [Word8] [Word8] [Word8] [Word8]
| DecodingElement Char [Word8]
deriving Show
norep :: String -> String
norep var = "(throwException $ HasNoRepresentation "++var++")"
buildDecisionTree :: Char -> Char -> String -> [EncodingElement] -> String
buildDecisionTree l r var [] = norep var
buildDecisionTree l r var [el]
= let e1 = if l < startChar el
then "(if "++var++" < "++show (startChar el)++" then "++norep var++" else "++e2++")"
else e2
e2 = if r > endChar el
then "(if "++var++" > "++show (endChar el)++" then "++norep var++" else "++e3++")"
else e3
e3 = buildEncoding el var
in e1
buildDecisionTree ll lr var els
= let (l,r@(sep:_)) = splitAt (length els `div` 2) els
in "(if "++var++" < "++show (startChar sep)
++" then ("++(buildDecisionTree ll (pred $ startChar sep) var l)++")"
++" else ("++(buildDecisionTree (endChar sep) lr var r)++")"
++")"
buildEncoding :: EncodingElement -> String -> String
buildEncoding (EncodingRange start end bf bl bmin bmax) var
= let ranges :: [Int]
ranges = map fromIntegral $ zipWith (-) bmax bmin
in "(let num = (ord "++var++") - ("++show (ord start - (foldl (\n (r,vf,vm) -> n*(r+1) + (fromIntegral (vf-vm))) 0 (zip3 ranges bf bmin)))++")"
++concat ([ " ; (p"++show n++",r"++show n++") = "
++(if n==1 then "num" else "p"++show (n-1))
++" `divMod` "++show (r+1)
| r <- reverse ranges | n <- [1..] ])
++" in "
++concat (intersperse " >> " (reverse ["pushWord8 (fromIntegral (r"++show n++" + "++show w++"))" | n <- [1..] | w <- reverse bmin]))
++")"
buildEncoding (EncodingGroup start end encs) var
= let findParams st [] = st
findParams st (x:xs) = findParams (case compare (length x) (fst st) of
LT -> (fst st,False)
GT -> (length x,False)
EQ -> st) xs
(mx,same) = findParams (length $ head encs,True) (tail encs)
in if same
then ("(let off = "++show mx++"*(ord "++var++" - "++show (ord start)++") ; arr = "
++buildStaticArray (0,(length encs)*mx-1) (concat encs)
++" in "
++concat (intersperse " >> " ["pushWord8 (arr!(off+"++show (n-1)++"))" | n <- [1..mx]])
++")")
else ("(let off = "++show (mx+1)++"*((ord "++var++") - "++show (ord start)++") ; arr = "
++buildStaticArray (0,(length encs)*(mx+1)-1)
(concat [(fromIntegral $ length e)
:(e++replicate (mx-length e) 0) | e <- encs])
++ "::StaticArray Int Word8"
++" ; len = fromIntegral (arr!off)::Int ; bytes = map (\\n -> arr!(off+n)) [1..len]"
++" in mapM_ pushWord8 bytes)")
data EncodingElement
= EncodingRange Char Char [Word8] [Word8] [Word8] [Word8]
| EncodingGroup Char Char [[Word8]]
deriving Show
mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
mergeBy f [] ys = ys
mergeBy f xs [] = xs
mergeBy f (x:xs) (y:ys)
= case f x y of
LT -> x:mergeBy f xs (y:ys)
_ -> y:mergeBy f (x:xs) ys
startChar :: EncodingElement -> Char
startChar (EncodingRange c _ _ _ _ _) = c
startChar (EncodingGroup c _ _) = c
endChar :: EncodingElement -> Char
endChar (EncodingRange _ c _ _ _ _) = c
endChar (EncodingGroup _ c _) = c
encodingElements :: CharacterMapping -> [EncodingElement]
encodingElements mp = mergeBy (comparing startChar)
(buildGroups $ sortAssignments $ assignments mp)
(encodingRanges $ ranges mp)
assignments :: CharacterMapping -> [(Char,[Word8])]
assignments (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges))
= map (\a -> let CP [cp] = aU a
BS bs = aB a
in (cp,bs)
) ass
encodingRanges :: [(Char,Char,[Word8],[Word8],[Word8],[Word8])] -> [EncodingElement]
encodingRanges lst = sortBy (comparing (\(EncodingRange c _ _ _ _ _) -> c)) $
map (\(ufirst,ulast,bfirst,blast,bmin,bmax) -> EncodingRange ufirst ulast bfirst blast bmin bmax) lst
ranges :: CharacterMapping -> [(Char,Char,[Word8],[Word8],[Word8],[Word8])]
ranges (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges))
= map (\r -> let BS bfirst = rangeBFirst r
BS blast = rangeBLast r
CP [ufirst] = rangeUFirst r
CP [ulast] = rangeULast r
BS bmin = rangeBMin r
BS bmax = rangeBMax r
in (ufirst,ulast,bfirst,blast,bmin,bmax)
) ranges
sortAssignments :: [(Char,[Word8])] -> [(Char,[Word8])]
sortAssignments = sortBy (comparing fst)
buildGroups :: [(Char,[Word8])] -> [EncodingElement]
buildGroups [] = []
buildGroups ((c,bs):rest) = (EncodingGroup c end (bs:wrds)):buildGroups oth
where
(end,wrds,oth) = group c rest
group n [] = (n,[],[])
group n all@((c,bs):rest)
| succ n == c = let (e,res,oth) = group c rest
in (e,bs:res,oth)
| otherwise = (n,[],all)

View File

@ -1,5 +1,12 @@
module Main where
import Distribution.Simple (defaultMain)
import Distribution.Simple
import Data.Encoding.Preprocessor.Mapping
import Data.Encoding.Preprocessor.XMLMappingBuilder
main = defaultMain
main = defaultMainWithHooks (simpleUserHooks
{hookedPreProcessors = ("mapping",\_ _ -> mappingPreprocessor)
:("mapping2",\_ _ -> mappingPreprocessor)
:("xml",\_ _ -> xmlPreprocessor)
:(hookedPreProcessors simpleUserHooks)
})