More documentation for encodings

darcs-hash:20080115224955-a4fee-339ceea9439723fa09096abab221f2ad735c8905
2008-01-15 14:49:55 -08:00 · 2008-01-15 14:49:55 -08:00 · e849ef1404
commit e849ef1404
parent 38afec5701
15 changed files with 43 additions and 8 deletions
--- a/Data/Encoding/ASCII.hs
+++ b/Data/Encoding/ASCII.hs
@ -2,7 +2,7 @@
 -- | ASCII (American Standard Code for Information Interchange) is the
 --   \"normal\" computer encoding using the byte values 0-127 to represent
 --   characters. Refer to <http://en.wikipedia.org/wiki/ASCII> for
--   more informations.
+--   more information.
 module Data.Encoding.ASCII
 	(ASCII(..)) where

--- a/Data/Encoding/CP1250.hs
+++ b/Data/Encoding/CP1250.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1250 which encodes languages that use the Latin script.
+     See <http://en.wikipedia.org/wiki/CP1250> for more information.
+ -}
 module Data.Encoding.CP1250
 	(CP1250(..)) where

--- a/Data/Encoding/CP1251.hs
+++ b/Data/Encoding/CP1251.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1251 which encodes languages that use the Cyrillic alphabet.
+     See <http://en.wikipedia.org/wiki/CP1251> for more information.
+ -}
 module Data.Encoding.CP1251
 	(CP1251(..)) where

--- a/Data/Encoding/CP1252.hs
+++ b/Data/Encoding/CP1252.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1252 which is a superset of ISO 8859-1.
+     See <http://en.wikipedia.org/wiki/CP1252> for more information.
+ -}
 module Data.Encoding.CP1252
 	(CP1252(..)) where

--- a/Data/Encoding/CP1253.hs
+++ b/Data/Encoding/CP1253.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1253 which encodes Modern Greek.
+     See <http://en.wikipedia.org/wiki/CP1253> for more information.
+ -}
 module Data.Encoding.CP1253
 	(CP1253(..)) where

--- a/Data/Encoding/CP1254.hs
+++ b/Data/Encoding/CP1254.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1254 which encodes the Turkish language.
+     See <http://en.wikipedia.org/wiki/CP1254> for more information.
+ -}
 module Data.Encoding.CP1254
 	(CP1254(..)) where

--- a/Data/Encoding/CP1255.hs
+++ b/Data/Encoding/CP1255.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1255 which encodes the Hebrew language.
+     See <http://en.wikipedia.org/wiki/CP1255> for more information.
+ -}
 module Data.Encoding.CP1255
 	(CP1255(..)) where

--- a/Data/Encoding/CP1256.hs
+++ b/Data/Encoding/CP1256.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1256 which encodes languages which use the Arabic script.
+     See <http://en.wikipedia.org/wiki/CP1256> for more information.
+ -}
 module Data.Encoding.CP1256
 	(CP1256(..)) where

--- a/Data/Encoding/CP1257.hs
+++ b/Data/Encoding/CP1257.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1257 which encodes the Estonian, Latvian and Lithuanian languages.
+     See <http://en.wikipedia.org/wiki/CP1257> for more information.
+ -}
 module Data.Encoding.CP1257
 	(CP1257(..)) where

--- a/Data/Encoding/CP1258.hs
+++ b/Data/Encoding/CP1258.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
+{- | This module implements Windows Codepage number 1258 which encodes the Vietnamese language.
+     See <http://en.wikipedia.org/wiki/CP1258> for more information.
+ -}
 module Data.Encoding.CP1258
 	(CP1258(..)) where

--- a/Data/Encoding/ISO88591.hs
+++ b/Data/Encoding/ISO88591.hs
@ -1,6 +1,6 @@
 {-# LANGUAGE CPP,TemplateHaskell,DeriveDataTypeable #-}
 {- | Implements ISO\/IEC 8859-1 alias latin-1 encoding. See
-     <http://en.wikipedia.org/wiki/ISO/IEC_8859-1> for further informations.
+     <http://en.wikipedia.org/wiki/ISO/IEC_8859-1> for further information.
 -}
 module Data.Encoding.ISO88591
 	(ISO88591(..)
--- a/Data/Encoding/KOI8R.hs
+++ b/Data/Encoding/KOI8R.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE DeriveDataTypeable #-}
+{- | This module implements KOI8-R encoding which covers the Russian and Bulgarian alphabets.
+     See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
+ -}
 module Data.Encoding.KOI8R
 	(KOI8R(..)) where

--- a/Data/Encoding/UTF16.hs
+++ b/Data/Encoding/UTF16.hs
@ -1,5 +1,6 @@
 {-# LANGUAGE DeriveDataTypeable #-}
-{- | This module implements UTF-16 encoding and decoding as in RFC 2781
+{- | This module implements UTF-16 encoding and decoding as in RFC 2781.
+     See <http://en.wikipedia.org/wiki/UTF-16> for more information.
 -}
 module Data.Encoding.UTF16
 	(UTF16(..)
@ -18,9 +19,9 @@ import Data.Dynamic (toDyn)
 import Data.Typeable

 data UTF16
-	= UTF16
-	| UTF16BE
-	| UTF16LE
+	= UTF16		-- ^ Decodes big and little endian, encodes big endian.
+	| UTF16BE	-- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
+	| UTF16LE	-- ^ Little endian decoding and encoding.
 	deriving (Eq,Show,Typeable)

 utf16enc :: Bool -> (EncodeState,String) -> Maybe (Word8,(EncodeState,String))
--- a/Data/Encoding/UTF32.hs
+++ b/Data/Encoding/UTF32.hs
@ -1,4 +1,7 @@
 {-# LANGUAGE DeriveDataTypeable #-}
+{- | This module implements UTF-32 encoding and decoding.
+     See <http://en.wikipedia.org/wiki/UTF-32> for more information.
+ -}
 module Data.Encoding.UTF32
 	(UTF32(..))
 	where
--- a/Data/Encoding/UTF8.hs
+++ b/Data/Encoding/UTF8.hs
@ -1,5 +1,6 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {- | This module implements UTF-8 encoding and decoding as in RFC 3629.
+     See <http://en.wikipedia.org/wiki/UTF-8> for more information.
 -}
 module Data.Encoding.UTF8
 	(UTF8(..)) where
@ -14,8 +15,8 @@ import Control.Exception
 import Data.Typeable

 data UTF8
-	= UTF8
-	| UTF8Strict
+	= UTF8		-- ^ Very forgiving decoding mechanism, accepts everything that it can make any sense of.
+	| UTF8Strict	-- ^ Stricter decoding, doesn\'t accept sequences that have an overlong representation and checks bits that aren\'t used in the decoding.
 	deriving (Eq,Show,Typeable)

 encodeUTF8 :: Char -> (Word8,EncodeState)