Allow for documentation on mapping files
Ignore-this: 5bcfdd21c16464e29fb7498dd6ec8915 Use the first commented lines in each mapping file as the docstring for the whole module. darcs-hash:20090829125423-a4fee-c99efd2cccdaa24d3b283aca0d43cd7ff6970c7f
This commit is contained in:
parent
56867f5768
commit
b12649a071
@ -1,21 +1,5 @@
|
|||||||
#
|
#Implements the Windows-1250 encoding.
|
||||||
# Name: cp1250 to Unicode table
|
#For details, refer to <http://en.wikipedia.org/wiki/Windows-1250>.
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1250 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1250 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1251 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1251 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1251 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1252 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1252 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1252 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1253 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1253 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1253 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1254 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1254 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1254 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1255 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 1/7/2000
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1255 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1255 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1256 to Unicode table
|
|
||||||
# Unicode version: 2.1
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 01/5/99
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1256 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1256 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1257 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1257 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1257 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp1258 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.01
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp1258 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp1258 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp437_DOSLatinUS to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp437_DOSLatinUS code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp437_DOSLatinUS order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp737_DOSGreek to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp737_DOSGreek code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp737_DOSGreek order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp775_DOSBaltRim to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp775_DOSBaltRim code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp775_DOSBaltRim order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp850_DOSLatin1 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp850_DOSLatin1 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp850_DOSLatin1 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp852_DOSLatin2 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp852_DOSLatin2 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp852_DOSLatin2 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp855_DOSCyrillic to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp855_DOSCyrillic code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp855_DOSCyrillic order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp857_DOSTurkish to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp857_DOSTurkish code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp857_DOSTurkish order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp860_DOSPortuguese to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp860_DOSPortuguese code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp860_DOSPortuguese order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp861_DOSIcelandic to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp861_DOSIcelandic code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp861_DOSIcelandic order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp862_DOSHebrew to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp862_DOSHebrew code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp862_DOSHebrew order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp863_DOSCanadaF to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp863_DOSCanadaF code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp863_DOSCanadaF order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp864_DOSArabic to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp864_DOSArabic code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp864_DOSArabic order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp865_DOSNordic to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp865_DOSNordic code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp865_DOSNordic order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp866_DOSCyrillicRussian to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp866_DOSCyrillicRussian code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp866_DOSCyrillicRussian order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,20 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp869_DOSGreek2 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/24/96
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp869_DOSGreek2 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp869_DOSGreek2 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,21 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: cp874 to Unicode table
|
|
||||||
# Unicode version: 2.0
|
|
||||||
# Table version: 2.00
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 04/15/98
|
|
||||||
#
|
|
||||||
# Contact: Shawn.Steele@microsoft.com
|
|
||||||
#
|
|
||||||
# General notes: none
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the cp874 code (in hex)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 is the Unicode name (follows a comment sign, '#')
|
|
||||||
#
|
|
||||||
# The entries are in cp874 order
|
|
||||||
#
|
|
||||||
0x00 0x0000 #NULL
|
0x00 0x0000 #NULL
|
||||||
0x01 0x0001 #START OF HEADING
|
0x01 0x0001 #START OF HEADING
|
||||||
0x02 0x0002 #START OF TEXT
|
0x02 0x0002 #START OF TEXT
|
||||||
|
|||||||
@ -1,53 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: JIS X 0201 (1976) to Unicode 1.1 Table
|
|
||||||
# Unicode version: 1.1
|
|
||||||
# Table version: 0.9
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 8 March 1994
|
|
||||||
#
|
|
||||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
|
||||||
#
|
|
||||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
|
||||||
# No claims are made as to fitness for any particular purpose. No
|
|
||||||
# warranties of any kind are expressed or implied. The recipient
|
|
||||||
# agrees to determine applicability of information provided. If this
|
|
||||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
|
||||||
# remedy for any claim will be exchange of defective media within 90
|
|
||||||
# days of receipt.
|
|
||||||
#
|
|
||||||
# Recipient is granted the right to make copies in any form for
|
|
||||||
# internal distribution and to freely use the information supplied
|
|
||||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
|
||||||
# specifically excludes the right to re-distribute this file directly
|
|
||||||
# to third parties or other organizations whether for profit or not.
|
|
||||||
#
|
|
||||||
# General notes:
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# This table contains one set of mappings from JIS X 0201 into Unicode.
|
|
||||||
# Note that these data are *possible* mappings only and may not be the
|
|
||||||
# same as those used by actual products, nor may they be the best suited
|
|
||||||
# for all uses. For more information on the mappings between various code
|
|
||||||
# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
|
|
||||||
# VENDORS mapping data. Normative information on the mapping between
|
|
||||||
# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
|
|
||||||
# latest Unicode Character Database.
|
|
||||||
#
|
|
||||||
# If you have carefully considered the fact that the mappings in
|
|
||||||
# this table are only one possible set of mappings between JIS X 0201 and
|
|
||||||
# Unicode and have no normative status, but still feel that you
|
|
||||||
# have located an error in the table that requires fixing, you may
|
|
||||||
# report any such error to errata@unicode.org.
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the shift JIS code (in hex as 0xXX)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 the Unicode (ISO 10646) name (follows a comment sign)
|
|
||||||
#
|
|
||||||
# The entries are in JIS order
|
|
||||||
#
|
|
||||||
#
|
|
||||||
0x20 0x0020 # SPACE
|
0x20 0x0020 # SPACE
|
||||||
0x21 0x0021 # EXCLAMATION MARK
|
0x21 0x0021 # EXCLAMATION MARK
|
||||||
0x22 0x0022 # QUOTATION MARK
|
0x22 0x0022 # QUOTATION MARK
|
||||||
|
|||||||
@ -1,75 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: JIS X 0208 (1990) to Unicode
|
|
||||||
# Unicode version: 1.1
|
|
||||||
# Table version: 0.9
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 8 March 1994
|
|
||||||
#
|
|
||||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
|
||||||
#
|
|
||||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
|
||||||
# No claims are made as to fitness for any particular purpose. No
|
|
||||||
# warranties of any kind are expressed or implied. The recipient
|
|
||||||
# agrees to determine applicability of information provided. If this
|
|
||||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
|
||||||
# remedy for any claim will be exchange of defective media within 90
|
|
||||||
# days of receipt.
|
|
||||||
#
|
|
||||||
# Recipient is granted the right to make copies in any form for
|
|
||||||
# internal distribution and to freely use the information supplied
|
|
||||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
|
||||||
# specifically excludes the right to re-distribute this file directly
|
|
||||||
# to third parties or other organizations whether for profit or not.
|
|
||||||
#
|
|
||||||
# General notes:
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# This table contains one set of mappings from JIS X 0208 (1990) into Unicode.
|
|
||||||
# Note that these data are *possible* mappings only and may not be the
|
|
||||||
# same as those used by actual products, nor may they be the best suited
|
|
||||||
# for all uses. For more information on the mappings between various code
|
|
||||||
# pages incorporating the repertoire of JIS X 0208 (1990) and Unicode, consult the
|
|
||||||
# VENDORS mapping data. Normative information on the mapping between
|
|
||||||
# JIS X 0208 (1990) and Unicode may be found in the Unihan.txt file in the
|
|
||||||
# latest Unicode Character Database.
|
|
||||||
#
|
|
||||||
# If you have carefully considered the fact that the mappings in
|
|
||||||
# this table are only one possible set of mappings between JIS X 0208 (1990)
|
|
||||||
# and Unicode and have no normative status, but still feel that you
|
|
||||||
# have located an error in the table that requires fixing, you may
|
|
||||||
# report any such error to errata@unicode.org.
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Format: Four tab-separated columns
|
|
||||||
# Column #1 is the JIS X 0208 code (in hex as 0xXXXX)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 the Unicode name (follows a comment sign, '#')
|
|
||||||
# The official names for Unicode characters U+4E00
|
|
||||||
# to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX",
|
|
||||||
# where XXXX is the code point. Including all these
|
|
||||||
# names in this file increases its size substantially
|
|
||||||
# and needlessly. The token "<CJK>" is used for the
|
|
||||||
# name of these characters. If necessary, it can be
|
|
||||||
# expanded algorithmically by a parser or editor.
|
|
||||||
#
|
|
||||||
# The entries are in JIS X 0208 order
|
|
||||||
#
|
|
||||||
# The following algorithms can be used to change the hex form
|
|
||||||
# of JIS 0208 to other standard forms:
|
|
||||||
#
|
|
||||||
# To change hex to EUC form, add 0x8080
|
|
||||||
# To change hex to kuten form, first subtract 0x2020. Then
|
|
||||||
# the high and low bytes correspond to the ku and ten of
|
|
||||||
# the kuten form. For example, 0x2121 -> 0x0101 -> 0101;
|
|
||||||
# 0x7426 -> 0x5406 -> 8406
|
|
||||||
#
|
|
||||||
# The kanji mappings are a normative part of ISO/IEC 10646. The
|
|
||||||
# non-kanji mappings are provisional, pending definition of
|
|
||||||
# official mappings by Japanese standards bodies
|
|
||||||
#
|
|
||||||
# Any comments or problems, contact <John_Jenkins@taligent.com>
|
|
||||||
#
|
|
||||||
#
|
|
||||||
0x2121 0x3000 # IDEOGRAPHIC SPACE
|
0x2121 0x3000 # IDEOGRAPHIC SPACE
|
||||||
0x2122 0x3001 # IDEOGRAPHIC COMMA
|
0x2122 0x3001 # IDEOGRAPHIC COMMA
|
||||||
0x2123 0x3002 # IDEOGRAPHIC FULL STOP
|
0x2123 0x3002 # IDEOGRAPHIC FULL STOP
|
||||||
|
|||||||
@ -1,89 +1,3 @@
|
|||||||
#
|
|
||||||
# Name: JIS X 0212 (1990) to Unicode
|
|
||||||
# Unicode version: 1.1
|
|
||||||
# Table version: 0.9
|
|
||||||
# Table format: Format A
|
|
||||||
# Date: 8 March 1994
|
|
||||||
#
|
|
||||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
|
||||||
#
|
|
||||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
|
||||||
# No claims are made as to fitness for any particular purpose. No
|
|
||||||
# warranties of any kind are expressed or implied. The recipient
|
|
||||||
# agrees to determine applicability of information provided. If this
|
|
||||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
|
||||||
# remedy for any claim will be exchange of defective media within 90
|
|
||||||
# days of receipt.
|
|
||||||
#
|
|
||||||
# Recipient is granted the right to make copies in any form for
|
|
||||||
# internal distribution and to freely use the information supplied
|
|
||||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
|
||||||
# specifically excludes the right to re-distribute this file directly
|
|
||||||
# to third parties or other organizations whether for profit or not.
|
|
||||||
#
|
|
||||||
# General notes:
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# This table contains one set of mappings from JIS X 0212 into Unicode.
|
|
||||||
# Note that these data are *possible* mappings only and may not be the
|
|
||||||
# same as those used by actual products, nor may they be the best suited
|
|
||||||
# for all uses. For more information on the mappings between various code
|
|
||||||
# pages incorporating the repertoire of JIS X 0212 and Unicode, consult the
|
|
||||||
# VENDORS mapping data. Normative information on the mapping between
|
|
||||||
# JIS X 0212 and Unicode may be found in the Unihan.txt file in the
|
|
||||||
# latest Unicode Character Database.
|
|
||||||
#
|
|
||||||
# If you have carefully considered the fact that the mappings in
|
|
||||||
# this table are only one possible set of mappings between JIS X 0212 and
|
|
||||||
# Unicode and have no normative status, but still feel that you
|
|
||||||
# have located an error in the table that requires fixing, you may
|
|
||||||
# report any such error to errata@unicode.org.
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Format: Three tab-separated columns
|
|
||||||
# Column #1 is the JIS X 0212 code (in hex as 0xXXXX)
|
|
||||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
|
||||||
# Column #3 the Unicode name (follows a comment sign, '#')
|
|
||||||
# The official names for Unicode characters U+4E00
|
|
||||||
# to U+9FA5, inclusive, is "CJK UNIFIED IDEOGRAPH-XXXX",
|
|
||||||
# where XXXX is the code point. Including all these
|
|
||||||
# names in this file increases its size substantially
|
|
||||||
# and needlessly. The token "<CJK>" is used for the
|
|
||||||
# name of these characters. If necessary, it can be
|
|
||||||
# expanded algorithmically by a parser or editor.
|
|
||||||
#
|
|
||||||
# The entries are in JIS X 0212 order
|
|
||||||
#
|
|
||||||
# The following algorithms can be used to change the hex form
|
|
||||||
# of JIS 0212 to other standard forms:
|
|
||||||
#
|
|
||||||
# To change hex to EUC form, add 0x8080
|
|
||||||
# To change hex to kuten form, first subtract 0x2020. Then
|
|
||||||
# the high and low bytes correspond to the ku and ten of
|
|
||||||
# the kuten form. For example, 0x2121 -> 0x0101 -> 0101;
|
|
||||||
# 0x6D63 -> 0x4D43 -> 7767
|
|
||||||
#
|
|
||||||
# The kanji mappings are a normative part of ISO/IEC 10646. The
|
|
||||||
# non-kanji mappings are provisional, pending definition of
|
|
||||||
# official mappings by Japanese standards bodies
|
|
||||||
#
|
|
||||||
# Any comments or problems, contact <John_Jenkins@taligent.com>
|
|
||||||
#
|
|
||||||
# Notes:
|
|
||||||
#
|
|
||||||
# 1. JIS X 0212 apparently unified the following two symbols
|
|
||||||
# into a single character at 0x2922:
|
|
||||||
#
|
|
||||||
# LATIN CAPITAL LETTER D WITH STROKE
|
|
||||||
# LATIN CAPITAL LETTER ETH
|
|
||||||
#
|
|
||||||
# However, JIS X 0212 maintains the distinction between
|
|
||||||
# the lowercase forms of these two elements at 0x2942 and 0x2943.
|
|
||||||
# Given the structre of these JIS encodings, it is clear that
|
|
||||||
# 0x2922 and 0x2942 are intended to be a capital/small pair.
|
|
||||||
# Consequently, in the Unicode mapping, 0x2922 is treated as
|
|
||||||
# LATIN CAPITAL LETTER D WITH STROKE.
|
|
||||||
#
|
|
||||||
0x222F 0x02D8 # BREVE
|
0x222F 0x02D8 # BREVE
|
||||||
0x2230 0x02C7 # CARON (Mandarin Chinese third tone)
|
0x2230 0x02C7 # CARON (Mandarin Chinese third tone)
|
||||||
0x2231 0x00B8 # CEDILLA
|
0x2231 0x00B8 # CEDILLA
|
||||||
|
|||||||
@ -1,83 +1,4 @@
|
|||||||
#=======================================================================
|
# /Notes on Mac OS Roman:/
|
||||||
# File name: ROMAN.TXT
|
|
||||||
#
|
|
||||||
# Contents: Map (external version) from Mac OS Roman
|
|
||||||
# character set to Unicode 2.1 and later.
|
|
||||||
#
|
|
||||||
# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
|
|
||||||
# reserved.
|
|
||||||
#
|
|
||||||
# Contact: charsets@apple.com
|
|
||||||
#
|
|
||||||
# Changes:
|
|
||||||
#
|
|
||||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
|
||||||
# <c1.1> and Text Encoding Converter 2.0.
|
|
||||||
# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal
|
|
||||||
# utom<b5>.
|
|
||||||
# b03 1999-Sep-22 Update contact e-mail address. Matches
|
|
||||||
# internal utom<b4>, ufrm<b3>, and Text
|
|
||||||
# Encoding Converter version 1.5.
|
|
||||||
# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change
|
|
||||||
# mapping of 0xDB from CURRENCY SIGN to
|
|
||||||
# EURO SIGN. Matches internal utom<b3>,
|
|
||||||
# ufrm<b3>.
|
|
||||||
# n08 1998-Feb-05 Minor update to header comments
|
|
||||||
# n06 1997-Dec-14 Add warning about future changes to 0xDB
|
|
||||||
# from CURRENCY SIGN to EURO SIGN. Clarify
|
|
||||||
# some header information
|
|
||||||
# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>:
|
|
||||||
# Change standard mapping for 0xBD from U+2126
|
|
||||||
# to its canonical decomposition, U+03A9.
|
|
||||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
|
||||||
# Matches internal ufrm<n9>.
|
|
||||||
#
|
|
||||||
# Standard header:
|
|
||||||
# ----------------
|
|
||||||
#
|
|
||||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
|
||||||
# Computer, Inc., registered in the United States and other countries.
|
|
||||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
|
||||||
# throughout this document, "Macintosh" can be used to refer to
|
|
||||||
# Macintosh computers and "Unicode" can be used to refer to the
|
|
||||||
# Unicode standard.
|
|
||||||
#
|
|
||||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
|
||||||
# either express or implied, with respect to this document and the
|
|
||||||
# included data, its quality, accuracy, or fitness for a particular
|
|
||||||
# purpose. In no event will Apple be liable for direct, indirect,
|
|
||||||
# special, incidental, or consequential damages resulting from any
|
|
||||||
# defect or inaccuracy in this document or the included data.
|
|
||||||
#
|
|
||||||
# These mapping tables and character lists are subject to change.
|
|
||||||
# The latest tables should be available from the following:
|
|
||||||
#
|
|
||||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
|
||||||
#
|
|
||||||
# For general information about Mac OS encodings and these mapping
|
|
||||||
# tables, see the file "README.TXT".
|
|
||||||
#
|
|
||||||
# Format:
|
|
||||||
# -------
|
|
||||||
#
|
|
||||||
# Three tab-separated columns;
|
|
||||||
# '#' begins a comment which continues to the end of the line.
|
|
||||||
# Column #1 is the Mac OS Roman code (in hex as 0xNN)
|
|
||||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
|
||||||
# Column #3 is a comment containing the Unicode name
|
|
||||||
#
|
|
||||||
# The entries are in Mac OS Roman code order.
|
|
||||||
#
|
|
||||||
# One of these mappings requires the use of a corporate character.
|
|
||||||
# See the file "CORPCHAR.TXT" and notes below.
|
|
||||||
#
|
|
||||||
# Control character mappings are not shown in this table, following
|
|
||||||
# the conventions of the standard UTC mapping tables. However, the
|
|
||||||
# Mac OS Roman character set uses the standard control characters at
|
|
||||||
# 0x00-0x1F and 0x7F.
|
|
||||||
#
|
|
||||||
# Notes on Mac OS Roman:
|
|
||||||
# ----------------------
|
|
||||||
#
|
#
|
||||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||||
# environments, it is only supported directly in programming
|
# environments, it is only supported directly in programming
|
||||||
@ -117,8 +38,7 @@
|
|||||||
# interpreted as associated with these glyphs; they are usually
|
# interpreted as associated with these glyphs; they are usually
|
||||||
# interpreted (if at all) as the control codes DC1-DC4.
|
# interpreted (if at all) as the control codes DC1-DC4.
|
||||||
#
|
#
|
||||||
# Unicode mapping issues and notes:
|
# /Unicode mapping issues and notes:/
|
||||||
# ---------------------------------
|
|
||||||
#
|
#
|
||||||
# The following corporate zone Unicode character is used in this
|
# The following corporate zone Unicode character is used in this
|
||||||
# mapping:
|
# mapping:
|
||||||
@ -129,20 +49,17 @@
|
|||||||
# is not authorized for use without permission of Apple, and
|
# is not authorized for use without permission of Apple, and
|
||||||
# unauthorized use might constitute trademark infringement.
|
# unauthorized use might constitute trademark infringement.
|
||||||
#
|
#
|
||||||
# Details of mapping changes in each version:
|
# /Details of mapping changes in each version:/
|
||||||
# -------------------------------------------
|
|
||||||
#
|
#
|
||||||
# Changes from version n08 to version b02:
|
# Changes from version n08 to version b02:
|
||||||
#
|
#
|
||||||
# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
# * Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
||||||
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
|
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
|
||||||
#
|
#
|
||||||
# Changes from version n03 to version n04:
|
# Changes from version n03 to version n04:
|
||||||
#
|
#
|
||||||
# - Change mapping of 0xBD from U+2126 to its canonical
|
# * Change mapping of 0xBD from U+2126 to its canonical
|
||||||
# decomposition, U+03A9.
|
# decomposition, U+03A9.
|
||||||
#
|
|
||||||
##################
|
|
||||||
|
|
||||||
0x20 0x0020 # SPACE
|
0x20 0x0020 # SPACE
|
||||||
0x21 0x0021 # EXCLAMATION MARK
|
0x21 0x0021 # EXCLAMATION MARK
|
||||||
|
|||||||
@ -18,16 +18,22 @@ data MappingType
|
|||||||
| JISMapping
|
| JISMapping
|
||||||
deriving (Eq,Ord,Show,Read)
|
deriving (Eq,Ord,Show,Read)
|
||||||
|
|
||||||
readTranslation :: Int -> FilePath -> IO [(Integer,Maybe Char)]
|
readTranslation :: Int -> FilePath -> IO ([(Integer,Maybe Char)],[String])
|
||||||
readTranslation offset file = do
|
readTranslation offset file = do
|
||||||
cont <- readFile file
|
cont <- fmap parseTranslationTable $ readFile file
|
||||||
return $ mapMaybe (\ln -> case drop offset ln of
|
let docstr = mapMaybe snd (takeWhile (null.fst) cont)
|
||||||
[src] -> Just (src,Nothing)
|
let trans = mapMaybe (\(ln,comm) -> case drop offset ln of
|
||||||
[src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
|
[src] -> Just (src,Nothing)
|
||||||
_ -> Nothing) (parseTranslationTable cont)
|
[src,trg] -> Just (src,Just $ chr $ fromIntegral trg)
|
||||||
|
_ -> Nothing) cont
|
||||||
|
return (trans,docstr)
|
||||||
|
|
||||||
parseTranslationTable :: String -> [[Integer]]
|
parseTranslationTable :: String -> [([Integer],Maybe String)]
|
||||||
parseTranslationTable cont = filter (not.null) (map (\ln -> map read (takeWhile ((/='#').head) (words ln))) (lines cont))
|
parseTranslationTable cont = map (\ln -> let (trans,comm) = break (=='#') ln
|
||||||
|
in (map read (words trans),case comm of
|
||||||
|
"" -> Nothing
|
||||||
|
_ -> Just (tail comm))
|
||||||
|
) (lines cont)
|
||||||
|
|
||||||
{-fillTranslations :: (Ix a,Show a) => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)]
|
{-fillTranslations :: (Ix a,Show a) => a -> a -> [(a,Maybe Char)] -> [(a,Maybe Char)]
|
||||||
fillTranslations f t = merge (range (f,t))
|
fillTranslations f t = merge (range (f,t))
|
||||||
@ -70,7 +76,7 @@ mappingPreprocessor = PreProcessor
|
|||||||
|
|
||||||
preprocessMapping :: MappingType -> FilePath -> FilePath -> [String] -> String -> IO ()
|
preprocessMapping :: MappingType -> FilePath -> FilePath -> [String] -> String -> IO ()
|
||||||
preprocessMapping tp src trg mods name = do
|
preprocessMapping tp src trg mods name = do
|
||||||
trans <- readTranslation 0 src
|
(trans,doc) <- readTranslation 0 src
|
||||||
let mod = concat $ intersperse "." (mods++[name])
|
let mod = concat $ intersperse "." (mods++[name])
|
||||||
let wsize = case tp of
|
let wsize = case tp of
|
||||||
ISOMapping -> 1
|
ISOMapping -> 1
|
||||||
@ -106,7 +112,11 @@ preprocessMapping tp src trg mods name = do
|
|||||||
writeFile trg $ unlines $
|
writeFile trg $ unlines $
|
||||||
["{- This file has been auto-generated. Do not edit it. -}"
|
["{- This file has been auto-generated. Do not edit it. -}"
|
||||||
,"{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"
|
,"{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"
|
||||||
,"module "++mod++"("++name++"(..)) where"
|
]++(case doc of
|
||||||
|
[] -> []
|
||||||
|
_ -> ("{- | "++head doc):(map (\ln -> " "++ln) (tail doc)) ++ [" -}"])
|
||||||
|
++
|
||||||
|
["module "++mod++"("++name++"(..)) where"
|
||||||
,""
|
,""
|
||||||
,"import Data.Encoding.Base"
|
,"import Data.Encoding.Base"
|
||||||
,"import Data.Encoding.ByteSource"
|
,"import Data.Encoding.ByteSource"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user