src/third_party/icu/source/data/mappings/convrtrs.txt - cobalt - Git at Google

 # ******************************************************************************
 # *
 # *   Copyright (C) 1995-2014, International Business Machines
 # *   Corporation and others.  All Rights Reserved.
 # *
 # ******************************************************************************

 # If this converter alias table looks very confusing, a much easier to
 # understand view can be found at this demo:
 # http://demo.icu-project.org/icu-bin/convexp

 # IMPORTANT NOTE
 #
 # This file is not read directly by ICU. If you change it, you need to
 # run gencnval, and eventually run pkgdata to update the representation that
 # ICU uses for aliases. The gencnval tool will normally compile this file into
 # cnvalias.icu. The gencnval -v verbose option will help you when you edit
 # this file.

 # Please be friendly to the rest of us that edit this table by
 # keeping this table free of tabs.

 # This is an alias file used by the character set converter.
 # A lot of converter information can be found in unicode/ucnv.h, but here
 # is more information about this file.
 #
 # If you are adding a new converter to this list and want to include it in the
 # icu data library, please be sure to add an entry to the appropriate ucm*.mk file
 # (see ucmfiles.mk for more information).
 #
 # Here is the file format using BNF-like syntax:
 #
 # converterTable ::= tags { converterLine* }
 # converterLine ::= converterName [ tags ] { taggedAlias* }'\n'
 # taggedAlias ::= alias [ tags ]
 # tags ::= '{' { tag+ } '}'
 # tag ::= standard['*']
 # converterName ::= [0-9a-zA-Z:_'-']+
 # alias ::= converterName
 #
 # Except for the converter name, aliases are case insensitive.
 # Names are separated by whitespace.
 # Line continuation and comment syntax are similar to the GNU make syntax.
 # Any lines beginning with whitespace (e.g. U+0020 SPACE or U+0009 HORIZONTAL
 # TABULATION) are presumed to be a continuation of the previous line.
 # The # symbol starts a comment and the comment continues till the end of
 # the line.
 #
 # The converter
 #
 # All names can be tagged by including a space-separated list of tags in
 # curly braces, as in ISO_8859-1:1987{IANA*} iso-8859-1 { MIME* } or
 # some-charset{MIME* IANA*}. The order of tags does not matter, and
 # whitespace is allowed between the tagged name and the tags list.
 #
 # The tags can be used to get standard names using ucnv_getStandardName().
 #
 # The complete list of recognized tags used in this file is defined in
 # the affinity list near the beginning of the file.
 #
 # The * after the standard tag denotes that the previous alias is the
 # preferred (default) charset name for that standard. There can only
 # be one of these default charset names per converter.


 # The world is getting more complicated...
 # Supporting XML parsers, HTML, MIME, and similar applications
 # that mark encodings with a charset name can be difficult.
 # Many of these applications and operating systems will update
 # their codepages over time.

 # It means that a new codepage, one that differs from an
 # old one by changing a code point, e.g., to the Euro sign,
 # must not get an old alias, because it would mean that
 # old files with this alias would be interpreted differently.

 # If a codepage gets updated by assigning characters to previously
 # unassigned code points, then a new name is not necessary.
 # Also, some codepages map unassigned codepage byte values
 # to the same numbers in Unicode for roundtripping. It may be
 # industry practice to keep the encoding name in such a case, too
 # (example: Windows codepages).

 # The aliases listed in the list of character sets
 # that is maintained by the IANA (http://www.iana.org/) must
 # not be changed to mean encodings different from what this
 # list shows. Currently, the IANA list is at
 # http://www.iana.org/assignments/character-sets
 # It should also be mentioned that the exact mapping table used for each
 # IANA name usually isn't specified. This means that some other applications
 # and operating systems are left to interpret the exact mappings for the
 # underspecified aliases. For instance, Shift-JIS on a Solaris platform
 # may be different from Shift-JIS on a Windows platform. This is why
 # some of the aliases can be tagged to differentiate different mapping
 # tables with the same alias. If an alias is given to more than one converter,
 # it is considered to be an ambiguous alias, and the affinity list will
 # choose the converter to use when a standard isn't specified with the alias.

 # Name matching is case-insensitive. Also, dashes '-', underscores '_'
 # and spaces ' ' are ignored in names (thus cs-iso_latin-1, csisolatin1
 # and "cs iso latin 1" are the same).
 # However, the names in the left column are directly file names
 # or names of algorithmic converters, and their case must not
 # be changed - or else code and/or file names must also be changed.
 # For example, the converter ibm-921 is expected to be the file ibm-921.cnv.


 # The immediately following list is the affinity list of supported standard tags.
 # When multiple converters have the same alias under different standards,
 # the standard nearest to the top of this list with that alias will
 # be the first converter that will be opened. The ordering of the aliases
 # after this affinity list does not affect the preferred alias, but it may
 # affect the order of the returned list of aliases for a given converter.
 #
 # The general ordering is from specific and frequently used to more general
 # or rarely used at the bottom.
 {
     UTR22           # Name format specified by http://www.unicode.org/unicode/reports/tr22/
     HTML            # WHATWG's encoding spec; https://encoding.spec.whatwg.org
     IANA            # Source: http://www.iana.org/assignments/character-sets
     MIME            # Source: http://www.iana.org/assignments/character-sets
     }

 UTF-8 { MIME* HTML* }
     unicode-1-1-utf-8
     utf8

 utf-16be { MIME* HTML* }

 utf-16le { MIME* HTML* }
     utf-16

 # Keep UTF-32 entries for now until we sort out Blink's behavior when
 # UTF-32 is dropped.
 UTF-32 { IANA* MIME* }          ISO-10646-UCS-4 { IANA }
                                 csUCS4
                                 ucs-4
 # Until we kill UTF-32 in Blink, add MIME tag here.
 UTF-32BE { MIME* IANA* }              UTF32_BigEndian
 UTF-32LE { MIME* IANA* }              UTF32_LittleEndian

 ibm866-html
     IBM866 { MIME* HTML* }
     866
     cp866
     csibm866

 iso-8859-2-html
     ISO-8859-2 { MIME* HTML* }
     csisolatin2
     iso-ir-101
     iso8859-2
     iso88592
     iso_8859-2
     iso_8859-2:1987
     l2
     latin2

 iso-8859-3-html
     ISO-8859-3 { MIME* HTML* }
     csisolatin3
     iso-ir-109
     iso8859-3
     iso88593
     iso_8859-3
     iso_8859-3:1988
     l3
     latin3

 iso-8859-4-html
     ISO-8859-4 { MIME* HTML* }
     csisolatin4
     iso-ir-110
     iso8859-4
     iso88594
     iso_8859-4
     iso_8859-4:1988
     l4
     latin4

 iso-8859-5-html
     ISO-8859-5 { MIME* HTML* }
     csisolatincyrillic
     cyrillic
     iso-ir-144
     iso8859-5
     iso88595
     iso_8859-5
     iso_8859-5:1988

 iso-8859-6-html
     ISO-8859-6 { MIME* HTML* }
     arabic
     asmo-708
     csiso88596e
     csiso88596i
     csisolatinarabic
     ecma-114
     iso-8859-6-e
     iso-8859-6-i
     iso-ir-127
     iso8859-6
     iso88596
     iso_8859-6
     iso_8859-6:1987

 iso-8859-7-html
     ISO-8859-7 { MIME* HTML* }
     csisolatingreek
     ecma-118
     elot_928
     greek
     greek8
     iso-ir-126
     iso8859-7
     iso88597
     iso_8859-7
     iso_8859-7:1987
     sun_eu_greek

 iso-8859-8-html
     ISO-8859-8 { MIME* HTML* }
     csiso88598e { MIME }
     csisolatinhebrew
     hebrew
     ISO-8859-8-E
     ISO-8859-8-I
     iso-ir-138
     iso8859-8
     iso88598
     iso_8859-8
     iso_8859-8:1988
     visual
     # adding this one leads to a failure in encoding-labels.html
 #   csiso88598i


 # This alias has to be dealt with by TextCodecICU unless
 # multiple encodings can share a single mapping table.
 #ISO-8859-8-I { MIME* HTML* }
 #   csiso88598i
 #   logical

 iso-8859-10-html
     ISO-8859-10 { MIME* HTML* }
     csisolatin6
     iso-ir-157
     iso8859-10
     iso885910
     l6
     latin6

 iso-8859-13-html
     ISO-8859-13 { MIME* HTML* }
     iso8859-13
     iso885913

 iso-8859-14-html
     ISO-8859-14 { MIME* HTML* }
     iso8859-14
     iso885914

 iso-8859-15-html
     ISO-8859-15 { MIME* HTML* }
     csisolatin9
     iso8859-15
     iso885915
     iso_8859-15
     l9

 iso-8859-16-html
     ISO-8859-16 { MIME* HTML* }

 koi8-r-html
     KOI8-R { MIME* HTML* }
     cskoi8r
     koi
     koi8
     koi8_r

 koi8-u-html
     KOI8-U { MIME* HTML* }
     koi8-ru

 macintosh-html
     macintosh { MIME* HTML* }
     csmacintosh
     mac
     x-mac-roman

 windows-874-html
     windows-874 { MIME* HTML* }
     dos-874
     iso-8859-11
     iso8859-11
     iso885911
     tis-620

 windows-1250-html
     windows-1250 { MIME* HTML* }
     cp1250
     x-cp1250

 windows-1251-html
     windows-1251 { MIME* HTML* }
     cp1251
     x-cp1251

 windows-1253-html
     windows-1253 { MIME* HTML* }
     cp1253
     x-cp1253

 windows-1254-html
     windows-1254 { MIME* HTML* }
     cp1254
     csisolatin5
     iso-8859-9
     iso-ir-148
     iso8859-9
     iso88599
     iso_8859-9
     iso_8859-9:1989
     l5
     latin5
     x-cp1254

 windows-1255-html
     windows-1255 { MIME* HTML* }
     cp1255
     x-cp1255

 windows-1256-html
     windows-1256 { MIME* HTML* }
     cp1256
     x-cp1256

 windows-1257-html
     windows-1257 { MIME* HTML* }
     cp1257
     x-cp1257

 windows-1258-html
     windows-1258 { MIME* HTML* }
     cp1258
     x-cp1258

 x-mac-cyrillic-html
     x-mac-cyrillic { MIME* HTML* }
     x-mac-ukrainian

 # Keep GBK and GB18030 separate for now until we decide
 # what to do about them: crbug.com/339862
 # The encoding spec requires that decoding to Unicode should use GB18030
 # while encoding from Unicode should use GBK.

 windows-936-2000
                        GBK { MIME* IANA* }
                        CP936 { IANA }
                        MS936 { IANA }
                        chinese { IANA }
                        iso-ir-58 { IANA }
                        GB2312 { IANA MIME }
                        GB_2312-80 { IANA }
                        gb_2312
                        gb2312-1980
                        csGB2312 { IANA }
                        csiso58gb231280
                        EUC-CN
                        x-gbk
                        windows-936

 # GB 18030 is partly algorithmic, using the MBCS converter
 gb18030 { IANA* }      gb18030 { HTML* MIME* }  ibm-1392 windows-54936

 big5-html
     Big5 { MIME* HTML* }
     cn-big5
     csbig5
     x-x-big5
     Big5-HKSCS

 euc-jp-html
     EUC-JP { MIME* HTML* }
     cseucpkdfmtjapanese
     x-euc-jp

 ISO_2022,locale=ja,version=0
     ISO-2022-JP { MIME* HTML* }
     csiso2022jp

 shift_jis-html
     Shift_JIS { MIME* HTML* }
     csshiftjis
     ms_kanji
     shift-jis
     sjis
     windows-31j
     x-sjis

 euc-kr-html
     EUC-KR { MIME* HTML* }
     cseuckr
     csksc56011987
     iso-ir-149
     korean
     ks_c_5601-1987
     ks_c_5601-1989
     ksc5601
     ksc_5601
     windows-949

 # We need to keep these aliases so that documents labelled with them
 # are converted to a single U+FFFD instead of being rendered as gibberish.
 ISO-2022-KR { HTML* MIME* } csISO2022KR { IANA }
 ISO-2022-CN { IANA* HTML* } csISO2022CN  x-ISO-2022-CN-GB
 ISO-2022-CN-EXT { IANA* HTML* }
 HZ-GB-2312 { HTML* IANA* } HZ
	# ******************************************************************************
	# *
	# * Copyright (C) 1995-2014, International Business Machines
	# * Corporation and others. All Rights Reserved.
	# *
	# ******************************************************************************

	# If this converter alias table looks very confusing, a much easier to
	# understand view can be found at this demo:
	# http://demo.icu-project.org/icu-bin/convexp

	# IMPORTANT NOTE
	#
	# This file is not read directly by ICU. If you change it, you need to
	# run gencnval, and eventually run pkgdata to update the representation that
	# ICU uses for aliases. The gencnval tool will normally compile this file into
	# cnvalias.icu. The gencnval -v verbose option will help you when you edit
	# this file.

	# Please be friendly to the rest of us that edit this table by
	# keeping this table free of tabs.

	# This is an alias file used by the character set converter.
	# A lot of converter information can be found in unicode/ucnv.h, but here
	# is more information about this file.
	#
	# If you are adding a new converter to this list and want to include it in the
	# icu data library, please be sure to add an entry to the appropriate ucm*.mk file
	# (see ucmfiles.mk for more information).
	#
	# Here is the file format using BNF-like syntax:
	#
	# converterTable ::= tags { converterLine* }
	# converterLine ::= converterName [ tags ] { taggedAlias* }'\n'
	# taggedAlias ::= alias [ tags ]
	# tags ::= '{' { tag+ } '}'
	# tag ::= standard['*']
	# converterName ::= [0-9a-zA-Z:_'-']+
	# alias ::= converterName
	#
	# Except for the converter name, aliases are case insensitive.
	# Names are separated by whitespace.
	# Line continuation and comment syntax are similar to the GNU make syntax.
	# Any lines beginning with whitespace (e.g. U+0020 SPACE or U+0009 HORIZONTAL
	# TABULATION) are presumed to be a continuation of the previous line.
	# The # symbol starts a comment and the comment continues till the end of
	# the line.
	#
	# The converter
	#
	# All names can be tagged by including a space-separated list of tags in
	# curly braces, as in ISO_8859-1:1987{IANA*} iso-8859-1 { MIME* } or
	# some-charset{MIME* IANA*}. The order of tags does not matter, and
	# whitespace is allowed between the tagged name and the tags list.
	#
	# The tags can be used to get standard names using ucnv_getStandardName().
	#
	# The complete list of recognized tags used in this file is defined in
	# the affinity list near the beginning of the file.
	#
	# The * after the standard tag denotes that the previous alias is the
	# preferred (default) charset name for that standard. There can only
	# be one of these default charset names per converter.



	# The world is getting more complicated...
	# Supporting XML parsers, HTML, MIME, and similar applications
	# that mark encodings with a charset name can be difficult.
	# Many of these applications and operating systems will update
	# their codepages over time.

	# It means that a new codepage, one that differs from an
	# old one by changing a code point, e.g., to the Euro sign,
	# must not get an old alias, because it would mean that
	# old files with this alias would be interpreted differently.

	# If a codepage gets updated by assigning characters to previously
	# unassigned code points, then a new name is not necessary.
	# Also, some codepages map unassigned codepage byte values
	# to the same numbers in Unicode for roundtripping. It may be
	# industry practice to keep the encoding name in such a case, too
	# (example: Windows codepages).

	# The aliases listed in the list of character sets
	# that is maintained by the IANA (http://www.iana.org/) must
	# not be changed to mean encodings different from what this
	# list shows. Currently, the IANA list is at
	# http://www.iana.org/assignments/character-sets
	# It should also be mentioned that the exact mapping table used for each
	# IANA name usually isn't specified. This means that some other applications
	# and operating systems are left to interpret the exact mappings for the
	# underspecified aliases. For instance, Shift-JIS on a Solaris platform
	# may be different from Shift-JIS on a Windows platform. This is why
	# some of the aliases can be tagged to differentiate different mapping
	# tables with the same alias. If an alias is given to more than one converter,
	# it is considered to be an ambiguous alias, and the affinity list will
	# choose the converter to use when a standard isn't specified with the alias.

	# Name matching is case-insensitive. Also, dashes '-', underscores '_'
	# and spaces ' ' are ignored in names (thus cs-iso_latin-1, csisolatin1
	# and "cs iso latin 1" are the same).
	# However, the names in the left column are directly file names
	# or names of algorithmic converters, and their case must not
	# be changed - or else code and/or file names must also be changed.
	# For example, the converter ibm-921 is expected to be the file ibm-921.cnv.



	# The immediately following list is the affinity list of supported standard tags.
	# When multiple converters have the same alias under different standards,
	# the standard nearest to the top of this list with that alias will
	# be the first converter that will be opened. The ordering of the aliases
	# after this affinity list does not affect the preferred alias, but it may
	# affect the order of the returned list of aliases for a given converter.
	#
	# The general ordering is from specific and frequently used to more general
	# or rarely used at the bottom.
	{
	UTR22 # Name format specified by http://www.unicode.org/unicode/reports/tr22/
	HTML # WHATWG's encoding spec; https://encoding.spec.whatwg.org
	IANA # Source: http://www.iana.org/assignments/character-sets
	MIME # Source: http://www.iana.org/assignments/character-sets
	}

	UTF-8 { MIME* HTML* }
	unicode-1-1-utf-8
	utf8

	utf-16be { MIME* HTML* }

	utf-16le { MIME* HTML* }
	utf-16

	# Keep UTF-32 entries for now until we sort out Blink's behavior when
	# UTF-32 is dropped.
	UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA }
	csUCS4
	ucs-4
	# Until we kill UTF-32 in Blink, add MIME tag here.
	UTF-32BE { MIME* IANA* } UTF32_BigEndian
	UTF-32LE { MIME* IANA* } UTF32_LittleEndian

	ibm866-html
	IBM866 { MIME* HTML* }
	866
	cp866
	csibm866

	iso-8859-2-html
	ISO-8859-2 { MIME* HTML* }
	csisolatin2
	iso-ir-101
	iso8859-2
	iso88592
	iso_8859-2
	iso_8859-2:1987
	l2
	latin2

	iso-8859-3-html
	ISO-8859-3 { MIME* HTML* }
	csisolatin3
	iso-ir-109
	iso8859-3
	iso88593
	iso_8859-3
	iso_8859-3:1988
	l3
	latin3

	iso-8859-4-html
	ISO-8859-4 { MIME* HTML* }
	csisolatin4
	iso-ir-110
	iso8859-4
	iso88594
	iso_8859-4
	iso_8859-4:1988
	l4
	latin4

	iso-8859-5-html
	ISO-8859-5 { MIME* HTML* }
	csisolatincyrillic
	cyrillic
	iso-ir-144
	iso8859-5
	iso88595
	iso_8859-5
	iso_8859-5:1988

	iso-8859-6-html
	ISO-8859-6 { MIME* HTML* }
	arabic
	asmo-708
	csiso88596e
	csiso88596i
	csisolatinarabic
	ecma-114
	iso-8859-6-e
	iso-8859-6-i
	iso-ir-127
	iso8859-6
	iso88596
	iso_8859-6
	iso_8859-6:1987

	iso-8859-7-html
	ISO-8859-7 { MIME* HTML* }
	csisolatingreek
	ecma-118
	elot_928
	greek
	greek8
	iso-ir-126
	iso8859-7
	iso88597
	iso_8859-7
	iso_8859-7:1987
	sun_eu_greek

	iso-8859-8-html
	ISO-8859-8 { MIME* HTML* }
	csiso88598e { MIME }
	csisolatinhebrew
	hebrew
	ISO-8859-8-E
	ISO-8859-8-I
	iso-ir-138
	iso8859-8
	iso88598
	iso_8859-8
	iso_8859-8:1988
	visual
	# adding this one leads to a failure in encoding-labels.html
	# csiso88598i


	# This alias has to be dealt with by TextCodecICU unless
	# multiple encodings can share a single mapping table.
	#ISO-8859-8-I { MIME* HTML* }
	# csiso88598i
	# logical

	iso-8859-10-html
	ISO-8859-10 { MIME* HTML* }
	csisolatin6
	iso-ir-157
	iso8859-10
	iso885910
	l6
	latin6

	iso-8859-13-html
	ISO-8859-13 { MIME* HTML* }
	iso8859-13
	iso885913

	iso-8859-14-html
	ISO-8859-14 { MIME* HTML* }
	iso8859-14
	iso885914

	iso-8859-15-html
	ISO-8859-15 { MIME* HTML* }
	csisolatin9
	iso8859-15
	iso885915
	iso_8859-15
	l9

	iso-8859-16-html
	ISO-8859-16 { MIME* HTML* }

	koi8-r-html
	KOI8-R { MIME* HTML* }
	cskoi8r
	koi
	koi8
	koi8_r

	koi8-u-html
	KOI8-U { MIME* HTML* }
	koi8-ru

	macintosh-html
	macintosh { MIME* HTML* }
	csmacintosh
	mac
	x-mac-roman

	windows-874-html
	windows-874 { MIME* HTML* }
	dos-874
	iso-8859-11
	iso8859-11
	iso885911
	tis-620

	windows-1250-html
	windows-1250 { MIME* HTML* }
	cp1250
	x-cp1250

	windows-1251-html
	windows-1251 { MIME* HTML* }
	cp1251
	x-cp1251

	windows-1253-html
	windows-1253 { MIME* HTML* }
	cp1253
	x-cp1253

	windows-1254-html
	windows-1254 { MIME* HTML* }
	cp1254
	csisolatin5
	iso-8859-9
	iso-ir-148
	iso8859-9
	iso88599
	iso_8859-9
	iso_8859-9:1989
	l5
	latin5
	x-cp1254

	windows-1255-html
	windows-1255 { MIME* HTML* }
	cp1255
	x-cp1255

	windows-1256-html
	windows-1256 { MIME* HTML* }
	cp1256
	x-cp1256

	windows-1257-html
	windows-1257 { MIME* HTML* }
	cp1257
	x-cp1257

	windows-1258-html
	windows-1258 { MIME* HTML* }
	cp1258
	x-cp1258

	x-mac-cyrillic-html
	x-mac-cyrillic { MIME* HTML* }
	x-mac-ukrainian

	# Keep GBK and GB18030 separate for now until we decide
	# what to do about them: crbug.com/339862
	# The encoding spec requires that decoding to Unicode should use GB18030
	# while encoding from Unicode should use GBK.

	windows-936-2000
	GBK { MIME* IANA* }
	CP936 { IANA }
	MS936 { IANA }
	chinese { IANA }
	iso-ir-58 { IANA }
	GB2312 { IANA MIME }
	GB_2312-80 { IANA }
	gb_2312
	gb2312-1980
	csGB2312 { IANA }
	csiso58gb231280
	EUC-CN
	x-gbk
	windows-936

	# GB 18030 is partly algorithmic, using the MBCS converter
	gb18030 { IANA* } gb18030 { HTML* MIME* } ibm-1392 windows-54936

	big5-html
	Big5 { MIME* HTML* }
	cn-big5
	csbig5
	x-x-big5
	Big5-HKSCS

	euc-jp-html
	EUC-JP { MIME* HTML* }
	cseucpkdfmtjapanese
	x-euc-jp

	ISO_2022,locale=ja,version=0
	ISO-2022-JP { MIME* HTML* }
	csiso2022jp

	shift_jis-html
	Shift_JIS { MIME* HTML* }
	csshiftjis
	ms_kanji
	shift-jis
	sjis
	windows-31j
	x-sjis

	euc-kr-html
	EUC-KR { MIME* HTML* }
	cseuckr
	csksc56011987
	iso-ir-149
	korean
	ks_c_5601-1987
	ks_c_5601-1989
	ksc5601
	ksc_5601
	windows-949

	# We need to keep these aliases so that documents labelled with them
	# are converted to a single U+FFFD instead of being rendered as gibberish.
	ISO-2022-KR { HTML* MIME* } csISO2022KR { IANA }
	ISO-2022-CN { IANA* HTML* } csISO2022CN x-ISO-2022-CN-GB
	ISO-2022-CN-EXT { IANA* HTML* }
	HZ-GB-2312 { HTML* IANA* } HZ