#!/bin/sh
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# References:
# https://encoding.spec.whatwg.org/#euc-kr

# This script downloads the following index file into the current directory
# and writes the charmap generated from it to standard output.
# https://encoding.spec.whatwg.org/index-euc-kr.txt

# Prints the fixed header of the generated charmap to stdout: the copyright
# banner, the converter properties, the <icu:state> table and the opening
# CHARMAP line.
#
# Arguments: none
# Outputs:   the header text, verbatim
#
# The heredoc delimiter is quoted so the text is emitted literally (the
# backslash in "\x3F" is never subject to shell processing).
preamble() {
  cat <<'PREAMBLE'
# ***************************************************************************
# *
# * Copyright (C) 1995-2015, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# * Generated per the algorithm for EUC-KR
# * described at http://encoding.spec.whatwg.org/#euc-kr
# *
# ***************************************************************************
<code_set_name> "euc-kr-html"
<mb_cur_max> 2
<mb_cur_min> 1
<uconv_class> "MBCS"
<subchar> \x3F
<icu:charsetFamily> "ASCII"

# 81-fe in states 2 and 3 can be tigher and a1-fe, but
# to be compliant to HTML5 spec, it should be 81-fe.
<icu:state> 0-7f, 81-c5:1, c6:2, c7-fe:3
<icu:state> 41-5a, 61-7a, 81-fe
<icu:state> 41-52, 81-fe
<icu:state> 81-fe

CHARMAP
PREAMBLE
}

# Prints the single-byte mappings for U+0000 through U+007F, one per line,
# in the form "<U0041> \x41 |0" (code point and byte value are equal).
#
# Arguments: none
# Outputs:   128 mapping lines on stdout
#
# Uses a plain counting loop instead of seq(1), which is not a POSIX utility.
# The counter "i" is an ordinary global, as it was with the for-loop form.
ascii() {
  i=0
  while [ "$i" -le 127 ]; do
    printf '<U%04X> \\x%02X |0\n' "$i" "$i"
    i=$((i + 1))
  done
}


# Converts index-euc-kr.txt (read from the current directory) into two-byte
# mapping lines on stdout.
#
# Each data line of the index carries a pointer in field 1 and a code point
# written as 0xXXXX in field 2.  The byte pair is recovered from the pointer:
#   lead  = pointer / 190 + 0x81   (integer division)
#   trail = pointer % 190 + 0x41
# and the entry is printed as "<UXXXX> \xLL\xTT |0".  Lines starting with
# '#' and empty lines are skipped.
#
# Arguments: none
# Outputs:   one mapping line per index entry, in index order
euckr() {
  awk '
    !/^#/ && !/^$/ {
      pointer = $1
      ucs = substr($2, 3)              # drop the leading "0x"
      # awk division is floating point, so truncate explicitly.  The offsets
      # are spelled in decimal because hexadecimal constants in awk program
      # text are not portable across awk implementations.
      lead = int(pointer / 190) + 129  # 0x81
      trail = pointer % 190 + 65       # 0x41
      tag = 0                          # printed after the "|" on every line
      printf("<U%4s> \\x%02X\\x%02X |%d\n", ucs, lead, trail, tag)
    }
  ' index-euc-kr.txt
}

# Prints every multi-byte mapping line in generation order; at present that
# is exactly the output of euckr.  Sorting and de-duplication are left to
# the caller.
#
# Arguments: none
# Outputs:   mapping lines on stdout
unsorted_table() {
  euckr
}

# Fetch the index into the current directory, then print the complete
# charmap to stdout: header, single-byte entries, the sorted and
# de-duplicated two-byte entries, and the closing END CHARMAP line.
wget -N -r -nd https://encoding.spec.whatwg.org/index-euc-kr.txt

# Stop before printing anything if there is no usable index; otherwise the
# output would be a charmap with no EUC-KR entries at all.  A copy left by
# an earlier run is still accepted when the download itself fails.
if [ ! -s index-euc-kr.txt ]; then
  echo 'error: index-euc-kr.txt is missing or empty; cannot build the charmap' >&2
  exit 1
fi

preamble
ascii
unsorted_table | sort -k1 | uniq
echo 'END CHARMAP'