| /* |
| ********************************************************************** |
| * Copyright (C) 2009-2015, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| */ |
| |
| #include "unicode/utypes.h" |
| #include "unicode/ures.h" |
| #include "unicode/putil.h" |
| #include "unicode/uloc.h" |
| #include "ustr_imp.h" |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "putilimp.h" |
| #include "uinvchar.h" |
| #include "ulocimp.h" |
| #include "uassert.h" |
| |
/* Node of a singly linked list; each node refers to one variant subtag. */
typedef struct VariantListEntry {
    const char              *variant;   /* variant subtag text */
    struct VariantListEntry *next;      /* following node; NULL on the last node */
} VariantListEntry;
| |
/* Node of a singly linked list; each node refers to one attribute value. */
typedef struct AttributeListEntry {
    const char                *attribute;   /* attribute text */
    struct AttributeListEntry *next;        /* following node; NULL on the last node */
} AttributeListEntry;
| |
/* Node of a singly linked list; each node refers to one extension (key plus value). */
typedef struct ExtensionListEntry {
    const char                *key;     /* extension key; the list insertion code orders by it */
    const char                *value;   /* value belonging to key */
    struct ExtensionListEntry *next;    /* following node; NULL on the last node */
} ExtensionListEntry;
| |
| #define MAXEXTLANG 3 |
| typedef struct ULanguageTag { |
| char *buf; /* holding parsed subtags */ |
| const char *language; |
| const char *extlang[MAXEXTLANG]; |
| const char *script; |
| const char *region; |
| VariantListEntry *variants; |
| ExtensionListEntry *extensions; |
| const char *privateuse; |
| const char *grandfathered; |
| } ULanguageTag; |
| |
/* NOTE(review): presumably the minimum length of a tag worth parsing - confirm against its users. */
#define MINLEN 2

/* Characters with a fixed meaning inside a BCP 47 language tag. */
#define SEP '-'             /* separates subtags */
#define PRIVATEUSE 'x'      /* singleton that is excluded from the ordinary extension singletons */
#define LDMLEXT 'u'         /* singleton written in front of the LDML (Unicode locale) extension */

/* Characters with a fixed meaning inside an ICU locale ID. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classifiers; ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
| |
/* Shared empty string; default for the string members of ULanguageTag. */
static const char EMPTY[] = "";

/* Language subtag written when a locale has no usable language code. */
static const char LANG_UND[] = "und";

static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";

/* Extension key/value emitted in place of the POSIX variant. */
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";

/* Special locale keyword that carries Unicode locale attributes. */
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";

static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND without the terminating NUL; keep in sync with the string. */
#define LANG_UND_LEN 3
| |
/*
 * Grandfathered tags and their preferred replacements, stored as
 * consecutive (grandfathered, preferred) pairs.  A (NULL, NULL) pair
 * marks the end of the table.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
| |
/*
 * Deprecated language codes and their replacements, stored as consecutive
 * (deprecated, new) pairs.  Users step through the table two entries at a
 * time, so the number of entries must stay even.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};
| |
| /* |
| * ------------------------------------------------- |
| * |
| * These ultag_ functions may be exposed as APIs later |
| * |
| * ------------------------------------------------- |
| */ |
| |
| static ULanguageTag* |
| ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); |
| |
| static void |
| ultag_close(ULanguageTag* langtag); |
| |
| static const char* |
| ultag_getLanguage(const ULanguageTag* langtag); |
| |
| #if 0 |
| static const char* |
| ultag_getJDKLanguage(const ULanguageTag* langtag); |
| #endif |
| |
| static const char* |
| ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); |
| |
| static int32_t |
| ultag_getExtlangSize(const ULanguageTag* langtag); |
| |
| static const char* |
| ultag_getScript(const ULanguageTag* langtag); |
| |
| static const char* |
| ultag_getRegion(const ULanguageTag* langtag); |
| |
| static const char* |
| ultag_getVariant(const ULanguageTag* langtag, int32_t idx); |
| |
| static int32_t |
| ultag_getVariantsSize(const ULanguageTag* langtag); |
| |
| static const char* |
| ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); |
| |
| static const char* |
| ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); |
| |
| static int32_t |
| ultag_getExtensionsSize(const ULanguageTag* langtag); |
| |
| static const char* |
| ultag_getPrivateUse(const ULanguageTag* langtag); |
| |
| #if 0 |
| static const char* |
| ultag_getGrandfathered(const ULanguageTag* langtag); |
| #endif |
| |
| /* |
| * ------------------------------------------------- |
| * |
| * Language subtag syntax validation functions |
| * |
| * ------------------------------------------------- |
| */ |
| |
| static UBool |
| _isAlphaString(const char* s, int32_t len) { |
| int32_t i; |
| for (i = 0; i < len; i++) { |
| if (!ISALPHA(*(s + i))) { |
| return FALSE; |
| } |
| } |
| return TRUE; |
| } |
| |
| static UBool |
| _isNumericString(const char* s, int32_t len) { |
| int32_t i; |
| for (i = 0; i < len; i++) { |
| if (!ISNUMERIC(*(s + i))) { |
| return FALSE; |
| } |
| } |
| return TRUE; |
| } |
| |
| static UBool |
| _isAlphaNumericString(const char* s, int32_t len) { |
| int32_t i; |
| for (i = 0; i < len; i++) { |
| if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { |
| return FALSE; |
| } |
| } |
| return TRUE; |
| } |
| |
| static UBool |
| _isLanguageSubtag(const char* s, int32_t len) { |
| /* |
| * language = 2*3ALPHA ; shortest ISO 639 code |
| * ["-" extlang] ; sometimes followed by |
| * ; extended language subtags |
| * / 4ALPHA ; or reserved for future use |
| * / 5*8ALPHA ; or registered language subtag |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isExtlangSubtag(const char* s, int32_t len) { |
| /* |
| * extlang = 3ALPHA ; selected ISO 639 codes |
| * *2("-" 3ALPHA) ; permanently reserved |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len == 3 && _isAlphaString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isScriptSubtag(const char* s, int32_t len) { |
| /* |
| * script = 4ALPHA ; ISO 15924 code |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len == 4 && _isAlphaString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isRegionSubtag(const char* s, int32_t len) { |
| /* |
| * region = 2ALPHA ; ISO 3166-1 code |
| * / 3DIGIT ; UN M.49 code |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len == 2 && _isAlphaString(s, len)) { |
| return TRUE; |
| } |
| if (len == 3 && _isNumericString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isVariantSubtag(const char* s, int32_t len) { |
| /* |
| * variant = 5*8alphanum ; registered variants |
| * / (DIGIT 3alphanum) |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { |
| return TRUE; |
| } |
| if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isPrivateuseVariantSubtag(const char* s, int32_t len) { |
| /* |
| * variant = 1*8alphanum ; registered variants |
| * / (DIGIT 3alphanum) |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isExtensionSingleton(const char* s, int32_t len) { |
| /* |
| * extension = singleton 1*("-" (2*8alphanum)) |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isExtensionSubtag(const char* s, int32_t len) { |
| /* |
| * extension = singleton 1*("-" (2*8alphanum)) |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isExtensionSubtags(const char* s, int32_t len) { |
| const char *p = s; |
| const char *pSubtag = NULL; |
| |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| |
| while ((p - s) < len) { |
| if (*p == SEP) { |
| if (pSubtag == NULL) { |
| return FALSE; |
| } |
| if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { |
| return FALSE; |
| } |
| pSubtag = NULL; |
| } else if (pSubtag == NULL) { |
| pSubtag = p; |
| } |
| p++; |
| } |
| if (pSubtag == NULL) { |
| return FALSE; |
| } |
| return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); |
| } |
| |
| static UBool |
| _isPrivateuseValueSubtag(const char* s, int32_t len) { |
| /* |
| * privateuse = "x" 1*("-" (1*8alphanum)) |
| */ |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| static UBool |
| _isPrivateuseValueSubtags(const char* s, int32_t len) { |
| const char *p = s; |
| const char *pSubtag = NULL; |
| |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| |
| while ((p - s) < len) { |
| if (*p == SEP) { |
| if (pSubtag == NULL) { |
| return FALSE; |
| } |
| if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { |
| return FALSE; |
| } |
| pSubtag = NULL; |
| } else if (pSubtag == NULL) { |
| pSubtag = p; |
| } |
| p++; |
| } |
| if (pSubtag == NULL) { |
| return FALSE; |
| } |
| return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); |
| } |
| |
| U_CFUNC UBool |
| ultag_isUnicodeLocaleKey(const char* s, int32_t len) { |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| if (len == 2 && _isAlphaNumericString(s, len)) { |
| return TRUE; |
| } |
| return FALSE; |
| } |
| |
| U_CFUNC UBool |
| ultag_isUnicodeLocaleType(const char*s, int32_t len) { |
| const char* p; |
| int32_t subtagLen = 0; |
| |
| if (len < 0) { |
| len = (int32_t)uprv_strlen(s); |
| } |
| |
| for (p = s; len > 0; p++, len--) { |
| if (*p == SEP) { |
| if (subtagLen < 3) { |
| return FALSE; |
| } |
| subtagLen = 0; |
| } else if (ISALPHA(*p) || ISNUMERIC(*p)) { |
| subtagLen++; |
| if (subtagLen > 8) { |
| return FALSE; |
| } |
| } else { |
| return FALSE; |
| } |
| } |
| |
| return (subtagLen >= 3); |
| } |
| /* |
| * ------------------------------------------------- |
| * |
| * Helper functions |
| * |
| * ------------------------------------------------- |
| */ |
| |
| static UBool |
| _addVariantToList(VariantListEntry **first, VariantListEntry *var) { |
| UBool bAdded = TRUE; |
| |
| if (*first == NULL) { |
| var->next = NULL; |
| *first = var; |
| } else { |
| VariantListEntry *prev, *cur; |
| int32_t cmp; |
| |
| /* variants order should be preserved */ |
| prev = NULL; |
| cur = *first; |
| while (TRUE) { |
| if (cur == NULL) { |
| prev->next = var; |
| var->next = NULL; |
| break; |
| } |
| |
| /* Checking for duplicate variant */ |
| cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); |
| if (cmp == 0) { |
| /* duplicated variant */ |
| bAdded = FALSE; |
| break; |
| } |
| prev = cur; |
| cur = cur->next; |
| } |
| } |
| |
| return bAdded; |
| } |
| |
| static UBool |
| _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { |
| UBool bAdded = TRUE; |
| |
| if (*first == NULL) { |
| attr->next = NULL; |
| *first = attr; |
| } else { |
| AttributeListEntry *prev, *cur; |
| int32_t cmp; |
| |
| /* reorder variants in alphabetical order */ |
| prev = NULL; |
| cur = *first; |
| while (TRUE) { |
| if (cur == NULL) { |
| prev->next = attr; |
| attr->next = NULL; |
| break; |
| } |
| cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); |
| if (cmp < 0) { |
| if (prev == NULL) { |
| *first = attr; |
| } else { |
| prev->next = attr; |
| } |
| attr->next = cur; |
| break; |
| } |
| if (cmp == 0) { |
| /* duplicated variant */ |
| bAdded = FALSE; |
| break; |
| } |
| prev = cur; |
| cur = cur->next; |
| } |
| } |
| |
| return bAdded; |
| } |
| |
| |
| static UBool |
| _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { |
| UBool bAdded = TRUE; |
| |
| if (*first == NULL) { |
| ext->next = NULL; |
| *first = ext; |
| } else { |
| ExtensionListEntry *prev, *cur; |
| int32_t cmp; |
| |
| /* reorder variants in alphabetical order */ |
| prev = NULL; |
| cur = *first; |
| while (TRUE) { |
| if (cur == NULL) { |
| prev->next = ext; |
| ext->next = NULL; |
| break; |
| } |
| if (localeToBCP) { |
| /* special handling for locale to bcp conversion */ |
| int32_t len, curlen; |
| |
| len = (int32_t)uprv_strlen(ext->key); |
| curlen = (int32_t)uprv_strlen(cur->key); |
| |
| if (len == 1 && curlen == 1) { |
| if (*(ext->key) == *(cur->key)) { |
| cmp = 0; |
| } else if (*(ext->key) == PRIVATEUSE) { |
| cmp = 1; |
| } else if (*(cur->key) == PRIVATEUSE) { |
| cmp = -1; |
| } else { |
| cmp = *(ext->key) - *(cur->key); |
| } |
| } else if (len == 1) { |
| cmp = *(ext->key) - LDMLEXT; |
| } else if (curlen == 1) { |
| cmp = LDMLEXT - *(cur->key); |
| } else { |
| cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); |
| } |
| } else { |
| cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); |
| } |
| if (cmp < 0) { |
| if (prev == NULL) { |
| *first = ext; |
| } else { |
| prev->next = ext; |
| } |
| ext->next = cur; |
| break; |
| } |
| if (cmp == 0) { |
| /* duplicated extension key */ |
| bAdded = FALSE; |
| break; |
| } |
| prev = cur; |
| cur = cur->next; |
| } |
| } |
| |
| return bAdded; |
| } |
| |
| static void |
| _initializeULanguageTag(ULanguageTag* langtag) { |
| int32_t i; |
| |
| langtag->buf = NULL; |
| |
| langtag->language = EMPTY; |
| for (i = 0; i < MAXEXTLANG; i++) { |
| langtag->extlang[i] = NULL; |
| } |
| |
| langtag->script = EMPTY; |
| langtag->region = EMPTY; |
| |
| langtag->variants = NULL; |
| langtag->extensions = NULL; |
| |
| langtag->grandfathered = EMPTY; |
| langtag->privateuse = EMPTY; |
| } |
| |
| static int32_t |
| _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { |
| char buf[ULOC_LANG_CAPACITY]; |
| UErrorCode tmpStatus = U_ZERO_ERROR; |
| int32_t len, i; |
| int32_t reslen = 0; |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); |
| if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| len = 0; |
| } |
| |
| /* Note: returned language code is in lower case letters */ |
| |
| if (len == 0) { |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); |
| } |
| reslen += LANG_UND_LEN; |
| } else if (!_isLanguageSubtag(buf, len)) { |
| /* invalid language code */ |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); |
| } |
| reslen += LANG_UND_LEN; |
| } else { |
| /* resolve deprecated */ |
| for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { |
| if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { |
| uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); |
| len = (int32_t)uprv_strlen(buf); |
| break; |
| } |
| } |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| } |
| u_terminateChars(appendAt, capacity, reslen, status); |
| return reslen; |
| } |
| |
| static int32_t |
| _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { |
| char buf[ULOC_SCRIPT_CAPACITY]; |
| UErrorCode tmpStatus = U_ZERO_ERROR; |
| int32_t len; |
| int32_t reslen = 0; |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); |
| if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| return 0; |
| } |
| |
| if (len > 0) { |
| if (!_isScriptSubtag(buf, len)) { |
| /* invalid script code */ |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| return 0; |
| } else { |
| if (reslen < capacity) { |
| *(appendAt + reslen) = SEP; |
| } |
| reslen++; |
| |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| } |
| } |
| u_terminateChars(appendAt, capacity, reslen, status); |
| return reslen; |
| } |
| |
| static int32_t |
| _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { |
| char buf[ULOC_COUNTRY_CAPACITY]; |
| UErrorCode tmpStatus = U_ZERO_ERROR; |
| int32_t len; |
| int32_t reslen = 0; |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); |
| if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| return 0; |
| } |
| |
| if (len > 0) { |
| if (!_isRegionSubtag(buf, len)) { |
| /* invalid region code */ |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| return 0; |
| } else { |
| if (reslen < capacity) { |
| *(appendAt + reslen) = SEP; |
| } |
| reslen++; |
| |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| } |
| } |
| u_terminateChars(appendAt, capacity, reslen, status); |
| return reslen; |
| } |
| |
| static int32_t |
| _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { |
| char buf[ULOC_FULLNAME_CAPACITY]; |
| UErrorCode tmpStatus = U_ZERO_ERROR; |
| int32_t len, i; |
| int32_t reslen = 0; |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); |
| if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| return 0; |
| } |
| |
| if (len > 0) { |
| char *p, *pVar; |
| UBool bNext = TRUE; |
| VariantListEntry *var; |
| VariantListEntry *varFirst = NULL; |
| |
| pVar = NULL; |
| p = buf; |
| while (bNext) { |
| if (*p == SEP || *p == LOCALE_SEP || *p == 0) { |
| if (*p == 0) { |
| bNext = FALSE; |
| } else { |
| *p = 0; /* terminate */ |
| } |
| if (pVar == NULL) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| /* ignore empty variant */ |
| } else { |
| /* ICU uses upper case letters for variants, but |
| the canonical format is lowercase in BCP47 */ |
| for (i = 0; *(pVar + i) != 0; i++) { |
| *(pVar + i) = uprv_tolower(*(pVar + i)); |
| } |
| |
| /* validate */ |
| if (_isVariantSubtag(pVar, -1)) { |
| if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) { |
| /* emit the variant to the list */ |
| var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); |
| if (var == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| break; |
| } |
| var->variant = pVar; |
| if (!_addVariantToList(&varFirst, var)) { |
| /* duplicated variant */ |
| uprv_free(var); |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| } |
| } else { |
| /* Special handling for POSIX variant, need to remember that we had it and then */ |
| /* treat it like an extension later. */ |
| *hadPosix = TRUE; |
| } |
| } else if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } else if (_isPrivateuseValueSubtag(pVar, -1)) { |
| /* Handle private use subtags separately */ |
| break; |
| } |
| } |
| /* reset variant starting position */ |
| pVar = NULL; |
| } else if (pVar == NULL) { |
| pVar = p; |
| } |
| p++; |
| } |
| |
| if (U_SUCCESS(*status)) { |
| if (varFirst != NULL) { |
| int32_t varLen; |
| |
| /* write out validated/normalized variants to the target */ |
| var = varFirst; |
| while (var != NULL) { |
| if (reslen < capacity) { |
| *(appendAt + reslen) = SEP; |
| } |
| reslen++; |
| varLen = (int32_t)uprv_strlen(var->variant); |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); |
| } |
| reslen += varLen; |
| var = var->next; |
| } |
| } |
| } |
| |
| /* clean up */ |
| var = varFirst; |
| while (var != NULL) { |
| VariantListEntry *tmpVar = var->next; |
| uprv_free(var); |
| var = tmpVar; |
| } |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| } |
| |
| u_terminateChars(appendAt, capacity, reslen, status); |
| return reslen; |
| } |
| |
| static int32_t |
| _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { |
| char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; |
| int32_t attrBufLength = 0; |
| UBool isAttribute = FALSE; |
| UEnumeration *keywordEnum = NULL; |
| int32_t reslen = 0; |
| |
| keywordEnum = uloc_openKeywords(localeID, status); |
| if (U_FAILURE(*status) && !hadPosix) { |
| uenum_close(keywordEnum); |
| return 0; |
| } |
| if (keywordEnum != NULL || hadPosix) { |
| /* reorder extensions */ |
| int32_t len; |
| const char *key; |
| ExtensionListEntry *firstExt = NULL; |
| ExtensionListEntry *ext; |
| AttributeListEntry *firstAttr = NULL; |
| AttributeListEntry *attr; |
| char *attrValue; |
| char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| char *pExtBuf = extBuf; |
| int32_t extBufCapacity = sizeof(extBuf); |
| const char *bcpKey, *bcpValue; |
| UErrorCode tmpStatus = U_ZERO_ERROR; |
| int32_t keylen; |
| UBool isBcpUExt; |
| |
| while (TRUE) { |
| isAttribute = FALSE; |
| key = uenum_next(keywordEnum, NULL, status); |
| if (key == NULL) { |
| break; |
| } |
| len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); |
| /* buf must be null-terminated */ |
| if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| /* ignore this keyword */ |
| tmpStatus = U_ZERO_ERROR; |
| continue; |
| } |
| |
| keylen = (int32_t)uprv_strlen(key); |
| isBcpUExt = (keylen > 1); |
| |
| /* special keyword used for representing Unicode locale attributes */ |
| if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { |
| isAttribute = TRUE; |
| if (len > 0) { |
| int32_t i = 0; |
| while (TRUE) { |
| attrBufLength = 0; |
| for (; i < len; i++) { |
| if (buf[i] != '-') { |
| attrBuf[attrBufLength++] = buf[i]; |
| } else { |
| i++; |
| break; |
| } |
| } |
| if (attrBufLength > 0) { |
| attrBuf[attrBufLength] = 0; |
| |
| } else if (i >= len){ |
| break; |
| } |
| |
| /* create AttributeListEntry */ |
| attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); |
| if (attr == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| break; |
| } |
| attrValue = (char*)uprv_malloc(attrBufLength + 1); |
| if (attrValue == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| break; |
| } |
| uprv_strcpy(attrValue, attrBuf); |
| attr->attribute = attrValue; |
| |
| if (!_addAttributeToList(&firstAttr, attr)) { |
| uprv_free(attr); |
| uprv_free(attrValue); |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| } |
| } |
| } |
| } else if (isBcpUExt) { |
| bcpKey = uloc_toUnicodeLocaleKey(key); |
| if (bcpKey == NULL) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| continue; |
| } |
| |
| /* we've checked buf is null-terminated above */ |
| bcpValue = uloc_toUnicodeLocaleType(key, buf); |
| if (bcpValue == NULL) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| continue; |
| } |
| if (bcpValue == buf) { |
| /* |
| When uloc_toUnicodeLocaleType(key, buf) returns the |
| input value as is, the value is well-formed, but has |
| no known mapping. This implementation normalizes the |
| the value to lower case |
| */ |
| int32_t bcpValueLen = uprv_strlen(bcpValue); |
| if (bcpValueLen < extBufCapacity) { |
| uprv_strcpy(pExtBuf, bcpValue); |
| T_CString_toLowerCase(pExtBuf); |
| |
| bcpValue = pExtBuf; |
| |
| pExtBuf += (bcpValueLen + 1); |
| extBufCapacity -= (bcpValueLen + 1); |
| } else { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| continue; |
| } |
| } |
| } else { |
| if (*key == PRIVATEUSE) { |
| if (!_isPrivateuseValueSubtags(buf, len)) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| continue; |
| } |
| } else { |
| if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| continue; |
| } |
| } |
| bcpKey = key; |
| if ((len + 1) < extBufCapacity) { |
| uprv_memcpy(pExtBuf, buf, len); |
| bcpValue = pExtBuf; |
| |
| pExtBuf += len; |
| |
| *pExtBuf = 0; |
| pExtBuf++; |
| |
| extBufCapacity -= (len + 1); |
| } else { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| } |
| |
| if (!isAttribute) { |
| /* create ExtensionListEntry */ |
| ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
| if (ext == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| break; |
| } |
| ext->key = bcpKey; |
| ext->value = bcpValue; |
| |
| if (!_addExtensionToList(&firstExt, ext, TRUE)) { |
| uprv_free(ext); |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| } |
| } |
| } |
| |
| /* Special handling for POSIX variant - add the keywords for POSIX */ |
| if (hadPosix) { |
| /* create ExtensionListEntry for POSIX */ |
| ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
| if (ext == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| goto cleanup; |
| } |
| ext->key = POSIX_KEY; |
| ext->value = POSIX_VALUE; |
| |
| if (!_addExtensionToList(&firstExt, ext, TRUE)) { |
| uprv_free(ext); |
| } |
| } |
| |
| if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { |
| UBool startLDMLExtension = FALSE; |
| |
| attr = firstAttr; |
| ext = firstExt; |
| do { |
| if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) { |
| /* write LDML singleton extension */ |
| if (reslen < capacity) { |
| *(appendAt + reslen) = SEP; |
| } |
| reslen++; |
| if (reslen < capacity) { |
| *(appendAt + reslen) = LDMLEXT; |
| } |
| reslen++; |
| |
| startLDMLExtension = TRUE; |
| } |
| |
| /* write out the sorted BCP47 attributes, extensions and private use */ |
| if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) { |
| if (reslen < capacity) { |
| *(appendAt + reslen) = SEP; |
| } |
| reslen++; |
| len = (int32_t)uprv_strlen(ext->key); |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| if (reslen < capacity) { |
| *(appendAt + reslen) = SEP; |
| } |
| reslen++; |
| len = (int32_t)uprv_strlen(ext->value); |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| |
| ext = ext->next; |
| } else if (attr) { |
| /* write the value for the attributes */ |
| if (reslen < capacity) { |
| *(appendAt + reslen) = SEP; |
| } |
| reslen++; |
| len = (int32_t)uprv_strlen(attr->attribute); |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| |
| attr = attr->next; |
| } |
| } while (attr != NULL || ext != NULL); |
| } |
| cleanup: |
| /* clean up */ |
| ext = firstExt; |
| while (ext != NULL) { |
| ExtensionListEntry *tmpExt = ext->next; |
| uprv_free(ext); |
| ext = tmpExt; |
| } |
| |
| attr = firstAttr; |
| while (attr != NULL) { |
| AttributeListEntry *tmpAttr = attr->next; |
| char *pValue = (char *)attr->attribute; |
| uprv_free(pValue); |
| uprv_free(attr); |
| attr = tmpAttr; |
| } |
| |
| uenum_close(keywordEnum); |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| } |
| |
| return u_terminateChars(appendAt, capacity, reslen, status); |
| } |
| |
| /** |
| * Append keywords parsed from LDML extension value |
| * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} |
| * Note: char* buf is used for storing keywords |
| */ |
| static void |
| _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { |
| const char *pTag; /* beginning of current subtag */ |
| const char *pKwds; /* beginning of key-type pairs */ |
| UBool variantExists = *posixVariant; |
| |
| ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ |
| ExtensionListEntry *kwd, *nextKwd; |
| |
| AttributeListEntry *attrFirst = NULL; /* first attribute */ |
| AttributeListEntry *attr, *nextAttr; |
| |
| int32_t len; |
| int32_t bufIdx = 0; |
| |
| char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| int32_t attrBufIdx = 0; |
| |
| /* Reset the posixVariant value */ |
| *posixVariant = FALSE; |
| |
| pTag = ldmlext; |
| pKwds = NULL; |
| |
| /* Iterate through u extension attributes */ |
| while (*pTag) { |
| /* locate next separator char */ |
| for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); |
| |
| if (ultag_isUnicodeLocaleKey(pTag, len)) { |
| pKwds = pTag; |
| break; |
| } |
| |
| /* add this attribute to the list */ |
| attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); |
| if (attr == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| goto cleanup; |
| } |
| |
| if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { |
| uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); |
| attrBuf[attrBufIdx + len] = 0; |
| attr->attribute = &attrBuf[attrBufIdx]; |
| attrBufIdx += (len + 1); |
| } else { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| goto cleanup; |
| } |
| |
| if (!_addAttributeToList(&attrFirst, attr)) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| uprv_free(attr); |
| goto cleanup; |
| } |
| |
| /* next tag */ |
| pTag += len; |
| if (*pTag) { |
| /* next to the separator */ |
| pTag++; |
| } |
| } |
| |
| if (attrFirst) { |
| /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ |
| |
| if (attrBufIdx > bufSize) { |
| /* attrBufIdx == <total length of attribute subtag> + 1 */ |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| goto cleanup; |
| } |
| |
| kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
| if (kwd == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| goto cleanup; |
| } |
| |
| kwd->key = LOCALE_ATTRIBUTE_KEY; |
| kwd->value = buf; |
| |
| /* attribute subtags sorted in alphabetical order as type */ |
| attr = attrFirst; |
| while (attr != NULL) { |
| nextAttr = attr->next; |
| |
| /* buffer size check is done above */ |
| if (attr != attrFirst) { |
| *(buf + bufIdx) = SEP; |
| bufIdx++; |
| } |
| |
| len = uprv_strlen(attr->attribute); |
| uprv_memcpy(buf + bufIdx, attr->attribute, len); |
| bufIdx += len; |
| |
| attr = nextAttr; |
| } |
| *(buf + bufIdx) = 0; |
| bufIdx++; |
| |
| if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| uprv_free(kwd); |
| goto cleanup; |
| } |
| |
| /* once keyword entry is created, delete the attribute list */ |
| attr = attrFirst; |
| while (attr != NULL) { |
| nextAttr = attr->next; |
| uprv_free(attr); |
| attr = nextAttr; |
| } |
| attrFirst = NULL; |
| } |
| |
| if (pKwds) { |
| const char *pBcpKey = NULL; /* u extenstion key subtag */ |
| const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ |
| int32_t bcpKeyLen = 0; |
| int32_t bcpTypeLen = 0; |
| UBool isDone = FALSE; |
| |
| pTag = pKwds; |
| /* BCP47 representation of LDML key/type pairs */ |
| while (!isDone) { |
| const char *pNextBcpKey = NULL; |
| int32_t nextBcpKeyLen = 0; |
| UBool emitKeyword = FALSE; |
| |
| if (*pTag) { |
| /* locate next separator char */ |
| for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); |
| |
| if (ultag_isUnicodeLocaleKey(pTag, len)) { |
| if (pBcpKey) { |
| emitKeyword = TRUE; |
| pNextBcpKey = pTag; |
| nextBcpKeyLen = len; |
| } else { |
| pBcpKey = pTag; |
| bcpKeyLen = len; |
| } |
| } else { |
| U_ASSERT(pBcpKey != NULL); |
| /* within LDML type subtags */ |
| if (pBcpType) { |
| bcpTypeLen += (len + 1); |
| } else { |
| pBcpType = pTag; |
| bcpTypeLen = len; |
| } |
| } |
| |
| /* next tag */ |
| pTag += len; |
| if (*pTag) { |
| /* next to the separator */ |
| pTag++; |
| } |
| } else { |
| /* processing last one */ |
| emitKeyword = TRUE; |
| isDone = TRUE; |
| } |
| |
| if (emitKeyword) { |
| const char *pKey = NULL; /* LDML key */ |
| const char *pType = NULL; /* LDML type */ |
| |
| char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ |
| |
| U_ASSERT(pBcpKey != NULL); |
| |
| if (bcpKeyLen >= sizeof(bcpKeyBuf)) { |
| /* the BCP key is invalid */ |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| goto cleanup; |
| } |
| |
| uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); |
| bcpKeyBuf[bcpKeyLen] = 0; |
| |
| /* u extension key to LDML key */ |
| pKey = uloc_toLegacyKey(bcpKeyBuf); |
| if (pKey == NULL) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| goto cleanup; |
| } |
| if (pKey == bcpKeyBuf) { |
| /* |
| The key returned by toLegacyKey points to the input buffer. |
| We normalize the result key to lower case. |
| */ |
| T_CString_toLowerCase(bcpKeyBuf); |
| if (bufSize - bufIdx - 1 >= bcpKeyLen) { |
| uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); |
| pKey = buf + bufIdx; |
| bufIdx += bcpKeyLen; |
| *(buf + bufIdx) = 0; |
| bufIdx++; |
| } else { |
| *status = U_BUFFER_OVERFLOW_ERROR; |
| goto cleanup; |
| } |
| } |
| |
| if (pBcpType) { |
| char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ |
| if (bcpTypeLen >= sizeof(bcpTypeBuf)) { |
| /* the BCP type is too long */ |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| goto cleanup; |
| } |
| |
| uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); |
| bcpTypeBuf[bcpTypeLen] = 0; |
| |
| /* BCP type to locale type */ |
| pType = uloc_toLegacyType(pKey, bcpTypeBuf); |
| if (pType == NULL) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| goto cleanup; |
| } |
| if (pType == bcpTypeBuf) { |
| /* |
| The type returned by toLegacyType points to the input buffer. |
| We normalize the result type to lower case. |
| */ |
| /* normalize to lower case */ |
| T_CString_toLowerCase(bcpTypeBuf); |
| if (bufSize - bufIdx - 1 >= bcpTypeLen) { |
| uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); |
| pType = buf + bufIdx; |
| bufIdx += bcpTypeLen; |
| *(buf + bufIdx) = 0; |
| bufIdx++; |
| } else { |
| *status = U_BUFFER_OVERFLOW_ERROR; |
| goto cleanup; |
| } |
| } |
| } else { |
| /* typeless - default type value is "yes" */ |
| pType = LOCALE_TYPE_YES; |
| } |
| |
| /* Special handling for u-va-posix, since we want to treat this as a variant, |
| not as a keyword */ |
| if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { |
| *posixVariant = TRUE; |
| } else { |
| /* create an ExtensionListEntry for this keyword */ |
| kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
| if (kwd == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| goto cleanup; |
| } |
| |
| kwd->key = pKey; |
| kwd->value = pType; |
| |
| if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| uprv_free(kwd); |
| goto cleanup; |
| } |
| } |
| |
| pBcpKey = pNextBcpKey; |
| bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; |
| pBcpType = NULL; |
| bcpTypeLen = 0; |
| } |
| } |
| } |
| |
| kwd = kwdFirst; |
| while (kwd != NULL) { |
| nextKwd = kwd->next; |
| _addExtensionToList(appendTo, kwd, FALSE); |
| kwd = nextKwd; |
| } |
| |
| return; |
| |
| cleanup: |
| attr = attrFirst; |
| while (attr != NULL) { |
| nextAttr = attr->next; |
| uprv_free(attr); |
| attr = nextAttr; |
| } |
| |
| kwd = kwdFirst; |
| while (kwd != NULL) { |
| nextKwd = kwd->next; |
| uprv_free(kwd); |
| kwd = nextKwd; |
| } |
| } |
| |
| |
| static int32_t |
| _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { |
| int32_t reslen = 0; |
| int32_t i, n; |
| int32_t len; |
| ExtensionListEntry *kwdFirst = NULL; |
| ExtensionListEntry *kwd; |
| const char *key, *type; |
| char *kwdBuf = NULL; |
| int32_t kwdBufLength = capacity; |
| UBool posixVariant = FALSE; |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| kwdBuf = (char*)uprv_malloc(kwdBufLength); |
| if (kwdBuf == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| |
| /* Determine if variants already exists */ |
| if (ultag_getVariantsSize(langtag)) { |
| posixVariant = TRUE; |
| } |
| |
| n = ultag_getExtensionsSize(langtag); |
| |
| /* resolve locale keywords and reordering keys */ |
| for (i = 0; i < n; i++) { |
| key = ultag_getExtensionKey(langtag, i); |
| type = ultag_getExtensionValue(langtag, i); |
| if (*key == LDMLEXT) { |
| _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); |
| if (U_FAILURE(*status)) { |
| break; |
| } |
| } else { |
| kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
| if (kwd == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| break; |
| } |
| kwd->key = key; |
| kwd->value = type; |
| if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
| uprv_free(kwd); |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } |
| } |
| } |
| |
| if (U_SUCCESS(*status)) { |
| type = ultag_getPrivateUse(langtag); |
| if ((int32_t)uprv_strlen(type) > 0) { |
| /* add private use as a keyword */ |
| kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
| if (kwd == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| } else { |
| kwd->key = PRIVATEUSE_KEY; |
| kwd->value = type; |
| if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
| uprv_free(kwd); |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| } |
| } |
| |
| /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ |
| |
| if (U_SUCCESS(*status) && posixVariant) { |
| len = (int32_t) uprv_strlen(_POSIX); |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| } |
| |
| if (U_SUCCESS(*status) && kwdFirst != NULL) { |
| /* write out the sorted keywords */ |
| UBool firstValue = TRUE; |
| kwd = kwdFirst; |
| do { |
| if (reslen < capacity) { |
| if (firstValue) { |
| /* '@' */ |
| *(appendAt + reslen) = LOCALE_EXT_SEP; |
| firstValue = FALSE; |
| } else { |
| /* ';' */ |
| *(appendAt + reslen) = LOCALE_KEYWORD_SEP; |
| } |
| } |
| reslen++; |
| |
| /* key */ |
| len = (int32_t)uprv_strlen(kwd->key); |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| |
| /* '=' */ |
| if (reslen < capacity) { |
| *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; |
| } |
| reslen++; |
| |
| /* type */ |
| len = (int32_t)uprv_strlen(kwd->value); |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| |
| kwd = kwd->next; |
| } while (kwd); |
| } |
| |
| /* clean up */ |
| kwd = kwdFirst; |
| while (kwd != NULL) { |
| ExtensionListEntry *tmpKwd = kwd->next; |
| uprv_free(kwd); |
| kwd = tmpKwd; |
| } |
| |
| uprv_free(kwdBuf); |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| return u_terminateChars(appendAt, capacity, reslen, status); |
| } |
| |
| static int32_t |
| _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { |
| char buf[ULOC_FULLNAME_CAPACITY]; |
| char tmpAppend[ULOC_FULLNAME_CAPACITY]; |
| UErrorCode tmpStatus = U_ZERO_ERROR; |
| int32_t len, i; |
| int32_t reslen = 0; |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); |
| if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| return 0; |
| } |
| |
| if (len > 0) { |
| char *p, *pPriv; |
| UBool bNext = TRUE; |
| UBool firstValue = TRUE; |
| UBool writeValue; |
| |
| pPriv = NULL; |
| p = buf; |
| while (bNext) { |
| writeValue = FALSE; |
| if (*p == SEP || *p == LOCALE_SEP || *p == 0) { |
| if (*p == 0) { |
| bNext = FALSE; |
| } else { |
| *p = 0; /* terminate */ |
| } |
| if (pPriv != NULL) { |
| /* Private use in the canonical format is lowercase in BCP47 */ |
| for (i = 0; *(pPriv + i) != 0; i++) { |
| *(pPriv + i) = uprv_tolower(*(pPriv + i)); |
| } |
| |
| /* validate */ |
| if (_isPrivateuseValueSubtag(pPriv, -1)) { |
| if (firstValue) { |
| if (!_isVariantSubtag(pPriv, -1)) { |
| writeValue = TRUE; |
| } |
| } else { |
| writeValue = TRUE; |
| } |
| } else if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| break; |
| } else { |
| break; |
| } |
| |
| if (writeValue) { |
| if (reslen < capacity) { |
| tmpAppend[reslen++] = SEP; |
| } |
| |
| if (firstValue) { |
| if (reslen < capacity) { |
| tmpAppend[reslen++] = *PRIVATEUSE_KEY; |
| } |
| |
| if (reslen < capacity) { |
| tmpAppend[reslen++] = SEP; |
| } |
| |
| len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); |
| if (reslen < capacity) { |
| uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| |
| if (reslen < capacity) { |
| tmpAppend[reslen++] = SEP; |
| } |
| |
| firstValue = FALSE; |
| } |
| |
| len = (int32_t)uprv_strlen(pPriv); |
| if (reslen < capacity) { |
| uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); |
| } |
| reslen += len; |
| } |
| } |
| /* reset private use starting position */ |
| pPriv = NULL; |
| } else if (pPriv == NULL) { |
| pPriv = p; |
| } |
| p++; |
| } |
| |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| } |
| |
| if (U_SUCCESS(*status)) { |
| len = reslen; |
| if (reslen < capacity) { |
| uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); |
| } |
| } |
| |
| u_terminateChars(appendAt, capacity, reslen, status); |
| |
| return reslen; |
| } |
| |
| /* |
| * ------------------------------------------------- |
| * |
| * ultag_ functions |
| * |
| * ------------------------------------------------- |
| */ |
| |
| /* Bit flags used by the parser */ |
| /* ultag_parse keeps a mask of these in 'next': the kinds of subtag it will accept at the current position */ |
| #define LANG 0x0001 /* language subtag */ |
| #define EXTL 0x0002 /* extlang subtag */ |
| #define SCRT 0x0004 /* script subtag */ |
| #define REGN 0x0008 /* region subtag */ |
| #define VART 0x0010 /* variant subtag */ |
| #define EXTS 0x0020 /* extension singleton */ |
| #define EXTV 0x0040 /* extension value subtag */ |
| #define PRIV 0x0080 /* private use singleton and its value subtags */ |
| |
| static ULanguageTag* |
| ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { |
| ULanguageTag *t; |
| char *tagBuf; |
| int16_t next; |
| char *pSubtag, *pNext, *pLastGoodPosition; |
| int32_t subtagLen; |
| int32_t extlangIdx; |
| ExtensionListEntry *pExtension; |
| char *pExtValueSubtag, *pExtValueSubtagEnd; |
| int32_t i; |
| UBool privateuseVar = FALSE; |
| int32_t grandfatheredLen = 0; |
| |
| if (parsedLen != NULL) { |
| *parsedLen = 0; |
| } |
| |
| if (U_FAILURE(*status)) { |
| return NULL; |
| } |
| |
| if (tagLen < 0) { |
| tagLen = (int32_t)uprv_strlen(tag); |
| } |
| |
| /* copy the entire string */ |
| tagBuf = (char*)uprv_malloc(tagLen + 1); |
| if (tagBuf == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| uprv_memcpy(tagBuf, tag, tagLen); |
| *(tagBuf + tagLen) = 0; |
| |
| /* create a ULanguageTag */ |
| t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); |
| if (t == NULL) { |
| uprv_free(tagBuf); |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| _initializeULanguageTag(t); |
| t->buf = tagBuf; |
| |
| if (tagLen < MINLEN) { |
| /* the input tag is too short - return empty ULanguageTag */ |
| return t; |
| } |
| |
| /* check if the tag is grandfathered */ |
| for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { |
| if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { |
| int32_t newTagLength; |
| |
| grandfatheredLen = tagLen; /* back up for output parsedLen */ |
| newTagLength = uprv_strlen(GRANDFATHERED[i+1]); |
| if (tagLen < newTagLength) { |
| uprv_free(tagBuf); |
| tagBuf = (char*)uprv_malloc(newTagLength + 1); |
| if (tagBuf == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| ultag_close(t); |
| return NULL; |
| } |
| t->buf = tagBuf; |
| tagLen = newTagLength; |
| } |
| uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); |
| break; |
| } |
| } |
| |
| /* |
| * langtag = language |
| * ["-" script] |
| * ["-" region] |
| * *("-" variant) |
| * *("-" extension) |
| * ["-" privateuse] |
| */ |
| |
| next = LANG | PRIV; |
| pNext = pLastGoodPosition = tagBuf; |
| extlangIdx = 0; |
| pExtension = NULL; |
| pExtValueSubtag = NULL; |
| pExtValueSubtagEnd = NULL; |
| |
| while (pNext) { |
| char *pSep; |
| |
| pSubtag = pNext; |
| |
| /* locate next separator char */ |
| pSep = pSubtag; |
| while (*pSep) { |
| if (*pSep == SEP) { |
| break; |
| } |
| pSep++; |
| } |
| if (*pSep == 0) { |
| /* last subtag */ |
| pNext = NULL; |
| } else { |
| pNext = pSep + 1; |
| } |
| subtagLen = (int32_t)(pSep - pSubtag); |
| |
| if (next & LANG) { |
| if (_isLanguageSubtag(pSubtag, subtagLen)) { |
| *pSep = 0; /* terminate */ |
| t->language = T_CString_toLowerCase(pSubtag); |
| |
| pLastGoodPosition = pSep; |
| next = EXTL | SCRT | REGN | VART | EXTS | PRIV; |
| continue; |
| } |
| } |
| if (next & EXTL) { |
| if (_isExtlangSubtag(pSubtag, subtagLen)) { |
| *pSep = 0; |
| t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); |
| |
| pLastGoodPosition = pSep; |
| if (extlangIdx < 3) { |
| next = EXTL | SCRT | REGN | VART | EXTS | PRIV; |
| } else { |
| next = SCRT | REGN | VART | EXTS | PRIV; |
| } |
| continue; |
| } |
| } |
| if (next & SCRT) { |
| if (_isScriptSubtag(pSubtag, subtagLen)) { |
| char *p = pSubtag; |
| |
| *pSep = 0; |
| |
| /* to title case */ |
| *p = uprv_toupper(*p); |
| p++; |
| for (; *p; p++) { |
| *p = uprv_tolower(*p); |
| } |
| |
| t->script = pSubtag; |
| |
| pLastGoodPosition = pSep; |
| next = REGN | VART | EXTS | PRIV; |
| continue; |
| } |
| } |
| if (next & REGN) { |
| if (_isRegionSubtag(pSubtag, subtagLen)) { |
| *pSep = 0; |
| t->region = T_CString_toUpperCase(pSubtag); |
| |
| pLastGoodPosition = pSep; |
| next = VART | EXTS | PRIV; |
| continue; |
| } |
| } |
| if (next & VART) { |
| if (_isVariantSubtag(pSubtag, subtagLen) || |
| (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { |
| VariantListEntry *var; |
| UBool isAdded; |
| |
| var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); |
| if (var == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| goto error; |
| } |
| *pSep = 0; |
| var->variant = T_CString_toUpperCase(pSubtag); |
| isAdded = _addVariantToList(&(t->variants), var); |
| if (!isAdded) { |
| /* duplicated variant entry */ |
| uprv_free(var); |
| break; |
| } |
| pLastGoodPosition = pSep; |
| next = VART | EXTS | PRIV; |
| continue; |
| } |
| } |
| if (next & EXTS) { |
| if (_isExtensionSingleton(pSubtag, subtagLen)) { |
| if (pExtension != NULL) { |
| if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
| /* the previous extension is incomplete */ |
| uprv_free(pExtension); |
| pExtension = NULL; |
| break; |
| } |
| |
| /* terminate the previous extension value */ |
| *pExtValueSubtagEnd = 0; |
| pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
| |
| /* insert the extension to the list */ |
| if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { |
| pLastGoodPosition = pExtValueSubtagEnd; |
| } else { |
| /* stop parsing here */ |
| uprv_free(pExtension); |
| pExtension = NULL; |
| break; |
| } |
| } |
| |
| /* create a new extension */ |
| pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
| if (pExtension == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| goto error; |
| } |
| *pSep = 0; |
| pExtension->key = T_CString_toLowerCase(pSubtag); |
| pExtension->value = NULL; /* will be set later */ |
| |
| /* |
| * reset the start and the end location of extension value |
| * subtags for this extension |
| */ |
| pExtValueSubtag = NULL; |
| pExtValueSubtagEnd = NULL; |
| |
| next = EXTV; |
| continue; |
| } |
| } |
| if (next & EXTV) { |
| if (_isExtensionSubtag(pSubtag, subtagLen)) { |
| if (pExtValueSubtag == NULL) { |
| /* if the start postion of this extension's value is not yet, |
| this one is the first value subtag */ |
| pExtValueSubtag = pSubtag; |
| } |
| |
| /* Mark the end of this subtag */ |
| pExtValueSubtagEnd = pSep; |
| next = EXTS | EXTV | PRIV; |
| |
| continue; |
| } |
| } |
| if (next & PRIV) { |
| if (uprv_tolower(*pSubtag) == PRIVATEUSE) { |
| char *pPrivuseVal; |
| |
| if (pExtension != NULL) { |
| /* Process the last extension */ |
| if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
| /* the previous extension is incomplete */ |
| uprv_free(pExtension); |
| pExtension = NULL; |
| break; |
| } else { |
| /* terminate the previous extension value */ |
| *pExtValueSubtagEnd = 0; |
| pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
| |
| /* insert the extension to the list */ |
| if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { |
| pLastGoodPosition = pExtValueSubtagEnd; |
| pExtension = NULL; |
| } else { |
| /* stop parsing here */ |
| uprv_free(pExtension); |
| pExtension = NULL; |
| break; |
| } |
| } |
| } |
| |
| /* The rest of part will be private use value subtags */ |
| if (pNext == NULL) { |
| /* empty private use subtag */ |
| break; |
| } |
| /* back up the private use value start position */ |
| pPrivuseVal = pNext; |
| |
| /* validate private use value subtags */ |
| while (pNext) { |
| pSubtag = pNext; |
| pSep = pSubtag; |
| while (*pSep) { |
| if (*pSep == SEP) { |
| break; |
| } |
| pSep++; |
| } |
| if (*pSep == 0) { |
| /* last subtag */ |
| pNext = NULL; |
| } else { |
| pNext = pSep + 1; |
| } |
| subtagLen = (int32_t)(pSep - pSubtag); |
| |
| if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { |
| *pSep = 0; |
| next = VART; |
| privateuseVar = TRUE; |
| break; |
| } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { |
| pLastGoodPosition = pSep; |
| } else { |
| break; |
| } |
| } |
| |
| if (next == VART) { |
| continue; |
| } |
| |
| if (pLastGoodPosition - pPrivuseVal > 0) { |
| *pLastGoodPosition = 0; |
| t->privateuse = T_CString_toLowerCase(pPrivuseVal); |
| } |
| /* No more subtags, exiting the parse loop */ |
| break; |
| } |
| break; |
| } |
| |
| /* If we fell through here, it means this subtag is illegal - quit parsing */ |
| break; |
| } |
| |
| if (pExtension != NULL) { |
| /* Process the last extension */ |
| if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
| /* the previous extension is incomplete */ |
| uprv_free(pExtension); |
| } else { |
| /* terminate the previous extension value */ |
| *pExtValueSubtagEnd = 0; |
| pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
| /* insert the extension to the list */ |
| if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { |
| pLastGoodPosition = pExtValueSubtagEnd; |
| } else { |
| uprv_free(pExtension); |
| } |
| } |
| } |
| |
| if (parsedLen != NULL) { |
| *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); |
| } |
| |
| return t; |
| |
| error: |
| ultag_close(t); |
| return NULL; |
| } |
| |
| static void |
| ultag_close(ULanguageTag* langtag) { |
| |
| if (langtag == NULL) { |
| return; |
| } |
| |
| uprv_free(langtag->buf); |
| |
| if (langtag->variants) { |
| VariantListEntry *curVar = langtag->variants; |
| while (curVar) { |
| VariantListEntry *nextVar = curVar->next; |
| uprv_free(curVar); |
| curVar = nextVar; |
| } |
| } |
| |
| if (langtag->extensions) { |
| ExtensionListEntry *curExt = langtag->extensions; |
| while (curExt) { |
| ExtensionListEntry *nextExt = curExt->next; |
| uprv_free(curExt); |
| curExt = nextExt; |
| } |
| } |
| |
| uprv_free(langtag); |
| } |
| |
| static const char* |
| ultag_getLanguage(const ULanguageTag* langtag) { |
| return langtag->language; |
| } |
| |
#if 0
/*
 * Looks the tag's language up in DEPRECATEDLANGS, which is walked as
 * (key, replacement) pairs, and returns the paired entry on a match;
 * otherwise returns the language unchanged. (Currently compiled out.)
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pairIdx = 0;
    while (DEPRECATEDLANGS[pairIdx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], langtag->language) == 0) {
            return DEPRECATEDLANGS[pairIdx + 1];
        }
        pairIdx += 2;
    }
    return langtag->language;
}
#endif
| |
| static const char* |
| ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { |
| if (idx >= 0 && idx < MAXEXTLANG) { |
| return langtag->extlang[idx]; |
| } |
| return NULL; |
| } |
| |
| static int32_t |
| ultag_getExtlangSize(const ULanguageTag* langtag) { |
| int32_t size = 0; |
| int32_t i; |
| for (i = 0; i < MAXEXTLANG; i++) { |
| if (langtag->extlang[i]) { |
| size++; |
| } |
| } |
| return size; |
| } |
| |
| static const char* |
| ultag_getScript(const ULanguageTag* langtag) { |
| return langtag->script; |
| } |
| |
| static const char* |
| ultag_getRegion(const ULanguageTag* langtag) { |
| return langtag->region; |
| } |
| |
| static const char* |
| ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { |
| const char *var = NULL; |
| VariantListEntry *cur = langtag->variants; |
| int32_t i = 0; |
| while (cur) { |
| if (i == idx) { |
| var = cur->variant; |
| break; |
| } |
| cur = cur->next; |
| i++; |
| } |
| return var; |
| } |
| |
| static int32_t |
| ultag_getVariantsSize(const ULanguageTag* langtag) { |
| int32_t size = 0; |
| VariantListEntry *cur = langtag->variants; |
| while (TRUE) { |
| if (cur == NULL) { |
| break; |
| } |
| size++; |
| cur = cur->next; |
| } |
| return size; |
| } |
| |
| static const char* |
| ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { |
| const char *key = NULL; |
| ExtensionListEntry *cur = langtag->extensions; |
| int32_t i = 0; |
| while (cur) { |
| if (i == idx) { |
| key = cur->key; |
| break; |
| } |
| cur = cur->next; |
| i++; |
| } |
| return key; |
| } |
| |
| static const char* |
| ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { |
| const char *val = NULL; |
| ExtensionListEntry *cur = langtag->extensions; |
| int32_t i = 0; |
| while (cur) { |
| if (i == idx) { |
| val = cur->value; |
| break; |
| } |
| cur = cur->next; |
| i++; |
| } |
| return val; |
| } |
| |
| static int32_t |
| ultag_getExtensionsSize(const ULanguageTag* langtag) { |
| int32_t size = 0; |
| ExtensionListEntry *cur = langtag->extensions; |
| while (TRUE) { |
| if (cur == NULL) { |
| break; |
| } |
| size++; |
| cur = cur->next; |
| } |
| return size; |
| } |
| |
| static const char* |
| ultag_getPrivateUse(const ULanguageTag* langtag) { |
| return langtag->privateuse; |
| } |
| |
#if 0
/* Returns the grandfathered field of the parsed tag. (Currently compiled out.) */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
| |
| |
| /* |
| * ------------------------------------------------- |
| * |
| * Locale/BCP47 conversion APIs, exposed as uloc_* |
| * |
| * ------------------------------------------------- |
| */ |
| U_CAPI int32_t U_EXPORT2 |
| uloc_toLanguageTag(const char* localeID, |
| char* langtag, |
| int32_t langtagCapacity, |
| UBool strict, |
| UErrorCode* status) { |
| /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ |
| char canonical[256]; |
| int32_t reslen = 0; |
| UErrorCode tmpStatus = U_ZERO_ERROR; |
| UBool hadPosix = FALSE; |
| const char* pKeywordStart; |
| |
| /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ |
| canonical[0] = 0; |
| if (uprv_strlen(localeID) > 0) { |
| uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); |
| if (tmpStatus != U_ZERO_ERROR) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| } |
| |
| /* For handling special case - private use only tag */ |
| pKeywordStart = locale_getKeywordsStart(canonical); |
| if (pKeywordStart == canonical) { |
| UEnumeration *kwdEnum; |
| int kwdCnt = 0; |
| UBool done = FALSE; |
| |
| kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); |
| if (kwdEnum != NULL) { |
| kwdCnt = uenum_count(kwdEnum, &tmpStatus); |
| if (kwdCnt == 1) { |
| const char *key; |
| int32_t len = 0; |
| |
| key = uenum_next(kwdEnum, &len, &tmpStatus); |
| if (len == 1 && *key == PRIVATEUSE) { |
| char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| buf[0] = PRIVATEUSE; |
| buf[1] = SEP; |
| len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); |
| if (U_SUCCESS(tmpStatus)) { |
| if (_isPrivateuseValueSubtags(&buf[2], len)) { |
| /* return private use only tag */ |
| reslen = len + 2; |
| uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); |
| u_terminateChars(langtag, langtagCapacity, reslen, status); |
| done = TRUE; |
| } else if (strict) { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| done = TRUE; |
| } |
| /* if not strict mode, then "und" will be returned */ |
| } else { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| done = TRUE; |
| } |
| } |
| } |
| uenum_close(kwdEnum); |
| if (done) { |
| return reslen; |
| } |
| } |
| } |
| |
| reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); |
| reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); |
| reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); |
| reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); |
| reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); |
| reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); |
| |
| return reslen; |
| } |
| |
| |
| U_CAPI int32_t U_EXPORT2 |
| uloc_forLanguageTag(const char* langtag, |
| char* localeID, |
| int32_t localeIDCapacity, |
| int32_t* parsedLength, |
| UErrorCode* status) { |
| ULanguageTag *lt; |
| int32_t reslen = 0; |
| const char *subtag, *p; |
| int32_t len; |
| int32_t i, n; |
| UBool noRegion = TRUE; |
| |
| lt = ultag_parse(langtag, -1, parsedLength, status); |
| if (U_FAILURE(*status)) { |
| return 0; |
| } |
| |
| /* language */ |
| subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); |
| if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { |
| len = (int32_t)uprv_strlen(subtag); |
| if (len > 0) { |
| if (reslen < localeIDCapacity) { |
| uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); |
| } |
| reslen += len; |
| } |
| } |
| |
| /* script */ |
| subtag = ultag_getScript(lt); |
| len = (int32_t)uprv_strlen(subtag); |
| if (len > 0) { |
| if (reslen < localeIDCapacity) { |
| *(localeID + reslen) = LOCALE_SEP; |
| } |
| reslen++; |
| |
| /* write out the script in title case */ |
| p = subtag; |
| while (*p) { |
| if (reslen < localeIDCapacity) { |
| if (p == subtag) { |
| *(localeID + reslen) = uprv_toupper(*p); |
| } else { |
| *(localeID + reslen) = *p; |
| } |
| } |
| reslen++; |
| p++; |
| } |
| } |
| |
| /* region */ |
| subtag = ultag_getRegion(lt); |
| len = (int32_t)uprv_strlen(subtag); |
| if (len > 0) { |
| if (reslen < localeIDCapacity) { |
| *(localeID + reslen) = LOCALE_SEP; |
| } |
| reslen++; |
| /* write out the retion in upper case */ |
| p = subtag; |
| while (*p) { |
| if (reslen < localeIDCapacity) { |
| *(localeID + reslen) = uprv_toupper(*p); |
| } |
| reslen++; |
| p++; |
| } |
| noRegion = FALSE; |
| } |
| |
| /* variants */ |
| n = ultag_getVariantsSize(lt); |
| if (n > 0) { |
| if (noRegion) { |
| if (reslen < localeIDCapacity) { |
| *(localeID + reslen) = LOCALE_SEP; |
| } |
| reslen++; |
| } |
| |
| for (i = 0; i < n; i++) { |
| subtag = ultag_getVariant(lt, i); |
| if (reslen < localeIDCapacity) { |
| *(localeID + reslen) = LOCALE_SEP; |
| } |
| reslen++; |
| /* write out the variant in upper case */ |
| p = subtag; |
| while (*p) { |
| if (reslen < localeIDCapacity) { |
| *(localeID + reslen) = uprv_toupper(*p); |
| } |
| reslen++; |
| p++; |
| } |
| } |
| } |
| |
| /* keywords */ |
| n = ultag_getExtensionsSize(lt); |
| subtag = ultag_getPrivateUse(lt); |
| if (n > 0 || uprv_strlen(subtag) > 0) { |
| if (reslen == 0 && n > 0) { |
| /* need a language */ |
| if (reslen < localeIDCapacity) { |
| uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); |
| } |
| reslen += LANG_UND_LEN; |
| } |
| len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); |
| reslen += len; |
| } |
| |
| ultag_close(lt); |
| return u_terminateChars(localeID, localeIDCapacity, reslen, status); |
| } |
| |
| |