| /* |
| * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> |
| * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> |
| * Copyright (C) 2010 Igalia S.L. |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| * |
| */ |
| |
| #include "config.h" |
| #include "UnicodeGLib.h" |
| |
| #include <wtf/Vector.h> |
| #include <wtf/unicode/UTF8.h> |
| |
| #define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF) |
| |
| namespace WTF { |
| namespace Unicode { |
| |
| UChar32 foldCase(UChar32 ch) |
| { |
| GOwnPtr<GError> gerror; |
| |
| GOwnPtr<char> utf8char; |
| utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr())); |
| if (gerror) |
| return ch; |
| |
| GOwnPtr<char> utf8caseFolded; |
| utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1)); |
| |
| GOwnPtr<gunichar> ucs4Result; |
| ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0)); |
| |
| return *ucs4Result; |
| } |
| |
| static int getUTF16LengthFromUTF8(const gchar* utf8String, int length) |
| { |
| int utf16Length = 0; |
| const gchar* inputString = utf8String; |
| |
| while ((utf8String + length - inputString > 0) && *inputString) { |
| gunichar character = g_utf8_get_char(inputString); |
| |
| utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1; |
| inputString = g_utf8_next_char(inputString); |
| } |
| |
| return utf16Length; |
| } |
| |
| typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length); |
| |
| static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction) |
| { |
| *error = false; |
| |
| // Allocate a buffer big enough to hold all the characters. |
| Vector<char> buffer(srcLength * 3); |
| char* utf8Target = buffer.data(); |
| const UChar* utf16Source = src; |
| ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true); |
| if (conversionResult != conversionOK) { |
| *error = true; |
| return -1; |
| } |
| buffer.shrink(utf8Target - buffer.data()); |
| |
| GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size())); |
| long utf8ResultLength = strlen(utf8Result.get()); |
| |
| // Calculate the destination buffer size. |
| int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength); |
| if (realLength > resultLength) { |
| *error = true; |
| return realLength; |
| } |
| |
| // Convert the result to UTF-16. |
| UChar* utf16Target = result; |
| const char* utf8Source = utf8Result.get(); |
| bool unusedISAllASCII; |
| conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, &unusedIsAllASCII, true); |
| long utf16ResultLength = utf16Target - result; |
| if (conversionResult != conversionOK) |
| *error = true; |
| |
| return utf16ResultLength <= 0 ? -1 : utf16ResultLength; |
| } |
| int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
| { |
| return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold); |
| } |
| |
| int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
| { |
| return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown); |
| } |
| |
| int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
| { |
| return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup); |
| } |
| |
| Direction direction(UChar32 c) |
| { |
| PangoBidiType type = pango_bidi_type_for_unichar(c); |
| switch (type) { |
| case PANGO_BIDI_TYPE_L: |
| return LeftToRight; |
| case PANGO_BIDI_TYPE_R: |
| return RightToLeft; |
| case PANGO_BIDI_TYPE_AL: |
| return RightToLeftArabic; |
| case PANGO_BIDI_TYPE_LRE: |
| return LeftToRightEmbedding; |
| case PANGO_BIDI_TYPE_RLE: |
| return RightToLeftEmbedding; |
| case PANGO_BIDI_TYPE_LRO: |
| return LeftToRightOverride; |
| case PANGO_BIDI_TYPE_RLO: |
| return RightToLeftOverride; |
| case PANGO_BIDI_TYPE_PDF: |
| return PopDirectionalFormat; |
| case PANGO_BIDI_TYPE_EN: |
| return EuropeanNumber; |
| case PANGO_BIDI_TYPE_AN: |
| return ArabicNumber; |
| case PANGO_BIDI_TYPE_ES: |
| return EuropeanNumberSeparator; |
| case PANGO_BIDI_TYPE_ET: |
| return EuropeanNumberTerminator; |
| case PANGO_BIDI_TYPE_CS: |
| return CommonNumberSeparator; |
| case PANGO_BIDI_TYPE_NSM: |
| return NonSpacingMark; |
| case PANGO_BIDI_TYPE_BN: |
| return BoundaryNeutral; |
| case PANGO_BIDI_TYPE_B: |
| return BlockSeparator; |
| case PANGO_BIDI_TYPE_S: |
| return SegmentSeparator; |
| case PANGO_BIDI_TYPE_WS: |
| return WhiteSpaceNeutral; |
| default: |
| return OtherNeutral; |
| } |
| } |
| |
| int umemcasecmp(const UChar* a, const UChar* b, int len) |
| { |
| GOwnPtr<char> utf8a; |
| GOwnPtr<char> utf8b; |
| |
| utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0)); |
| utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0)); |
| |
| GOwnPtr<char> foldedA; |
| GOwnPtr<char> foldedB; |
| |
| foldedA.set(g_utf8_casefold(utf8a.get(), -1)); |
| foldedB.set(g_utf8_casefold(utf8b.get(), -1)); |
| |
| // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu |
| // from the ICU docs: |
| // "Compare two strings case-insensitively using full case folding. |
| // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))." |
| // |
| // So it looks like we don't need the full g_utf8_collate here, |
| // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes). |
| // As there is no direct equivalent to this icu function in GLib, for now |
| // we'll use g_utf8_collate(): |
| |
| return g_utf8_collate(foldedA.get(), foldedB.get()); |
| } |
| |
| } |
| } |