blob: b7ae09814e884ceee4bbb575db9bc96445c43b25 [file] [log] [blame]
/*
* Copyright 2007 Google Inc. All rights reserved.
* Copyright 2012 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "URLCanonInternal.h"
#include <cstdio>
#include <errno.h>
#include <stdlib.h>
#include <string>
#if USE(WTFURL)
namespace WTF {
namespace URLCanonicalizer {
namespace {
// Appends |source[0, length)| to |output| as canonical URL text.
//
// Code units below 0x80 are copied verbatim when they belong to the character
// class |type| and percent-escaped otherwise. Code units at or above 0x80 are
// decoded as a full code point and written as escaped UTF-8.
//
// UCHAR is the unsigned counterpart of CharacterType; it is used for the
// 0x80 comparison so that a signed |char| is not mistaken for ASCII.
template<typename CharacterType, typename UCHAR>
void doAppendStringOfType(const CharacterType* source, int length, URLCharacterTypes::CharacterTypes type, URLBuffer<char>& output)
{
    int index = 0;
    while (index < length) {
        if (static_cast<UCHAR>(source[index]) < 0x80) {
            // Plain 7-bit character: keep it as-is if the component allows
            // it, otherwise escape it.
            unsigned char ascii = static_cast<unsigned char>(source[index]);
            if (URLCharacterTypes::isCharacterOfType(ascii, type))
                output.append(ascii);
            else
                appendURLEscapedCharacter(ascii, output);
        } else {
            // The reader is handed |index| by pointer so it can consume a
            // multi-unit sequence. Its result is deliberately ignored: on
            // invalid input it stores kUnicodeReplacementCharacter in the
            // code point, which is exactly what should be emitted.
            unsigned decoded;
            readUTFChar(source, &index, length, &decoded);
            AppendUTF8EscapedValue(decoded, output);
        }
        ++index;
    }
}
// Error-path helper: copies |spec[begin, end)| into |output|, escaping only
// what is unambiguously unsafe. The input values are assumed to fit in 8 bits
// even though any character type is accepted. Nothing is returned; the caller
// already knows the input is invalid.
template<typename CharacterType, typename UCHAR>
void doAppendInvalidNarrowString(const CharacterType* spec, int begin, int end, URLBuffer<char>& output)
{
    for (int position = begin; position < end; ++position) {
        const UCHAR current = static_cast<UCHAR>(spec[position]);

        if (current >= 0x80) {
            // Non-ASCII: hand off to the UTF-8/16 aware escaper, which takes
            // |position| by pointer and deals with malformed sequences by
            // appending the invalid character.
            AppendUTF8EscapedChar(spec, &position, end, output);
            continue;
        }

        // With no component context available, only control characters,
        // space and DEL are escaped; every other ASCII character is copied.
        const bool isControlOrSpace = current <= ' ' || current == 0x7f;
        if (isControlOrSpace)
            appendURLEscapedCharacter(static_cast<unsigned char>(current), output);
        else
            output.append(static_cast<char>(current));
    }
}
// Applies a single 8-bit replacement. See the URLCanonicalizer::Replacements
// structure for what the various combinations of source pointer and component
// mean. A null |overrideSource| leaves both outputs untouched; otherwise the
// destination string pointer and component are replaced by the override.
void doOverrideComponent(const char* overrideSource, const URLComponent& overrideComponent, const char*& destination, URLComponent& destinationComponent)
{
    if (!overrideSource)
        return;

    destinationComponent = overrideComponent;
    destination = overrideSource;
}
// Similar to doOverrideComponent except that it takes a UTF-16 input and does
// not actually set the output character pointer.
//
// The input is converted to UTF-8 at the end of the given buffer as a temporary
// holding place. The component indentifying the portion of the buffer used in
// the |utf8Buffer| will be specified in |*destinationComponent|.
//
// This will not actually set any |dest| pointer like doOverrideComponent
// does because all of the pointers will point into the |utf8Buffer|, which
// may get resized while we're overriding a subsequent component. Instead, the
// caller should use the beginning of the |utf8Buffer| as the string pointer
// for all components once all overrides have been prepared.
bool PrepareUTF16OverrideComponent(const UChar* overrideSource,
const URLComponent& overrideComponent,
URLBuffer<char>& utf8Buffer,
URLComponent* destinationComponent)
{
bool success = true;
if (overrideSource) {
if (!overrideComponent.isValid()) {
// Non-"valid" component (means delete), so we need to preserve that.
*destinationComponent = URLComponent();
} else {
// Convert to UTF-8.
destinationComponent->setBegin(utf8Buffer.length());
success = ConvertUTF16ToUTF8(&overrideSource[overrideComponent.begin()],
overrideComponent.length(), utf8Buffer);
destinationComponent->setLength(utf8Buffer.length() - destinationComponent->begin());
}
}
return success;
}
} // namespace
// Offsets used to turn a hexadecimal digit character into its numeric value.
// Each of the eight entries covers a run of 32 consecutive character codes
// (the range is noted beside it) and holds the amount that maps the hex digits
// in that run onto 0-15; runs that contain no hex digits hold 0.
// NOTE(review): the lookup code is not in this file - presumably callers index
// with (character / 32) and subtract the entry from the character. Confirm
// against the declaration in URLCanonInternal.h.
const char kCharToHexLookup[8] = {
0, // 0x00 - 0x1f
'0', // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
'A' - 10, // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
'a' - 10, // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
0, // 0x80 - 0x9F
0, // 0xA0 - 0xBF
0, // 0xC0 - 0xDF
0, // 0xE0 - 0xFF
};
// U+FFFD REPLACEMENT CHARACTER. A comment in doAppendStringOfType states that
// the UTF reader yields this value for invalid input.
// NOTE(review): the reader itself is defined elsewhere - confirm.
const UChar kUnicodeReplacementCharacter = 0xfffd;
// 8-bit overload. Forwards to the shared template with |unsigned char| as the
// comparison type, so bytes at or above 0x80 are recognised as non-ASCII even
// where plain |char| is signed.
void appendStringOfType(const char* source, int length, URLCharacterTypes::CharacterTypes urlComponentType, URLBuffer<char>& output)
{
    doAppendStringOfType<char, unsigned char>(
        source, length, urlComponentType, output);
}
// 16-bit overload. Forwards to the shared template, using UChar itself as the
// comparison type for the 0x80 test.
void appendStringOfType(const UChar* source, int length, URLCharacterTypes::CharacterTypes urlComponentType, URLBuffer<char>& output)
{
    doAppendStringOfType<UChar, UChar>(
        source, length, urlComponentType, output);
}
// 8-bit overload of the error-path appender. Code units are examined as
// |unsigned char| so the 0x80 and control-character tests behave the same
// whether or not plain |char| is signed.
void AppendInvalidNarrowString(const char* spec, int begin, int end, URLBuffer<char>& output)
{
    doAppendInvalidNarrowString<char, unsigned char>(
        spec, begin, end, output);
}
// 16-bit overload of the error-path appender; forwards to the shared template
// with UChar as both the storage and comparison type.
void AppendInvalidNarrowString(const UChar* spec, int begin, int end, URLBuffer<char>& output)
{
    doAppendInvalidNarrowString<UChar, UChar>(
        spec, begin, end, output);
}
// Transcodes |input[0, inputLength)| from UTF-16 to UTF-8, appending the
// result to |output|. Conversion never stops early: every decoded code point
// is appended regardless of whether the reader reported success. Returns true
// only if every read succeeded.
bool ConvertUTF16ToUTF8(const UChar* input, int inputLength, URLBuffer<char>& output)
{
    bool allValid = true;
    int offset = 0;
    while (offset < inputLength) {
        unsigned decoded;
        // The reader receives |offset| by pointer so it can consume more than
        // one code unit for a single code point.
        allValid &= readUTFChar(input, &offset, inputLength, &decoded);
        AppendUTF8Value(decoded, output);
        ++offset;
    }
    return allValid;
}
// Transcodes |input[0, inputLength)| from UTF-8 to UTF-16, appending the
// result to |output|. Conversion never stops early: every decoded code point
// is appended regardless of whether the reader reported success. Returns true
// only if every read succeeded.
bool ConvertUTF8ToUTF16(const char* input, int inputLength, URLBuffer<UChar>& output)
{
    bool allValid = true;
    int offset = 0;
    while (offset < inputLength) {
        unsigned decoded;
        // The reader receives |offset| by pointer so it can consume a
        // multi-byte sequence in one call.
        allValid &= readUTFChar(input, &offset, inputLength, &decoded);
        AppendUTF16Value(decoded, output);
        ++offset;
    }
    return allValid;
}
void SetupOverrideComponents(const char* /* base */,
const Replacements<char>& repl,
URLComponentSource<char>* source,
URLSegments* parsed)
{
// Get the source and parsed structures of the things we are replacing.
const URLComponentSource<char>& replSource = repl.sources();
const URLSegments& replParsed = repl.components();
doOverrideComponent(replSource.scheme, replParsed.scheme, source->scheme, parsed->scheme);
doOverrideComponent(replSource.username, replParsed.username, source->username, parsed->username);
doOverrideComponent(replSource.password, replParsed.password, source->password, parsed->password);
// Our host should be empty if not present, so override the default setup.
doOverrideComponent(replSource.host, replParsed.host, source->host, parsed->host);
if (parsed->host.length() == -1)
parsed->host.setLength(0);
doOverrideComponent(replSource.port, replParsed.port, source->port, parsed->port);
doOverrideComponent(replSource.path, replParsed.path, source->path, parsed->path);
doOverrideComponent(replSource.query, replParsed.query, source->query, parsed->query);
doOverrideComponent(replSource.ref, replParsed.fragment, source->ref, parsed->fragment);
}
bool SetupUTF16OverrideComponents(const char* /* base */,
const Replacements<UChar>& repl,
URLBuffer<char>& utf8Buffer,
URLComponentSource<char>* source,
URLSegments* parsed)
{
bool success = true;
// Get the source and parsed structures of the things we are replacing.
const URLComponentSource<UChar>& replSource = repl.sources();
const URLSegments& replParsed = repl.components();
success &= PrepareUTF16OverrideComponent(replSource.scheme, replParsed.scheme,
utf8Buffer, &parsed->scheme);
success &= PrepareUTF16OverrideComponent(replSource.username, replParsed.username,
utf8Buffer, &parsed->username);
success &= PrepareUTF16OverrideComponent(replSource.password, replParsed.password,
utf8Buffer, &parsed->password);
success &= PrepareUTF16OverrideComponent(replSource.host, replParsed.host,
utf8Buffer, &parsed->host);
success &= PrepareUTF16OverrideComponent(replSource.port, replParsed.port,
utf8Buffer, &parsed->port);
success &= PrepareUTF16OverrideComponent(replSource.path, replParsed.path,
utf8Buffer, &parsed->path);
success &= PrepareUTF16OverrideComponent(replSource.query, replParsed.query,
utf8Buffer, &parsed->query);
success &= PrepareUTF16OverrideComponent(replSource.ref, replParsed.fragment,
utf8Buffer, &parsed->fragment);
// PrepareUTF16OverrideComponent will not have set the data pointer since the
// buffer could be resized, invalidating the pointers. We set the data
// pointers for affected components now that the buffer is finalized.
if (replSource.scheme)
source->scheme = utf8Buffer.data();
if (replSource.username)
source->username = utf8Buffer.data();
if (replSource.password)
source->password = utf8Buffer.data();
if (replSource.host)
source->host = utf8Buffer.data();
if (replSource.port)
source->port = utf8Buffer.data();
if (replSource.path)
source->path = utf8Buffer.data();
if (replSource.query)
source->query = utf8Buffer.data();
if (replSource.ref)
source->ref = utf8Buffer.data();
return success;
}
#if !OS(WINDOWS)
// Minimal stand-in for the Windows CRT function of the same name, for
// platforms that lack it.
//
// Writes |value| into |buffer| as a null-terminated string in base |radix|.
// Only radix 10 (signed decimal) and radix 16 (lowercase hexadecimal of the
// value's unsigned bit pattern) are supported.
//
// Returns 0 on success. Returns EINVAL if |buffer| is null, |sizeInChars| is
// zero, |radix| is unsupported, or the text plus its terminator does not fit
// in |sizeInChars| characters. On a size failure the buffer may hold a
// truncated, null-terminated prefix.
int _itoa_s(int value, char* buffer, size_t sizeInChars, int radix)
{
    // Reject unusable destinations up front rather than letting snprintf
    // dereference a null pointer.
    if (!buffer || !sizeInChars)
        return EINVAL;

    int written;
    if (radix == 10)
        written = snprintf(buffer, sizeInChars, "%d", value);
    else if (radix == 16) {
        // %x takes an unsigned argument; convert explicitly so negative
        // values are formatted as their two's-complement bit pattern.
        written = snprintf(buffer, sizeInChars, "%x", static_cast<unsigned>(value));
    } else
        return EINVAL;

    // snprintf reports the length it wanted to write; anything at or beyond
    // the capacity means the output was truncated. A negative value is an
    // encoding error.
    if (written < 0 || static_cast<size_t>(written) >= sizeInChars)
        return EINVAL;
    return 0;
}
// Minimal stand-in for the Windows CRT function of the same name, for
// platforms that lack it.
//
// Writes |value| into |buffer| as a null-terminated decimal string of UChar.
// Only radix 10 is supported.
//
// Returns 0 on success. Returns EINVAL if |buffer| is null, |radix| is not 10,
// or the digits plus the terminator do not fit in |sizeInChars| characters.
// |buffer| is not modified on failure.
int _itow_s(int value, UChar* buffer, size_t sizeInChars, int radix)
{
    if (!buffer || radix != 10)
        return EINVAL;

    // A 32-bit int prints as at most 11 characters ("-2147483648"), so 13
    // bytes hold the text plus the terminating null with room to spare.
    char temp[13];
    const int written = snprintf(temp, sizeof(temp), "%d", value);

    // A negative result is an encoding error. The scratch-buffer check keeps
    // the copy loop below in bounds should int ever be wider than expected.
    if (written < 0 || static_cast<size_t>(written) >= sizeof(temp))
        return EINVAL;

    // The destination needs room for every digit plus the terminator.
    if (static_cast<size_t>(written) >= sizeInChars)
        return EINVAL;

    // The digits are ASCII, so widening each byte is a valid conversion.
    for (int i = 0; i < written; ++i)
        buffer[i] = static_cast<UChar>(temp[i]);
    buffer[written] = '\0';
    return 0;
}
#endif // !OS(WINDOWS)
} // namespace URLCanonicalizer
} // namespace WTF
#endif // USE(WTFURL)