blob: 73a9f6dd1bef6a9c2e3b9172d49d70407fddd22c [file] [log] [blame]
/*
* Copyright 2007 Google Inc. All rights reserved.
* Copyright 2012 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Functions for canonicalizing "file:" URLs.
#include "config.h"
#include "URLCanon.h"
#include "RawURLBuffer.h"
#include "URLCanonInternal.h"
#include "URLFile.h"
#include "URLParseInternal.h"
#if USE(WTFURL)
namespace WTF {
namespace URLCanonicalizer {
namespace {
#if OS(WINDOWS)
// Copies a Windows drive spec ("c:" or "c|", optionally preceded by slashes)
// from |spec| into |output| in canonical form, if one starts at |begin|.
//
// The input path may look like /foo/bar, c:/foo/bar or /c:/foo (backslashes
// are accepted in place of slashes as well), so any leading slashes are
// skipped before looking for the drive letter.
//
// Returns the index of the next input character to process: two past the
// drive letter when a drive spec was found, or |begin| unchanged when there
// is none (in which case nothing is written to |output|).
template<typename CHAR>
int FileDoDriveSpec(const CHAR* spec, int begin, int end, URLBuffer<char>& output)
{
    const int driveIndex = begin + URLParser::countConsecutiveSlashes(spec, begin, end);
    if (!URLParser::doesBeginWindowsDriveSpec(spec, driveIndex, end))
        return begin; // Nothing consumed, nothing written.

    // doesBeginWindowsDriveSpec is relied upon to have checked that the drive
    // letter is valid and that it is followed by a colon or a pipe.
    const CHAR driveLetter = spec[driveIndex];
    const bool isLowercase = driveLetter >= 'a' && driveLetter <= 'z';

    // A drive spec is the start of a path, so emit the slash that terminates
    // the authority (typically the third slash of the URL).
    output.append('/');

    // Drive letters are normalized to uppercase.
    output.append(isLowercase ? static_cast<char>(driveLetter - 'a' + 'A') : static_cast<char>(driveLetter));

    // The separator is always written as a colon, even if the input used a pipe.
    output.append(':');

    return driveIndex + 2;
}
#endif // OS(WINDOWS)
// Canonicalizes the path component of a "file:" URL.
//
// |spec| is the input text and |path| the range of the path within it. The
// canonical path is appended to |output|, and |outputPath| is set to the
// range of |output| that was written by this call.
//
// On Windows a leading drive spec is normalized first (see FileDoDriveSpec);
// the remainder is handed to the regular path canonicalizer. When there is
// nothing left to canonicalize, a single '/' is written.
//
// Returns the result of CanonicalizePath for the remainder, or true when the
// remainder is empty.
template<typename CharacterType, typename UCHAR>
bool doFileCanonicalizePath(const CharacterType* spec,
                            const URLComponent& path,
                            URLBuffer<char>& output,
                            URLComponent& outputPath)
{
    // Everything appended from here on belongs to the output path.
    outputPath.setBegin(output.length());

    // Copies and normalizes the "c:" at the beginning, if present.
    int afterDrive;
#if OS(WINDOWS)
    // URLComponent::begin() is a member function (it is called as such in the
    // non-Windows branch and at the end of this function), so it has to be
    // invoked here too rather than named.
    afterDrive = FileDoDriveSpec(spec, path.begin(), path.end(), output);
#else
    afterDrive = path.begin();
#endif

    // Copies the rest of the path, starting from the slash following the
    // drive colon (if any, Windows only), or the first slash of the path.
    bool success = true;
    if (afterDrive < path.end()) {
        // Use the regular path canonicalizer for the rest of the path. It is
        // given a throwaway output component: the real one is computed below
        // so that it also covers the drive spec written above.
        URLComponent subPath = URLComponent::fromRange(afterDrive, path.end());
        URLComponent fakeOutputPath;
        success = CanonicalizePath(spec, subPath, output, &fakeOutputPath);
    } else {
        // No input path, canonicalize to a slash.
        output.append('/');
    }

    outputPath.setLength(output.length() - outputPath.begin());
    return success;
}
// Writes the canonical form of a "file:" URL to |output| and fills in
// |outputParsed| with the ranges of the components that were written.
//
// |source| supplies the text of each component and |parsed| the ranges
// within that text. |queryConverter| is forwarded to CanonicalizeQuery.
//
// Returns true only if both the host and the path canonicalized
// successfully. The query and the fragment are canonicalized as well, but
// neither contributes to the return value.
template<typename CharacterType, typename UCHAR>
bool doCanonicalizeFileURL(const URLComponentSource<CharacterType>& source,
                           const URLSegments& parsed,
                           URLQueryCharsetConverter* queryConverter,
                           URLBuffer<char>& output,
                           URLSegments& outputParsed)
{
    // The scheme is known, so it is written directly instead of going through
    // the more complicated scheme canonicalizer. Only the four characters of
    // "file" belong to the scheme component; "://" is just a separator.
    outputParsed.scheme.setBegin(output.length());
    outputParsed.scheme.setLength(4);
    output.append("file://", 7);

    // Components that are never set in file: URLs.
    outputParsed.username = URLComponent();
    outputParsed.password = URLComponent();
    outputParsed.port = URLComponent();

    // Append the host. For many file URLs, this will be empty. For UNC, this
    // will be present.
    // TODO(brettw) This doesn't do any checking for host name validity. We
    // should probably handle validity checking of UNC hosts differently than
    // for regular IP hosts.
    const bool hostIsValid = canonicalizeHost(source.host, parsed.host, output, outputParsed.host);

    // The path is canonicalized even when the host failed, so that the output
    // is always complete.
    const bool pathIsValid = doFileCanonicalizePath<CharacterType, UCHAR>(source.path, parsed.path, output, outputParsed.path);

    CanonicalizeQuery(source.query, parsed.query, queryConverter, output, &outputParsed.query);

    // Ignore failure for refs since the URL can probably still be loaded.
    canonicalizeFragment(source.ref, parsed.fragment, output, outputParsed.fragment);

    return hostIsValid && pathIsValid;
}
} // namespace
bool CanonicalizeFileURL(const char* spec,
int /* specLength */,
const URLSegments& parsed,
URLQueryCharsetConverter* queryConverter,
URLBuffer<char>& output,
URLSegments* outputParsed)
{
return doCanonicalizeFileURL<char, unsigned char>(URLComponentSource<char>(spec), parsed, queryConverter, output, *outputParsed);
}
bool CanonicalizeFileURL(const UChar* spec,
int /* specLength */,
const URLSegments& parsed,
URLQueryCharsetConverter* queryConverter,
URLBuffer<char>& output,
URLSegments* outputParsed)
{
return doCanonicalizeFileURL<UChar, UChar>(URLComponentSource<UChar>(spec), parsed, queryConverter, output, *outputParsed);
}
bool FileCanonicalizePath(const char* spec,
const URLComponent& path,
URLBuffer<char>& output,
URLComponent* outputPath)
{
return doFileCanonicalizePath<char, unsigned char>(spec, path, output, *outputPath);
}
bool FileCanonicalizePath(const UChar* spec,
const URLComponent& path,
URLBuffer<char>& output,
URLComponent* outputPath)
{
return doFileCanonicalizePath<UChar, UChar>(spec, path, output, *outputPath);
}
bool ReplaceFileURL(const char* base,
const URLSegments& baseParsed,
const Replacements<char>& replacements,
URLQueryCharsetConverter* queryConverter,
URLBuffer<char>& output,
URLSegments* outputParsed)
{
URLComponentSource<char> source(base);
URLSegments parsed(baseParsed);
SetupOverrideComponents(base, replacements, &source, &parsed);
return doCanonicalizeFileURL<char, unsigned char>(source, parsed, queryConverter, output, *outputParsed);
}
bool ReplaceFileURL(const char* base,
const URLSegments& baseParsed,
const Replacements<UChar>& replacements,
URLQueryCharsetConverter* queryConverter,
URLBuffer<char>& output,
URLSegments* outputParsed)
{
RawURLBuffer<char> utf8;
URLComponentSource<char> source(base);
URLSegments parsed(baseParsed);
SetupUTF16OverrideComponents(base, replacements, utf8, &source, &parsed);
return doCanonicalizeFileURL<char, unsigned char>(source, parsed, queryConverter, output, *outputParsed);
}
} // namespace URLCanonicalizer
} // namespace WTF
#endif // USE(WTFURL)