src/third_party/WebKit/Source/WTF/wtf/url/src/URLParseFile.cpp - cobalt - Git at Google

 /*
  * Copyright 2007 Google Inc. All rights reserved.
  * Copyright 2012 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  *     * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
  *     * Neither the name of Google Inc. nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include "URLParse.h"

 #include "URLFile.h"
 #include "URLParseInternal.h"

 // Interesting IE file:isms...
 //
 //  INPUT                      OUTPUT
 //  =========================  ==============================
 //  file:/foo/bar              file:///foo/bar
 //      The result here seems totally invalid!?!? This isn't UNC.
 //
 //  file:/
 //  file:// or any other number of slashes
 //      IE6 doesn't do anything at all if you click on this link. No error:
 //      nothing. IE6's history system seems to always color this link, so I'm
 //      guessing that it maps internally to the empty URL.
 //
 //  C:\                        file:///C:/
 //      When on a file: URL source page, this link will work. When over HTTP,
 //      the file: URL will appear in the status bar but the link will not work
 //      (security restriction for all file URLs).
 //
 //  file:foo/                  file:foo/     (invalid?!?!?)
 //  file:/foo/                 file:///foo/  (invalid?!?!?)
 //  file://foo/                file://foo/   (UNC to server "foo")
 //  file:///foo/               file:///foo/  (invalid, seems to be a file)
 //  file:////foo/              file://foo/   (UNC to server "foo")
 //      Any more than four slashes is also treated as UNC.
 //
 //  file:C:/                   file://C:/
 //  file:/C:/                  file://C:/
 //      The number of slashes after "file:" doesn't matter if the thing
 //      following it looks like an absolute drive path. Also, slashes and
 //      backslashes are equally valid here.

 #if USE(WTFURL)

 namespace WTF {

 namespace URLParser {

 namespace {

 // A subcomponent of doParseFileURL: parses the text that follows the slashes
 // after the scheme as a UNC-style "host/path" pair.
 //
 // |spec| is the URL text, |afterSlashes| is the index of the first character
 // after the slashes following the scheme, and |specLength| is the end index of
 // the text to consider. This sets parsed.host and parsed.path; parsed.query and
 // parsed.fragment are handed to parsePathInternal whenever there is a path to
 // parse. All other output components are left untouched (doParseFileURL
 // handles these for us).
 template<typename CharacterType>
 void doParseUNC(const CharacterType* spec, int afterSlashes, int specLength, URLSegments& parsed)
 {
     int nextSlash = findNextSlash(spec, afterSlashes, specLength);
     if (nextSlash == specLength) {
         // No additional slash found, as in "file://foo", treat the text as the
         // host with no path (this will end up being UNC to server "foo").
         int hostLength = specLength - afterSlashes;
         if (hostLength)
             parsed.host = URLComponent(afterSlashes, hostLength);
         else
             parsed.host.reset();
         parsed.path.reset();
         return;
     }

 #if OS(WINDOWS)
     // See if we have something that looks like a path following the first
     // component. As in "file://localhost/c:/", we get "c:/" out. We want to
     // treat this as having no host but the path given. Works on Windows only.
     if (doesBeginWindowsDriveSpec(spec, nextSlash + 1, specLength)) {
         parsed.host.reset();
         // Use the same range helper and fragment member as every other call
         // site in this file.
         parsePathInternal(spec, URLComponent::fromRange(nextSlash, specLength),
                           &parsed.path, &parsed.query, &parsed.fragment);
         return;
     }
 #endif

     // Otherwise, everything up until that first slash we found is the host name,
     // which will end up being the UNC host. For example "file://foo/bar.txt"
     // will get a server name of "foo" and a path of "/bar.txt". Later, on
     // Windows, this should be treated as the filename "\\foo\bar.txt" in proper
     // UNC notation.
     int hostLength = nextSlash - afterSlashes;
     if (hostLength)
         parsed.host = URLComponent::fromRange(afterSlashes, nextSlash);
     else
         parsed.host.reset();
     if (nextSlash < specLength) {
         // The path starts at the slash itself, so the slash is part of it.
         parsePathInternal(spec, URLComponent::fromRange(nextSlash, specLength),
                           &parsed.path, &parsed.query, &parsed.fragment);
     } else
         parsed.path.reset();
 }

 // A subcomponent of DoParseFileURL, the input should be a local file, with the
 // beginning of the path indicated by the index in |pathBegin|. This will
 // initialize the host, path, query, and ref, and leave the other output
 // components untouched (DoInitFileURL handles these for us).
 template<typename CharacterType>
 void doParseLocalFile(const CharacterType* spec, int pathBegin, int specLength, URLSegments& parsed)
 {
     parsed.host.reset();
     parsePathInternal(spec, URLComponent::fromRange(pathBegin, specLength),
                       &parsed.path, &parsed.query, &parsed.fragment);
 }

 // Backend for the external ParseFileURL overloads; works on either character
 // type. |spec| holds |specLength| characters of URL text. The input is
 // trimmed, an optional scheme is located, and the remainder is dispatched to
 // doParseUNC or doParseLocalFile depending on how many slashes follow the
 // scheme. The character after the scheme is usually a slash, but needn't be;
 // we allow paths like "file:c:\foo".
 template<typename CharacterType>
 void doParseFileURL(const CharacterType* spec, int specLength, URLSegments& parsed)
 {
     ASSERT(specLength >= 0);

     // File URLs never use these components.
     parsed.username.reset();
     parsed.password.reset();
     parsed.port.reset();

     // Several exits below never write these two, so clear them up front and
     // let the paths that need them fill them in.
     parsed.query.reset();
     parsed.fragment.reset();

     // Strip leading & trailing spaces and control characters.
     int start = 0;
     trimURL(spec, start, specLength);

     // Locate the scheme. |schemeEnd| is the index where the rest of the URL
     // begins; it stays at |start| whenever no scheme is present.
     int schemeEnd = start;
     bool lookForScheme = true;
 #if OS(WINDOWS)
     // Count the leading slashes so that UNC input and things like "/c:/foo" are
     // recognised. This is the no-scheme case, which lets pages use links such
     // as "c:/foo/bar" or "//foo/bar". The relative URL resolver also ends up
     // here when it decides it has an absolute URL, possibly with input like
     // "/c:/foo".
     const int leadingSlashes = countConsecutiveSlashes(spec, start, specLength);
     if (doesBeginWindowsDriveSpec(spec, start + leadingSlashes, specLength)) {
         // Windows path (for example "c:\foo"): no scheme, skip the slashes.
         schemeEnd = start + leadingSlashes;
         lookForScheme = false;
     } else if (doesBeginUNCPath(spec, start, specLength, false)) {
         // Windows UNC path: no scheme either, but the slashes are kept.
         lookForScheme = false;
     }
 #endif
     if (lookForScheme && ExtractScheme(&spec[start], specLength - start, &parsed.scheme)) {
         // ExtractScheme was given a substring, so shift its result back.
         parsed.scheme.moveBy(start);
         schemeEnd = parsed.scheme.end() + 1;
     } else {
         // Either scheme extraction was skipped or nothing was found.
         parsed.scheme.reset();
     }

     // Handle empty specs, ones that contain only whitespace or control chars,
     // or ones that are just the scheme (for example "file:").
     if (schemeEnd == specLength) {
         parsed.host.reset();
         parsed.path.reset();
         return;
     }

     const int slashCount = countConsecutiveSlashes(spec, schemeEnd, specLength);
     const int firstNonSlash = schemeEnd + slashCount;

 #if OS(WINDOWS)
     // Check for a drive spec again. The earlier check only looked at the very
     // beginning of the input to decide whether there was a scheme at all; this
     // one also covers input with a real scheme such as "file:///C:/".
     // Anything not beginning with a drive spec ("c:\") on Windows is treated
     // as UNC, with the exception of three slashes which always means a file.
     // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
     const bool parseAsUNC = !doesBeginWindowsDriveSpec(spec, firstNonSlash, specLength) && slashCount != 3;
 #else
     // file: URL with exactly 2 slashes is considered to have a host component.
     const bool parseAsUNC = slashCount == 2;
 #endif // OS(WINDOWS)

     if (parseAsUNC) {
         doParseUNC(spec, firstNonSlash, specLength, parsed);
         return;
     }

     // Easy and common case: the full path immediately follows the scheme
     // (modulo slashes), as in "file://c:/foo". Everything from there to the
     // end is the path, and the last slash is included if there is one.
     const int pathBegin = slashCount > 0 ? firstNonSlash - 1 : schemeEnd;
     doParseLocalFile(spec, pathBegin, specLength, parsed);
 }

 } // namespace

 void ParseFileURL(const char* url, int urlLength, URLSegments* parsed)
 {
     doParseFileURL(url, urlLength, *parsed);
 }

 void ParseFileURL(const UChar* url, int urlLength, URLSegments* parsed)
 {
     doParseFileURL(url, urlLength, *parsed);
 }

 } // namespace URLParser

 } // namespace WTF

 #endif // USE(WTFURL)
	/*
	* Copyright 2007 Google Inc. All rights reserved.
	* Copyright 2012 Apple Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are
	* met:
	*
	* * Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* * Redistributions in binary form must reproduce the above
	* copyright notice, this list of conditions and the following disclaimer
	* in the documentation and/or other materials provided with the
	* distribution.
	* * Neither the name of Google Inc. nor the names of its
	* contributors may be used to endorse or promote products derived from
	* this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "config.h"
	#include "URLParse.h"

	#include "URLFile.h"
	#include "URLParseInternal.h"

	// Interesting IE file:isms...
	//
	//  INPUT                      OUTPUT
	//  =========================  ==============================
	//  file:/foo/bar              file:///foo/bar
	//      The result here seems totally invalid!?!? This isn't UNC.
	//
	//  file:/
	//  file:// or any other number of slashes
	//      IE6 doesn't do anything at all if you click on this link. No error:
	//      nothing. IE6's history system seems to always color this link, so I'm
	//      guessing that it maps internally to the empty URL.
	//
	//  C:\                        file:///C:/
	//      When on a file: URL source page, this link will work. When over HTTP,
	//      the file: URL will appear in the status bar but the link will not work
	//      (security restriction for all file URLs).
	//
	//  file:foo/                  file:foo/     (invalid?!?!?)
	//  file:/foo/                 file:///foo/  (invalid?!?!?)
	//  file://foo/                file://foo/   (UNC to server "foo")
	//  file:///foo/               file:///foo/  (invalid, seems to be a file)
	//  file:////foo/              file://foo/   (UNC to server "foo")
	//      Any more than four slashes is also treated as UNC.
	//
	//  file:C:/                   file://C:/
	//  file:/C:/                  file://C:/
	//      The number of slashes after "file:" doesn't matter if the thing
	//      following it looks like an absolute drive path. Also, slashes and
	//      backslashes are equally valid here.

	#if USE(WTFURL)

	namespace WTF {

	namespace URLParser {

	namespace {

	// A subcomponent of doParseFileURL: parses the text that follows the slashes
	// after the scheme as a UNC-style "host/path" pair.
	//
	// |spec| is the URL text, |afterSlashes| is the index of the first character
	// after the slashes following the scheme, and |specLength| is the end index of
	// the text to consider. This sets parsed.host and parsed.path; parsed.query and
	// parsed.fragment are handed to parsePathInternal whenever there is a path to
	// parse. All other output components are left untouched (doParseFileURL
	// handles these for us).
	template<typename CharacterType>
	void doParseUNC(const CharacterType* spec, int afterSlashes, int specLength, URLSegments& parsed)
	{
	    int nextSlash = findNextSlash(spec, afterSlashes, specLength);
	    if (nextSlash == specLength) {
	        // No additional slash found, as in "file://foo", treat the text as the
	        // host with no path (this will end up being UNC to server "foo").
	        int hostLength = specLength - afterSlashes;
	        if (hostLength)
	            parsed.host = URLComponent(afterSlashes, hostLength);
	        else
	            parsed.host.reset();
	        parsed.path.reset();
	        return;
	    }

	#if OS(WINDOWS)
	    // See if we have something that looks like a path following the first
	    // component. As in "file://localhost/c:/", we get "c:/" out. We want to
	    // treat this as having no host but the path given. Works on Windows only.
	    if (doesBeginWindowsDriveSpec(spec, nextSlash + 1, specLength)) {
	        parsed.host.reset();
	        // Use the same range helper and fragment member as every other call
	        // site in this file.
	        parsePathInternal(spec, URLComponent::fromRange(nextSlash, specLength),
	                          &parsed.path, &parsed.query, &parsed.fragment);
	        return;
	    }
	#endif

	    // Otherwise, everything up until that first slash we found is the host name,
	    // which will end up being the UNC host. For example "file://foo/bar.txt"
	    // will get a server name of "foo" and a path of "/bar.txt". Later, on
	    // Windows, this should be treated as the filename "\\foo\bar.txt" in proper
	    // UNC notation.
	    int hostLength = nextSlash - afterSlashes;
	    if (hostLength)
	        parsed.host = URLComponent::fromRange(afterSlashes, nextSlash);
	    else
	        parsed.host.reset();
	    if (nextSlash < specLength) {
	        // The path starts at the slash itself, so the slash is part of it.
	        parsePathInternal(spec, URLComponent::fromRange(nextSlash, specLength),
	                          &parsed.path, &parsed.query, &parsed.fragment);
	    } else
	        parsed.path.reset();
	}

	// A subcomponent of DoParseFileURL, the input should be a local file, with the
	// beginning of the path indicated by the index in \|pathBegin\|. This will
	// initialize the host, path, query, and ref, and leave the other output
	// components untouched (DoInitFileURL handles these for us).
	template<typename CharacterType>
	void doParseLocalFile(const CharacterType* spec, int pathBegin, int specLength, URLSegments& parsed)
	{
	parsed.host.reset();
	parsePathInternal(spec, URLComponent::fromRange(pathBegin, specLength),
	&parsed.path, &parsed.query, &parsed.fragment);
	}

	// Backend for the external ParseFileURL overloads; works on either character
	// type. |spec| holds |specLength| characters of URL text. The input is
	// trimmed, an optional scheme is located, and the remainder is dispatched to
	// doParseUNC or doParseLocalFile depending on how many slashes follow the
	// scheme. The character after the scheme is usually a slash, but needn't be;
	// we allow paths like "file:c:\foo".
	template<typename CharacterType>
	void doParseFileURL(const CharacterType* spec, int specLength, URLSegments& parsed)
	{
	    ASSERT(specLength >= 0);

	    // File URLs never use these components.
	    parsed.username.reset();
	    parsed.password.reset();
	    parsed.port.reset();

	    // Several exits below never write these two, so clear them up front and
	    // let the paths that need them fill them in.
	    parsed.query.reset();
	    parsed.fragment.reset();

	    // Strip leading & trailing spaces and control characters.
	    int start = 0;
	    trimURL(spec, start, specLength);

	    // Locate the scheme. |schemeEnd| is the index where the rest of the URL
	    // begins; it stays at |start| whenever no scheme is present.
	    int schemeEnd = start;
	    bool lookForScheme = true;
	#if OS(WINDOWS)
	    // Count the leading slashes so that UNC input and things like "/c:/foo" are
	    // recognised. This is the no-scheme case, which lets pages use links such
	    // as "c:/foo/bar" or "//foo/bar". The relative URL resolver also ends up
	    // here when it decides it has an absolute URL, possibly with input like
	    // "/c:/foo".
	    const int leadingSlashes = countConsecutiveSlashes(spec, start, specLength);
	    if (doesBeginWindowsDriveSpec(spec, start + leadingSlashes, specLength)) {
	        // Windows path (for example "c:\foo"): no scheme, skip the slashes.
	        schemeEnd = start + leadingSlashes;
	        lookForScheme = false;
	    } else if (doesBeginUNCPath(spec, start, specLength, false)) {
	        // Windows UNC path: no scheme either, but the slashes are kept.
	        lookForScheme = false;
	    }
	#endif
	    if (lookForScheme && ExtractScheme(&spec[start], specLength - start, &parsed.scheme)) {
	        // ExtractScheme was given a substring, so shift its result back.
	        parsed.scheme.moveBy(start);
	        schemeEnd = parsed.scheme.end() + 1;
	    } else {
	        // Either scheme extraction was skipped or nothing was found.
	        parsed.scheme.reset();
	    }

	    // Handle empty specs, ones that contain only whitespace or control chars,
	    // or ones that are just the scheme (for example "file:").
	    if (schemeEnd == specLength) {
	        parsed.host.reset();
	        parsed.path.reset();
	        return;
	    }

	    const int slashCount = countConsecutiveSlashes(spec, schemeEnd, specLength);
	    const int firstNonSlash = schemeEnd + slashCount;

	#if OS(WINDOWS)
	    // Check for a drive spec again. The earlier check only looked at the very
	    // beginning of the input to decide whether there was a scheme at all; this
	    // one also covers input with a real scheme such as "file:///C:/".
	    // Anything not beginning with a drive spec ("c:\") on Windows is treated
	    // as UNC, with the exception of three slashes which always means a file.
	    // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
	    const bool parseAsUNC = !doesBeginWindowsDriveSpec(spec, firstNonSlash, specLength) && slashCount != 3;
	#else
	    // file: URL with exactly 2 slashes is considered to have a host component.
	    const bool parseAsUNC = slashCount == 2;
	#endif // OS(WINDOWS)

	    if (parseAsUNC) {
	        doParseUNC(spec, firstNonSlash, specLength, parsed);
	        return;
	    }

	    // Easy and common case: the full path immediately follows the scheme
	    // (modulo slashes), as in "file://c:/foo". Everything from there to the
	    // end is the path, and the last slash is included if there is one.
	    const int pathBegin = slashCount > 0 ? firstNonSlash - 1 : schemeEnd;
	    doParseLocalFile(spec, pathBegin, specLength, parsed);
	}

	} // namespace

	void ParseFileURL(const char* url, int urlLength, URLSegments* parsed)
	{
	doParseFileURL(url, urlLength, *parsed);
	}

	void ParseFileURL(const UChar* url, int urlLength, URLSegments* parsed)
	{
	doParseFileURL(url, urlLength, *parsed);
	}

	} // namespace URLParser

	} // namespace WTF

	#endif // USE(WTFURL)