blob: 5d13c748b3d620addb6b6619519c4d6feda742d7 [file] [log] [blame]
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "jsstr.h"
#include "mozilla/Attributes.h"
#include "mozilla/Casting.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/FloatingPoint.h"
#include "mozilla/PodOperations.h"
#include "mozilla/Range.h"
#include "mozilla/TypeTraits.h"
#include "mozilla/UniquePtr.h"
#include <ctype.h>
#include <string.h>
#include "jsapi.h"
#include "jsarray.h"
#include "jsatom.h"
#include "jsbool.h"
#include "jscntxt.h"
#include "jsgc.h"
#include "jsnum.h"
#include "jsobj.h"
#include "jsopcode.h"
#include "jstypes.h"
#include "jsutil.h"
#include "builtin/Intl.h"
#include "builtin/RegExp.h"
#include "jit/InlinableNatives.h"
#include "js/Conversions.h"
#if ENABLE_INTL_API
#include "unicode/unorm.h"
#endif
#include "vm/GlobalObject.h"
#include "vm/Interpreter.h"
#include "vm/Opcodes.h"
#include "vm/Printer.h"
#include "vm/RegExpObject.h"
#include "vm/RegExpStatics.h"
#include "vm/ScopeObject.h"
#include "vm/StringBuffer.h"
#include "vm/Interpreter-inl.h"
#include "vm/String-inl.h"
#include "vm/StringObject-inl.h"
#include "vm/TypeInference-inl.h"
using namespace js;
using namespace js::gc;
using namespace js::unicode;
using JS::Symbol;
using JS::SymbolCode;
using JS::ToInt32;
using JS::ToUint32;
using mozilla::AssertedCast;
using mozilla::CheckedInt;
using mozilla::IsNaN;
using mozilla::IsNegativeZero;
using mozilla::IsSame;
using mozilla::Move;
using mozilla::PodCopy;
using mozilla::PodEqual;
using mozilla::RangedPtr;
using mozilla::UniquePtr;
using JS::AutoCheckCannotGC;
static JSLinearString*
ArgToRootedString(JSContext* cx, const CallArgs& args, unsigned argno)
{
if (argno >= args.length())
return cx->names().undefined;
JSString* str = ToString<CanGC>(cx, args[argno]);
if (!str)
return nullptr;
args[argno].setString(str);
return str->ensureLinear(cx);
}
/*
* Forward declarations for URI encode/decode and helper routines
*/
static bool
str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
static bool
str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
static bool
str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
static bool
str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
/*
* Global string methods
*/
/* ES5 B.2.1 */
template <typename CharT>
static Latin1Char*
Escape(JSContext* cx, const CharT* chars, uint32_t length, uint32_t* newLengthOut)
{
static const uint8_t shouldPassThrough[128] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
};
/* Take a first pass and see how big the result string will need to be. */
uint32_t newLength = length;
for (size_t i = 0; i < length; i++) {
char16_t ch = chars[i];
if (ch < 128 && shouldPassThrough[ch])
continue;
/* The character will be encoded as %XX or %uXXXX. */
newLength += (ch < 256) ? 2 : 5;
/*
* newlength is incremented by at most 5 on each iteration, so worst
* case newlength == length * 6. This can't overflow.
*/
static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6,
"newlength must not overflow");
}
Latin1Char* newChars = cx->pod_malloc<Latin1Char>(newLength + 1);
if (!newChars)
return nullptr;
static const char digits[] = "0123456789ABCDEF";
size_t i, ni;
for (i = 0, ni = 0; i < length; i++) {
char16_t ch = chars[i];
if (ch < 128 && shouldPassThrough[ch]) {
newChars[ni++] = ch;
} else if (ch < 256) {
newChars[ni++] = '%';
newChars[ni++] = digits[ch >> 4];
newChars[ni++] = digits[ch & 0xF];
} else {
newChars[ni++] = '%';
newChars[ni++] = 'u';
newChars[ni++] = digits[ch >> 12];
newChars[ni++] = digits[(ch & 0xF00) >> 8];
newChars[ni++] = digits[(ch & 0xF0) >> 4];
newChars[ni++] = digits[ch & 0xF];
}
}
MOZ_ASSERT(ni == newLength);
newChars[newLength] = 0;
*newLengthOut = newLength;
return newChars;
}
static bool
str_escape(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
JSLinearString* str = ArgToRootedString(cx, args, 0);
if (!str)
return false;
ScopedJSFreePtr<Latin1Char> newChars;
uint32_t newLength = 0; // initialize to silence GCC warning
if (str->hasLatin1Chars()) {
AutoCheckCannotGC nogc;
newChars = Escape(cx, str->latin1Chars(nogc), str->length(), &newLength);
} else {
AutoCheckCannotGC nogc;
newChars = Escape(cx, str->twoByteChars(nogc), str->length(), &newLength);
}
if (!newChars)
return false;
JSString* res = NewString<CanGC>(cx, newChars.get(), newLength);
if (!res)
return false;
newChars.forget();
args.rval().setString(res);
return true;
}
template <typename CharT>
static inline bool
Unhex4(const RangedPtr<const CharT> chars, char16_t* result)
{
char16_t a = chars[0],
b = chars[1],
c = chars[2],
d = chars[3];
if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
return false;
*result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
return true;
}
template <typename CharT>
static inline bool
Unhex2(const RangedPtr<const CharT> chars, char16_t* result)
{
char16_t a = chars[0],
b = chars[1];
if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
return false;
*result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
return true;
}
template <typename CharT>
static bool
Unescape(StringBuffer& sb, const mozilla::Range<const CharT> chars)
{
/*
* NB: use signed integers for length/index to allow simple length
* comparisons without unsigned-underflow hazards.
*/
static_assert(JSString::MAX_LENGTH <= INT_MAX, "String length must fit in a signed integer");
int length = AssertedCast<int>(chars.length());
/*
* Note that the spec algorithm has been optimized to avoid building
* a string in the case where no escapes are present.
*/
/* Step 4. */
int k = 0;
bool building = false;
/* Step 5. */
while (k < length) {
/* Step 6. */
char16_t c = chars[k];
/* Step 7. */
if (c != '%')
goto step_18;
/* Step 8. */
if (k > length - 6)
goto step_14;
/* Step 9. */
if (chars[k + 1] != 'u')
goto step_14;
#define ENSURE_BUILDING \
do { \
if (!building) { \
building = true; \
if (!sb.reserve(length)) \
return false; \
sb.infallibleAppend(chars.start().get(), k); \
} \
} while(false);
/* Step 10-13. */
if (Unhex4(chars.start() + k + 2, &c)) {
ENSURE_BUILDING;
k += 5;
goto step_18;
}
step_14:
/* Step 14. */
if (k > length - 3)
goto step_18;
/* Step 15-17. */
if (Unhex2(chars.start() + k + 1, &c)) {
ENSURE_BUILDING;
k += 2;
}
step_18:
if (building && !sb.append(c))
return false;
/* Step 19. */
k += 1;
}
return true;
#undef ENSURE_BUILDING
}
/* ES5 B.2.2 */
static bool
str_unescape(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
/* Step 1. */
RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
if (!str)
return false;
/* Step 3. */
StringBuffer sb(cx);
if (str->hasTwoByteChars() && !sb.ensureTwoByteChars())
return false;
if (str->hasLatin1Chars()) {
AutoCheckCannotGC nogc;
if (!Unescape(sb, str->latin1Range(nogc)))
return false;
} else {
AutoCheckCannotGC nogc;
if (!Unescape(sb, str->twoByteRange(nogc)))
return false;
}
JSLinearString* result;
if (!sb.empty()) {
result = sb.finishString();
if (!result)
return false;
} else {
result = str;
}
args.rval().setString(result);
return true;
}
#if JS_HAS_UNEVAL
static bool
str_uneval(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
JSString* str = ValueToSource(cx, args.get(0));
if (!str)
return false;
args.rval().setString(str);
return true;
}
#endif
static const JSFunctionSpec string_functions[] = {
JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
#if JS_HAS_UNEVAL
JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
#endif
JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1, JSPROP_RESOLVING),
JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1, JSPROP_RESOLVING),
JS_FS_END
};
static const unsigned STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
static bool
str_enumerate(JSContext* cx, HandleObject obj)
{
RootedString str(cx, obj->as<StringObject>().unbox());
RootedValue value(cx);
for (size_t i = 0, length = str->length(); i < length; i++) {
JSString* str1 = NewDependentString(cx, str, i, 1);
if (!str1)
return false;
value.setString(str1);
if (!DefineElement(cx, obj, i, value, nullptr, nullptr,
STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
{
return false;
}
}
return true;
}
static bool
str_mayResolve(const JSAtomState&, jsid id, JSObject*)
{
// str_resolve ignores non-integer ids.
return JSID_IS_INT(id);
}
static bool
str_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp)
{
if (!JSID_IS_INT(id))
return true;
RootedString str(cx, obj->as<StringObject>().unbox());
int32_t slot = JSID_TO_INT(id);
if ((size_t)slot < str->length()) {
JSString* str1 = cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
if (!str1)
return false;
RootedValue value(cx, StringValue(str1));
if (!DefineElement(cx, obj, uint32_t(slot), value, nullptr, nullptr,
STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
{
return false;
}
*resolvedp = true;
}
return true;
}
const Class StringObject::class_ = {
js_String_str,
JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
JSCLASS_HAS_CACHED_PROTO(JSProto_String),
nullptr, /* addProperty */
nullptr, /* delProperty */
nullptr, /* getProperty */
nullptr, /* setProperty */
str_enumerate,
str_resolve,
str_mayResolve
};
/*
* Returns a JSString * for the |this| value associated with 'call', or throws
* a TypeError if |this| is null or undefined. This algorithm is the same as
* calling CheckObjectCoercible(this), then returning ToString(this), as all
* String.prototype.* methods do (other than toString and valueOf).
*/
static MOZ_ALWAYS_INLINE JSString*
ThisToStringForStringProto(JSContext* cx, CallReceiver call)
{
JS_CHECK_RECURSION(cx, return nullptr);
if (call.thisv().isString())
return call.thisv().toString();
if (call.thisv().isObject()) {
RootedObject obj(cx, &call.thisv().toObject());
if (obj->is<StringObject>()) {
StringObject* nobj = &obj->as<StringObject>();
Rooted<jsid> id(cx, NameToId(cx->names().toString));
if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, str_toString)) {
JSString* str = nobj->unbox();
call.setThis(StringValue(str));
return str;
}
}
} else if (call.thisv().isNullOrUndefined()) {
JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_CANT_CONVERT_TO,
call.thisv().isNull() ? "null" : "undefined", "object");
return nullptr;
}
JSString* str = ToStringSlow<CanGC>(cx, call.thisv());
if (!str)
return nullptr;
call.setThis(StringValue(str));
return str;
}
MOZ_ALWAYS_INLINE bool
IsString(HandleValue v)
{
return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
}
#if JS_HAS_TOSOURCE
MOZ_ALWAYS_INLINE bool
str_toSource_impl(JSContext* cx, const CallArgs& args)
{
MOZ_ASSERT(IsString(args.thisv()));
Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
if (!str)
return false;
str = QuoteString(cx, str, '"');
if (!str)
return false;
StringBuffer sb(cx);
if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
return false;
str = sb.finishString();
if (!str)
return false;
args.rval().setString(str);
return true;
}
static bool
str_toSource(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
}
#endif /* JS_HAS_TOSOURCE */
MOZ_ALWAYS_INLINE bool
str_toString_impl(JSContext* cx, const CallArgs& args)
{
MOZ_ASSERT(IsString(args.thisv()));
args.rval().setString(args.thisv().isString()
? args.thisv().toString()
: args.thisv().toObject().as<StringObject>().unbox());
return true;
}
bool
js::str_toString(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
}
/*
* Java-like string native methods.
*/
JSString*
js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt)
{
MOZ_ASSERT(0 <= beginInt);
MOZ_ASSERT(0 <= lengthInt);
MOZ_ASSERT(uint32_t(beginInt) <= str->length());
MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
uint32_t begin = beginInt;
uint32_t len = lengthInt;
/*
* Optimization for one level deep ropes.
* This is common for the following pattern:
*
* while() {
* text = text.substr(0, x) + "bla" + text.substr(x)
* test.charCodeAt(x + 1)
* }
*/
if (str->isRope()) {
JSRope* rope = &str->asRope();
/* Substring is totally in leftChild of rope. */
if (begin + len <= rope->leftChild()->length())
return NewDependentString(cx, rope->leftChild(), begin, len);
/* Substring is totally in rightChild of rope. */
if (begin >= rope->leftChild()->length()) {
begin -= rope->leftChild()->length();
return NewDependentString(cx, rope->rightChild(), begin, len);
}
/*
* Requested substring is partly in the left and partly in right child.
* Create a rope of substrings for both childs.
*/
MOZ_ASSERT(begin < rope->leftChild()->length() &&
begin + len > rope->leftChild()->length());
size_t lhsLength = rope->leftChild()->length() - begin;
size_t rhsLength = begin + len - rope->leftChild()->length();
Rooted<JSRope*> ropeRoot(cx, rope);
RootedString lhs(cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
if (!lhs)
return nullptr;
RootedString rhs(cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
if (!rhs)
return nullptr;
return JSRope::new_<CanGC>(cx, lhs, rhs, len);
}
return NewDependentString(cx, str, begin, len);
}
template <typename CharT>
static JSString*
ToLowerCase(JSContext* cx, JSLinearString* str)
{
// Unlike toUpperCase, toLowerCase has the nice invariant that if the input
// is a Latin1 string, the output is also a Latin1 string.
UniquePtr<CharT[], JS::FreePolicy> newChars;
size_t length = str->length();
{
AutoCheckCannotGC nogc;
const CharT* chars = str->chars<CharT>(nogc);
// Look for the first upper case character.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
if (unicode::CanLowerCase(c))
break;
}
// If all characters are lower case, return the input string.
if (i == length)
return str;
newChars = cx->make_pod_array<CharT>(length + 1);
if (!newChars)
return nullptr;
PodCopy(newChars.get(), chars, i);
for (; i < length; i++) {
char16_t c = unicode::ToLowerCase(chars[i]);
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
newChars[i] = c;
}
newChars[length] = 0;
}
JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
if (!res)
return nullptr;
newChars.release();
return res;
}
static inline bool
ToLowerCaseHelper(JSContext* cx, CallReceiver call)
{
RootedString str(cx, ThisToStringForStringProto(cx, call));
if (!str)
return false;
JSLinearString* linear = str->ensureLinear(cx);
if (!linear)
return false;
if (linear->hasLatin1Chars())
str = ToLowerCase<Latin1Char>(cx, linear);
else
str = ToLowerCase<char16_t>(cx, linear);
if (!str)
return false;
call.rval().setString(str);
return true;
}
bool
js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
{
return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
}
static bool
str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
/*
* Forcefully ignore the first (or any) argument and return toLowerCase(),
* ECMA has reserved that argument, presumably for defining the locale.
*/
if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
RootedValue result(cx);
if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
return false;
args.rval().set(result);
return true;
}
return ToLowerCaseHelper(cx, args);
}
template <typename DestChar, typename SrcChar>
static void
ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
{
MOZ_ASSERT(firstLowerCase < length);
for (size_t i = 0; i < firstLowerCase; i++)
destChars[i] = srcChars[i];
for (size_t i = firstLowerCase; i < length; i++) {
char16_t c = unicode::ToUpperCase(srcChars[i]);
MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
destChars[i] = c;
}
destChars[length] = '\0';
}
template <typename CharT>
static JSString*
ToUpperCase(JSContext* cx, JSLinearString* str)
{
typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr;
mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
size_t length = str->length();
{
AutoCheckCannotGC nogc;
const CharT* chars = str->chars<CharT>(nogc);
// Look for the first lower case character.
size_t i = 0;
for (; i < length; i++) {
char16_t c = chars[i];
if (unicode::CanUpperCase(c))
break;
}
// If all characters are upper case, return the input string.
if (i == length)
return str;
// If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
// or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
// upper case characters are not in the Latin1 range.
bool resultIsLatin1;
if (IsSame<CharT, Latin1Char>::value) {
resultIsLatin1 = true;
for (size_t j = i; j < length; j++) {
Latin1Char c = chars[j];
if (c == 0xb5 || c == 0xff) {
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
resultIsLatin1 = false;
break;
} else {
MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
}
}
} else {
resultIsLatin1 = false;
}
if (resultIsLatin1) {
Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
if (!buf)
return nullptr;
ToUpperCaseImpl(buf.get(), chars, i, length);
newChars.construct<Latin1CharPtr>(Move(buf));
} else {
TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1);
if (!buf)
return nullptr;
ToUpperCaseImpl(buf.get(), chars, i, length);
newChars.construct<TwoByteCharPtr>(Move(buf));
}
}
JSString* res;
if (newChars.constructed<Latin1CharPtr>()) {
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
if (!res)
return nullptr;
newChars.ref<Latin1CharPtr>().release();
} else {
res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
if (!res)
return nullptr;
newChars.ref<TwoByteCharPtr>().release();
}
return res;
}
static bool
ToUpperCaseHelper(JSContext* cx, CallReceiver call)
{
RootedString str(cx, ThisToStringForStringProto(cx, call));
if (!str)
return false;
JSLinearString* linear = str->ensureLinear(cx);
if (!linear)
return false;
if (linear->hasLatin1Chars())
str = ToUpperCase<Latin1Char>(cx, linear);
else
str = ToUpperCase<char16_t>(cx, linear);
if (!str)
return false;
call.rval().setString(str);
return true;
}
bool
js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
{
return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
}
static bool
str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
/*
* Forcefully ignore the first (or any) argument and return toUpperCase(),
* ECMA has reserved that argument, presumably for defining the locale.
*/
if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
RootedValue result(cx);
if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
return false;
args.rval().set(result);
return true;
}
return ToUpperCaseHelper(cx, args);
}
#if !EXPOSE_INTL_API
static bool
str_localeCompare(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
if (!thatStr)
return false;
if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeCompare) {
RootedValue result(cx);
if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr, &result))
return false;
args.rval().set(result);
return true;
}
int32_t result;
if (!CompareStrings(cx, str, thatStr, &result))
return false;
args.rval().setInt32(result);
return true;
}
#endif
#if EXPOSE_INTL_API
/* ES6 20140210 draft 21.1.3.12. */
static bool
str_normalize(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
// Steps 1-3.
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
// Step 4.
UNormalizationMode form;
if (!args.hasDefined(0)) {
form = UNORM_NFC;
} else {
// Steps 5-6.
RootedLinearString formStr(cx, ArgToRootedString(cx, args, 0));
if (!formStr)
return false;
// Step 7.
if (EqualStrings(formStr, cx->names().NFC)) {
form = UNORM_NFC;
} else if (EqualStrings(formStr, cx->names().NFD)) {
form = UNORM_NFD;
} else if (EqualStrings(formStr, cx->names().NFKC)) {
form = UNORM_NFKC;
} else if (EqualStrings(formStr, cx->names().NFKD)) {
form = UNORM_NFKD;
} else {
JS_ReportErrorNumber(cx, GetErrorMessage, nullptr,
JSMSG_INVALID_NORMALIZE_FORM);
return false;
}
}
// Step 8.
AutoStableStringChars stableChars(cx);
if (!str->ensureFlat(cx) || !stableChars.initTwoByte(cx, str))
return false;
static const size_t INLINE_CAPACITY = 32;
const UChar* srcChars = Char16ToUChar(stableChars.twoByteRange().start().get());
int32_t srcLen = AssertedCast<int32_t>(str->length());
Vector<char16_t, INLINE_CAPACITY> chars(cx);
if (!chars.resize(INLINE_CAPACITY))
return false;
UErrorCode status = U_ZERO_ERROR;
int32_t size = unorm_normalize(srcChars, srcLen, form, 0,
Char16ToUChar(chars.begin()), INLINE_CAPACITY,
&status);
if (status == U_BUFFER_OVERFLOW_ERROR) {
if (!chars.resize(size))
return false;
status = U_ZERO_ERROR;
#ifdef DEBUG
int32_t finalSize =
#endif
unorm_normalize(srcChars, srcLen, form, 0,
Char16ToUChar(chars.begin()), size,
&status);
MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently");
}
if (U_FAILURE(status))
return false;
JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
if (!ns)
return false;
// Step 9.
args.rval().setString(ns);
return true;
}
#endif
bool
js::str_charAt(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx);
size_t i;
if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
str = args.thisv().toString();
i = size_t(args[0].toInt32());
if (i >= str->length())
goto out_of_range;
} else {
str = ThisToStringForStringProto(cx, args);
if (!str)
return false;
double d = 0.0;
if (args.length() > 0 && !ToInteger(cx, args[0], &d))
return false;
if (d < 0 || str->length() <= d)
goto out_of_range;
i = size_t(d);
}
str = cx->staticStrings().getUnitStringForElement(cx, str, i);
if (!str)
return false;
args.rval().setString(str);
return true;
out_of_range:
args.rval().setString(cx->runtime()->emptyString);
return true;
}
bool
js::str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res)
{
RootedString str(cx);
size_t i;
if (index.isInt32()) {
i = index.toInt32();
if (i >= string->length())
goto out_of_range;
} else {
double d = 0.0;
if (!ToInteger(cx, index, &d))
return false;
// check whether d is negative as size_t is unsigned
if (d < 0 || string->length() <= d )
goto out_of_range;
i = size_t(d);
}
char16_t c;
if (!string->getChar(cx, i , &c))
return false;
res.setInt32(c);
return true;
out_of_range:
res.setNaN();
return true;
}
bool
js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx);
RootedValue index(cx);
if (args.thisv().isString()) {
str = args.thisv().toString();
} else {
str = ThisToStringForStringProto(cx, args);
if (!str)
return false;
}
if (args.length() != 0)
index = args[0];
else
index.setInt32(0);
return js::str_charCodeAt_impl(cx, str, index, args.rval());
}
/*
* Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
* The patlen argument must be positive and no greater than sBMHPatLenMax.
*
* Return the index of pat in text, or -1 if not found.
*/
static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
static const int sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
template <typename TextChar, typename PatChar>
static int
BoyerMooreHorspool(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
{
MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
uint8_t skip[sBMHCharSetSize];
for (uint32_t i = 0; i < sBMHCharSetSize; i++)
skip[i] = uint8_t(patLen);
uint32_t patLast = patLen - 1;
for (uint32_t i = 0; i < patLast; i++) {
char16_t c = pat[i];
if (c >= sBMHCharSetSize)
return sBMHBadPattern;
skip[c] = uint8_t(patLast - i);
}
for (uint32_t k = patLast; k < textLen; ) {
for (uint32_t i = k, j = patLast; ; i--, j--) {
if (text[i] != pat[j])
break;
if (j == 0)
return static_cast<int>(i); /* safe: max string size */
}
char16_t c = text[k];
k += (c >= sBMHCharSetSize) ? patLen : skip[c];
}
return -1;
}
template <typename TextChar, typename PatChar>
struct MemCmp {
typedef uint32_t Extent;
static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*, uint32_t patLen) {
return (patLen - 1) * sizeof(PatChar);
}
static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
return memcmp(p, t, extent) == 0;
}
};
template <typename TextChar, typename PatChar>
struct ManualCmp {
typedef const PatChar* Extent;
static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat, uint32_t patLen) {
return pat + patLen;
}
static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
for (; p != extent; ++p, ++t) {
if (*p != *t)
return false;
}
return true;
}
};
template <typename TextChar, typename PatChar>
static const TextChar*
FirstCharMatcherUnrolled(const TextChar* text, uint32_t n, const PatChar pat)
{
const TextChar* textend = text + n;
const TextChar* t = text;
switch ((textend - t) & 7) {
case 0: if (*t++ == pat) return t - 1;
case 7: if (*t++ == pat) return t - 1;
case 6: if (*t++ == pat) return t - 1;
case 5: if (*t++ == pat) return t - 1;
case 4: if (*t++ == pat) return t - 1;
case 3: if (*t++ == pat) return t - 1;
case 2: if (*t++ == pat) return t - 1;
case 1: if (*t++ == pat) return t - 1;
}
while (textend != t) {
if (t[0] == pat) return t;
if (t[1] == pat) return t + 1;
if (t[2] == pat) return t + 2;
if (t[3] == pat) return t + 3;
if (t[4] == pat) return t + 4;
if (t[5] == pat) return t + 5;
if (t[6] == pat) return t + 6;
if (t[7] == pat) return t + 7;
t += 8;
}
return nullptr;
}
static const char*
FirstCharMatcher8bit(const char* text, uint32_t n, const char pat)
{
#if defined(__clang__)
return FirstCharMatcherUnrolled<char, char>(text, n, pat);
#else
return reinterpret_cast<const char*>(memchr(text, pat, n));
#endif
}
static const char16_t*
FirstCharMatcher16bit(const char16_t* text, uint32_t n, const char16_t pat)
{
#if defined(XP_DARWIN) || defined(XP_WIN)
/*
* Performance of memchr is horrible in OSX. Windows is better,
* but it is still better to use UnrolledMatcher.
*/
return FirstCharMatcherUnrolled<char16_t, char16_t>(text, n, pat);
#else
/*
* For linux the best performance is obtained by slightly hacking memchr.
* memchr works only on 8bit char but char16_t is 16bit. So we treat char16_t
* in blocks of 8bit and use memchr.
*/
const char* text8 = (const char*) text;
const char* pat8 = reinterpret_cast<const char*>(&pat);
MOZ_ASSERT(n < UINT32_MAX/2);
n *= 2;
uint32_t i = 0;
while (i < n) {
/* Find the first 8 bits of 16bit character in text. */
const char* pos8 = FirstCharMatcher8bit(text8 + i, n - i, pat8[0]);
if (pos8 == nullptr)
return nullptr;
i = static_cast<uint32_t>(pos8 - text8);
/* Incorrect match if it matches the last 8 bits of 16bit char. */
if (i % 2 != 0) {
i++;
continue;
}
/* Test if last 8 bits match last 8 bits of 16bit char. */
if (pat8[1] == text8[i + 1])
return (text + (i/2));
i += 2;
}
return nullptr;
#endif
}
template <class InnerMatch, typename TextChar, typename PatChar>
static int
Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t patlen)
{
const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
uint32_t i = 0;
uint32_t n = textlen - patlen + 1;
while (i < n) {
const TextChar* pos;
if (sizeof(TextChar) == 2 && sizeof(PatChar) == 2)
pos = (TextChar*) FirstCharMatcher16bit((char16_t*)text + i, n - i, pat[0]);
else if (sizeof(TextChar) == 1 && sizeof(PatChar) == 1)
pos = (TextChar*) FirstCharMatcher8bit((char*) text + i, n - i, pat[0]);
else
pos = (TextChar*) FirstCharMatcherUnrolled<TextChar, PatChar>(text + i, n - i, pat[0]);
if (pos == nullptr)
return -1;
i = static_cast<uint32_t>(pos - text);
if (InnerMatch::match(pat + 1, text + i + 1, extent))
return i;
i += 1;
}
return -1;
}
template <typename TextChar, typename PatChar>
static MOZ_ALWAYS_INLINE int
StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
{
if (patLen == 0)
return 0;
if (textLen < patLen)
return -1;
#if defined(__i386__) || defined(_M_IX86) || defined(__i386)
/*
* Given enough registers, the unrolled loop below is faster than the
* following loop. 32-bit x86 does not have enough registers.
*/
if (patLen == 1) {
const PatChar p0 = *pat;
const TextChar* end = text + textLen;
for (const TextChar* c = text; c != end; ++c) {
if (*c == p0)
return c - text;
}
return -1;
}
#endif
/*
* If the text or pattern string is short, BMH will be more expensive than
* the basic linear scan due to initialization cost and a more complex loop
* body. While the correct threshold is input-dependent, we can make a few
* conservative observations:
* - When |textLen| is "big enough", the initialization time will be
* proportionally small, so the worst-case slowdown is minimized.
* - When |patLen| is "too small", even the best case for BMH will be
* slower than a simple scan for large |textLen| due to the more complex
* loop body of BMH.
* From this, the values for "big enough" and "too small" are determined
* empirically. See bug 526348.
*/
if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
int index = BoyerMooreHorspool(text, textLen, pat, patLen);
if (index != sBMHBadPattern)
return index;
}
/*
* For big patterns with large potential overlap we want the SIMD-optimized
* speed of memcmp. For small patterns, a simple loop is faster. We also can't
* use memcmp if one of the strings is TwoByte and the other is Latin1.
*
* FIXME: Linux memcmp performance is sad and the manual loop is faster.
*/
return
#if !defined(__linux__)
(patLen > 128 && IsSame<TextChar, PatChar>::value)
? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen)
:
#endif
Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen);
}
static int32_t
StringMatch(JSLinearString* text, JSLinearString* pat, uint32_t start = 0)
{
MOZ_ASSERT(start <= text->length());
uint32_t textLen = text->length() - start;
uint32_t patLen = pat->length();
int match;
AutoCheckCannotGC nogc;
if (text->hasLatin1Chars()) {
const Latin1Char* textChars = text->latin1Chars(nogc) + start;
if (pat->hasLatin1Chars())
match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
else
match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
} else {
const char16_t* textChars = text->twoByteChars(nogc) + start;
if (pat->hasLatin1Chars())
match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
else
match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
}
return (match == -1) ? -1 : start + match;
}
static const size_t sRopeMatchThresholdRatioLog2 = 5;
bool
js::StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patLen)
{
AutoCheckCannotGC nogc;
return text->hasLatin1Chars()
? StringMatch(text->latin1Chars(nogc), text->length(), pat, patLen) != -1
: StringMatch(text->twoByteChars(nogc), text->length(), pat, patLen) != -1;
}
int
js::StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start)
{
return StringMatch(text, pat, start);
}
// When an algorithm does not need a string represented as a single linear
// array of characters, this range utility may be used to traverse the string a
// sequence of linear arrays of characters. This avoids flattening ropes.
class StringSegmentRange
{
// If malloc() shows up in any profiles from this vector, we can add a new
// StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
Rooted<StringVector> stack;
RootedLinearString cur;
bool settle(JSString* str) {
while (str->isRope()) {
JSRope& rope = str->asRope();
if (!stack.append(rope.rightChild()))
return false;
str = rope.leftChild();
}
cur = &str->asLinear();
return true;
}
public:
explicit StringSegmentRange(JSContext* cx)
: stack(cx, StringVector(cx)), cur(cx)
{}
MOZ_WARN_UNUSED_RESULT bool init(JSString* str) {
MOZ_ASSERT(stack.empty());
return settle(str);
}
bool empty() const {
return cur == nullptr;
}
JSLinearString* front() const {
MOZ_ASSERT(!cur->isRope());
return cur;
}
MOZ_WARN_UNUSED_RESULT bool popFront() {
MOZ_ASSERT(!empty());
if (stack.empty()) {
cur = nullptr;
return true;
}
return settle(stack.popCopy());
}
};
typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
template <typename TextChar, typename PatChar>
static int
RopeMatchImpl(const AutoCheckCannotGC& nogc, LinearStringVector& strings,
const PatChar* pat, size_t patLen)
{
/* Absolute offset from the beginning of the logical text string. */
int pos = 0;
for (JSLinearString** outerp = strings.begin(); outerp != strings.end(); ++outerp) {
/* Try to find a match within 'outer'. */
JSLinearString* outer = *outerp;
const TextChar* chars = outer->chars<TextChar>(nogc);
size_t len = outer->length();
int matchResult = StringMatch(chars, len, pat, patLen);
if (matchResult != -1) {
/* Matched! */
return pos + matchResult;
}
/* Try to find a match starting in 'outer' and running into other nodes. */
const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
const TextChar* const textend = chars + len;
const PatChar p0 = *pat;
const PatChar* const p1 = pat + 1;
const PatChar* const patend = pat + patLen;
for (const TextChar* t = text; t != textend; ) {
if (*t++ != p0)
continue;
JSLinearString** innerp = outerp;
const TextChar* ttend = textend;
const TextChar* tt = t;
for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
while (tt == ttend) {
if (++innerp == strings.end())
return -1;
JSLinearString* inner = *innerp;
tt = inner->chars<TextChar>(nogc);
ttend = tt + inner->length();
}
if (*pp != *tt)
goto break_continue;
}
/* Matched! */
return pos + (t - chars) - 1; /* -1 because of *t++ above */
break_continue:;
}
pos += len;
}
return -1;
}
/*
* RopeMatch takes the text to search and the pattern to search for in the text.
* RopeMatch returns false on OOM and otherwise returns the match index through
* the 'match' outparam (-1 for not found).
*/
static bool
RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
{
uint32_t patLen = pat->length();
if (patLen == 0) {
*match = 0;
return true;
}
if (text->length() < patLen) {
*match = -1;
return true;
}
/*
* List of leaf nodes in the rope. If we run out of memory when trying to
* append to this list, we can still fall back to StringMatch, so use the
* system allocator so we don't report OOM in that case.
*/
LinearStringVector strings;
/*
* We don't want to do rope matching if there is a poor node-to-char ratio,
* since this means spending a lot of time in the match loop below. We also
* need to build the list of leaf nodes. Do both here: iterate over the
* nodes so long as there are not too many.
*
* We also don't use rope matching if the rope contains both Latin1 and
* TwoByte nodes, to simplify the match algorithm.
*/
{
size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
StringSegmentRange r(cx);
if (!r.init(text))
return false;
bool textIsLatin1 = text->hasLatin1Chars();
while (!r.empty()) {
if (threshold-- == 0 ||
r.front()->hasLatin1Chars() != textIsLatin1 ||
!strings.append(r.front()))
{
JSLinearString* linear = text->ensureLinear(cx);
if (!linear)
return false;
*match = StringMatch(linear, pat);
return true;
}
if (!r.popFront())
return false;
}
}
AutoCheckCannotGC nogc;
if (text->hasLatin1Chars()) {
if (pat->hasLatin1Chars())
*match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc), patLen);
else
*match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc), patLen);
} else {
if (pat->hasLatin1Chars())
*match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc), patLen);
else
*match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc), patLen);
}
return true;
}
/* ES6 draft rc4 21.1.3.7. */
static bool
str_includes(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
// Steps 1, 2, and 3
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
// Steps 4 and 5
bool isRegExp;
if (!IsRegExp(cx, args.get(0), &isRegExp))
return false;
// Step 6
if (isRegExp) {
JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
"first", "", "Regular Expression");
return false;
}
// Steps 7 and 8
RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
if (!searchStr)
return false;
// Steps 9 and 10
uint32_t pos = 0;
if (args.hasDefined(1)) {
if (args[1].isInt32()) {
int i = args[1].toInt32();
pos = (i < 0) ? 0U : uint32_t(i);
} else {
double d;
if (!ToInteger(cx, args[1], &d))
return false;
pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
}
}
// Step 11
uint32_t textLen = str->length();
// Step 12
uint32_t start = Min(Max(pos, 0U), textLen);
// Steps 13 and 14
JSLinearString* text = str->ensureLinear(cx);
if (!text)
return false;
args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
return true;
}
/* TODO: remove String.prototype.contains (bug 1103588) */
static bool
str_contains(JSContext *cx, unsigned argc, Value *vp)
{
#ifndef RELEASE_BUILD
CallArgs args = CallArgsFromVp(argc, vp);
RootedObject callee(cx, &args.callee());
if (!GlobalObject::warnOnceAboutStringContains(cx, callee))
return false;
#endif
return str_includes(cx, argc, vp);
}
/* ES6 20120927 draft 15.5.4.7. */
bool
js::str_indexOf(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
// Steps 1, 2, and 3
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
// Steps 4 and 5
RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
if (!searchStr)
return false;
// Steps 6 and 7
uint32_t pos = 0;
if (args.hasDefined(1)) {
if (args[1].isInt32()) {
int i = args[1].toInt32();
pos = (i < 0) ? 0U : uint32_t(i);
} else {
double d;
if (!ToInteger(cx, args[1], &d))
return false;
pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
}
}
// Step 8
uint32_t textLen = str->length();
// Step 9
uint32_t start = Min(Max(pos, 0U), textLen);
// Steps 10 and 11
JSLinearString* text = str->ensureLinear(cx);
if (!text)
return false;
args.rval().setInt32(StringMatch(text, searchStr, start));
return true;
}
template <typename TextChar, typename PatChar>
static int32_t
LastIndexOfImpl(const TextChar* text, size_t textLen, const PatChar* pat, size_t patLen,
size_t start)
{
MOZ_ASSERT(patLen > 0);
MOZ_ASSERT(patLen <= textLen);
MOZ_ASSERT(start <= textLen - patLen);
const PatChar p0 = *pat;
const PatChar* patNext = pat + 1;
const PatChar* patEnd = pat + patLen;
for (const TextChar* t = text + start; t >= text; --t) {
if (*t == p0) {
const TextChar* t1 = t + 1;
for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
if (*t1 != *p1)
goto break_continue;
}
return static_cast<int32_t>(t - text);
}
break_continue:;
}
return -1;
}
bool
js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString textstr(cx, ThisToStringForStringProto(cx, args));
if (!textstr)
return false;
RootedLinearString pat(cx, ArgToRootedString(cx, args, 0));
if (!pat)
return false;
size_t textLen = textstr->length();
size_t patLen = pat->length();
int start = textLen - patLen; // Start searching here
if (start < 0) {
args.rval().setInt32(-1);
return true;
}
if (args.hasDefined(1)) {
if (args[1].isInt32()) {
int i = args[1].toInt32();
if (i <= 0)
start = 0;
else if (i < start)
start = i;
} else {
double d;
if (!ToNumber(cx, args[1], &d))
return false;
if (!IsNaN(d)) {
d = JS::ToInteger(d);
if (d <= 0)
start = 0;
else if (d < start)
start = int(d);
}
}
}
if (patLen == 0) {
args.rval().setInt32(start);
return true;
}
JSLinearString* text = textstr->ensureLinear(cx);
if (!text)
return false;
int32_t res;
AutoCheckCannotGC nogc;
if (text->hasLatin1Chars()) {
const Latin1Char* textChars = text->latin1Chars(nogc);
if (pat->hasLatin1Chars())
res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
else
res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
} else {
const char16_t* textChars = text->twoByteChars(nogc);
if (pat->hasLatin1Chars())
res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
else
res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
}
args.rval().setInt32(res);
return true;
}
bool
js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
{
MOZ_ASSERT(start + pat->length() <= text->length());
size_t patLen = pat->length();
AutoCheckCannotGC nogc;
if (text->hasLatin1Chars()) {
const Latin1Char* textChars = text->latin1Chars(nogc) + start;
if (pat->hasLatin1Chars())
return PodEqual(textChars, pat->latin1Chars(nogc), patLen);
return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
}
const char16_t* textChars = text->twoByteChars(nogc) + start;
if (pat->hasTwoByteChars())
return PodEqual(textChars, pat->twoByteChars(nogc), patLen);
return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
}
/* ES6 draft rc3 21.1.3.18. */
bool
js::str_startsWith(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
// Steps 1, 2, and 3
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
// Steps 4 and 5
bool isRegExp;
if (!IsRegExp(cx, args.get(0), &isRegExp))
return false;
// Step 6
if (isRegExp) {
JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
"first", "", "Regular Expression");
return false;
}
// Steps 7 and 8
RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
if (!searchStr)
return false;
// Steps 9 and 10
uint32_t pos = 0;
if (args.hasDefined(1)) {
if (args[1].isInt32()) {
int i = args[1].toInt32();
pos = (i < 0) ? 0U : uint32_t(i);
} else {
double d;
if (!ToInteger(cx, args[1], &d))
return false;
pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
}
}
// Step 11
uint32_t textLen = str->length();
// Step 12
uint32_t start = Min(Max(pos, 0U), textLen);
// Step 13
uint32_t searchLen = searchStr->length();
// Step 14
if (searchLen + start < searchLen || searchLen + start > textLen) {
args.rval().setBoolean(false);
return true;
}
// Steps 15 and 16
JSLinearString* text = str->ensureLinear(cx);
if (!text)
return false;
args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
return true;
}
/* ES6 draft rc3 21.1.3.6. */
static bool
str_endsWith(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
// Steps 1, 2, and 3
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
// Steps 4 and 5
bool isRegExp;
if (!IsRegExp(cx, args.get(0), &isRegExp))
return false;
// Step 6
if (isRegExp) {
JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
"first", "", "Regular Expression");
return false;
}
// Steps 7 and 8
RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
if (!searchStr)
return false;
// Step 9
uint32_t textLen = str->length();
// Steps 10 and 11
uint32_t pos = textLen;
if (args.hasDefined(1)) {
if (args[1].isInt32()) {
int i = args[1].toInt32();
pos = (i < 0) ? 0U : uint32_t(i);
} else {
double d;
if (!ToInteger(cx, args[1], &d))
return false;
pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
}
}
// Step 12
uint32_t end = Min(Max(pos, 0U), textLen);
// Step 13
uint32_t searchLen = searchStr->length();
// Step 15 (reordered)
if (searchLen > end) {
args.rval().setBoolean(false);
return true;
}
// Step 14
uint32_t start = end - searchLen;
// Steps 16 and 17
JSLinearString* text = str->ensureLinear(cx);
if (!text)
return false;
args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
return true;
}
template <typename CharT>
static void
TrimString(const CharT* chars, bool trimLeft, bool trimRight, size_t length,
size_t* pBegin, size_t* pEnd)
{
size_t begin = 0, end = length;
if (trimLeft) {
while (begin < length && unicode::IsSpace(chars[begin]))
++begin;
}
if (trimRight) {
while (end > begin && unicode::IsSpace(chars[end - 1]))
--end;
}
*pBegin = begin;
*pEnd = end;
}
static bool
TrimString(JSContext* cx, Value* vp, bool trimLeft, bool trimRight)
{
CallReceiver call = CallReceiverFromVp(vp);
RootedString str(cx, ThisToStringForStringProto(cx, call));
if (!str)
return false;
JSLinearString* linear = str->ensureLinear(cx);
if (!linear)
return false;
size_t length = linear->length();
size_t begin, end;
if (linear->hasLatin1Chars()) {
AutoCheckCannotGC nogc;
TrimString(linear->latin1Chars(nogc), trimLeft, trimRight, length, &begin, &end);
} else {
AutoCheckCannotGC nogc;
TrimString(linear->twoByteChars(nogc), trimLeft, trimRight, length, &begin, &end);
}
str = NewDependentString(cx, str, begin, end - begin);
if (!str)
return false;
call.rval().setString(str);
return true;
}
static bool
str_trim(JSContext* cx, unsigned argc, Value* vp)
{
return TrimString(cx, vp, true, true);
}
static bool
str_trimLeft(JSContext* cx, unsigned argc, Value* vp)
{
return TrimString(cx, vp, true, false);
}
static bool
str_trimRight(JSContext* cx, unsigned argc, Value* vp)
{
return TrimString(cx, vp, false, true);
}
/*
* Perl-inspired string functions.
*/
namespace {
/* Result of a successfully performed flat match. */
class FlatMatch
{
RootedAtom pat_;
int32_t match_;
friend class StringRegExpGuard;
public:
explicit FlatMatch(JSContext* cx) : pat_(cx) {}
JSLinearString* pattern() const { return pat_; }
size_t patternLength() const { return pat_->length(); }
/*
* Note: The match is -1 when the match is performed successfully,
* but no match is found.
*/
int32_t match() const { return match_; }
};
} /* anonymous namespace */
static inline bool
IsRegExpMetaChar(char16_t c)
{
switch (c) {
/* Taken from the PatternCharacter production in 15.10.1. */
case '^': case '$': case '\\': case '.': case '*': case '+':
case '?': case '(': case ')': case '[': case ']': case '{':
case '}': case '|':
return true;
default:
return false;
}
}
template <typename CharT>
bool
js::HasRegExpMetaChars(const CharT* chars, size_t length)
{
for (size_t i = 0; i < length; ++i) {
if (IsRegExpMetaChar(chars[i]))
return true;
}
return false;
}
template bool
js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars, size_t length);
template bool
js::HasRegExpMetaChars<char16_t>(const char16_t* chars, size_t length);
bool
js::StringHasRegExpMetaChars(JSLinearString* str)
{
AutoCheckCannotGC nogc;
if (str->hasLatin1Chars())
return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
}
namespace {
/*
* StringRegExpGuard factors logic out of String regexp operations.
*
* |optarg| indicates in which argument position RegExp flags will be found, if
* present. This is a Mozilla extension and not part of any ECMA spec.
*/
class MOZ_STACK_CLASS StringRegExpGuard
{
RegExpGuard re_;
FlatMatch fm;
RootedObject obj_;
/*
* Upper bound on the number of characters we are willing to potentially
* waste on searching for RegExp meta-characters.
*/
static const size_t MAX_FLAT_PAT_LEN = 256;
template <typename CharT>
static bool
flattenPattern(StringBuffer& sb, const CharT* chars, size_t len)
{
static const char ESCAPE_CHAR = '\\';
for (const CharT* it = chars; it < chars + len; ++it) {
if (IsRegExpMetaChar(*it)) {
if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
return false;
} else {
if (!sb.append(*it))
return false;
}
}
return true;
}
static JSAtom*
flattenPattern(JSContext* cx, JSAtom* pat)
{
StringBuffer sb(cx);
if (!sb.reserve(pat->length()))
return nullptr;
if (pat->hasLatin1Chars()) {
AutoCheckCannotGC nogc;
if (!flattenPattern(sb, pat->latin1Chars(nogc), pat->length()))
return nullptr;
} else {
AutoCheckCannotGC nogc;
if (!flattenPattern(sb, pat->twoByteChars(nogc), pat->length()))
return nullptr;
}
return sb.finishAtom();
}
public:
explicit StringRegExpGuard(JSContext* cx)
: re_(cx), fm(cx), obj_(cx)
{ }
/* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
bool init(JSContext* cx, const CallArgs& args, bool convertVoid = false)
{
if (args.length() != 0) {
ESClassValue cls;
if (!GetClassOfValue(cx, args[0], &cls))
return false;
if (cls == ESClass_RegExp)
return initRegExp(cx, &args[0].toObject());
}
if (convertVoid && !args.hasDefined(0)) {
fm.pat_ = cx->runtime()->emptyString;
return true;
}
JSString* arg = ArgToRootedString(cx, args, 0);
if (!arg)
return false;
fm.pat_ = AtomizeString(cx, arg);
if (!fm.pat_)
return false;
return true;
}
bool initRegExp(JSContext* cx, JSObject* regexp) {
obj_ = regexp;
return RegExpToShared(cx, obj_, &re_);
}
bool init(JSContext* cx, HandleString pattern) {
fm.pat_ = AtomizeString(cx, pattern);
if (!fm.pat_)
return false;
return true;
}
/*
* Attempt to match |patstr| to |textstr|. A flags argument, metachars in
* the pattern string, or a lengthy pattern string can thwart this process.
*
* |checkMetaChars| looks for regexp metachars in the pattern string.
*
* Return whether flat matching could be used.
*
* N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
* cx->isExceptionPending().
*/
const FlatMatch*
tryFlatMatch(JSContext* cx, JSString* text, unsigned optarg, unsigned argc,
bool checkMetaChars = true)
{
if (re_.initialized())
return nullptr;
if (optarg < argc)
return nullptr;
size_t patLen = fm.pat_->length();
if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || StringHasRegExpMetaChars(fm.pat_)))
return nullptr;
/*
* |text| could be a rope, so we want to avoid flattening it for as
* long as possible.
*/
if (text->isRope()) {
if (!RopeMatch(cx, &text->asRope(), fm.pat_, &fm.match_))
return nullptr;
} else {
fm.match_ = StringMatch(&text->asLinear(), fm.pat_, 0);
}
return &fm;
}
/* If the pattern is not already a regular expression, make it so. */
bool normalizeRegExp(JSContext* cx, bool flat, unsigned optarg, const CallArgs& args)
{
if (re_.initialized())
return true;
/* Build RegExp from pattern string. */
RootedString opt(cx);
if (optarg < args.length()) {
if (JSScript* script = cx->currentScript()) {
const char* filename = script->filename();
cx->compartment()->addTelemetry(filename, JSCompartment::DeprecatedFlagsArgument);
}
if (!cx->compartment()->warnedAboutFlagsArgument) {
if (!JS_ReportErrorFlagsAndNumber(cx, JSREPORT_WARNING, GetErrorMessage, nullptr,
JSMSG_DEPRECATED_FLAGS_ARG))
return false;
cx->compartment()->warnedAboutFlagsArgument = true;
}
opt = ToString<CanGC>(cx, args[optarg]);
if (!opt)
return false;
} else {
opt = nullptr;
}
Rooted<JSAtom*> pat(cx);
if (flat) {
pat = flattenPattern(cx, fm.pat_);
if (!pat)
return false;
} else {
pat = fm.pat_;
}
MOZ_ASSERT(pat);
return cx->compartment()->regExps.get(cx, pat, opt, &re_);
}
bool zeroLastIndex(JSContext* cx) {
if (!regExpIsObject())
return true;
// Use a fast path for same-global RegExp objects with writable
// lastIndex.
if (obj_->is<RegExpObject>()) {
RegExpObject* nobj = &obj_->as<RegExpObject>();
if (nobj->lookup(cx, cx->names().lastIndex)->writable()) {
nobj->zeroLastIndex(cx);
return true;
}
}
// Handle everything else generically (including throwing if .lastIndex is non-writable).
RootedValue zero(cx, Int32Value(0));
return SetProperty(cx, obj_, cx->names().lastIndex, zero);
}
RegExpShared& regExp() { return *re_; }
bool regExpIsObject() { return obj_ != nullptr; }
HandleObject regExpObject() {
MOZ_ASSERT(regExpIsObject());
return obj_;
}
private:
StringRegExpGuard(const StringRegExpGuard&) = delete;
void operator=(const StringRegExpGuard&) = delete;
};
} /* anonymous namespace */
static bool
DoMatchLocal(JSContext* cx, const CallArgs& args, RegExpStatics* res, HandleLinearString input,
RegExpShared& re)
{
ScopedMatchPairs matches(&cx->tempLifoAlloc());
RegExpRunStatus status = re.execute(cx, input, 0, &matches);
if (status == RegExpRunStatus_Error)
return false;
if (status == RegExpRunStatus_Success_NotFound) {
args.rval().setNull();
return true;
}
if (!res->updateFromMatchPairs(cx, input, matches))
return false;
RootedValue rval(cx);
if (!CreateRegExpMatchResult(cx, input, matches, &rval))
return false;
args.rval().set(rval);
return true;
}
/* ES5 15.5.4.10 step 8. */
static bool
DoMatchGlobal(JSContext* cx, const CallArgs& args, RegExpStatics* res, HandleLinearString input,
StringRegExpGuard& g)
{
// Step 8a.
//
// This single zeroing of "lastIndex" covers all "lastIndex" changes in the
// rest of String.prototype.match, particularly in steps 8f(i) and
// 8f(iii)(2)(a). Here's why.
//
// The inputs to the calls to RegExp.prototype.exec are a RegExp object
// whose .global is true and a string. The only side effect of a call in
// these circumstances is that the RegExp's .lastIndex will be modified to
// the next starting index after the discovered match (or to 0 if there's
// no remaining match). Because .lastIndex is a non-configurable data
// property and no script-controllable code executes after step 8a, passing
// step 8a implies *every* .lastIndex set succeeds. String.prototype.match
// calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
// so the final value of .lastIndex is 0: exactly the state after step 8a
// succeeds. No spec step lets script observe intermediate .lastIndex
// values.
//
// The arrays returned by RegExp.prototype.exec always have a string at
// index 0, for which [[Get]]s have no side effects.
//
// Filling in a new array using [[DefineOwnProperty]] is unobservable.
//
// This is a tricky point, because after this set, our implementation *can*
// fail. The key is that script can't distinguish these failure modes from
// one where, in spec terms, we fail immediately after step 8a. That *in
// reality* we might have done extra matching work, or created a partial
// results array to return, or hit an interrupt, is irrelevant. The
// script can't tell we did any of those things but didn't update
// .lastIndex. Thus we can optimize steps 8b onward however we want,
// including eliminating intermediate .lastIndex sets, as long as we don't
// add ways for script to observe the intermediate states.
//
// In short: it's okay to cheat (by setting .lastIndex to 0, once) because
// we can't get caught.
if (!g.zeroLastIndex(cx))
return false;
// Step 8b.
AutoValueVector elements(cx);
size_t lastSuccessfulStart = 0;
// The loop variables from steps 8c-e aren't needed, as we use different
// techniques from the spec to implement step 8f's loop.
// Step 8f.
ScopedMatchPairs matches(&cx->tempLifoAlloc());
size_t charsLen = input->length();
RegExpShared& re = g.regExp();
for (size_t searchIndex = 0; searchIndex <= charsLen; ) {
if (!CheckForInterrupt(cx))
return false;
// Steps 8f(i-ii), minus "lastIndex" updates (see above).
RegExpRunStatus status = re.execute(cx, input, searchIndex, &matches);
if (status == RegExpRunStatus_Error)
return false;
// Step 8f(ii).
if (status == RegExpRunStatus_Success_NotFound)
break;
lastSuccessfulStart = searchIndex;
MatchPair& match = matches[0];
// Steps 8f(iii)(1-3).
searchIndex = match.isEmpty() ? match.limit + 1 : match.limit;
// Step 8f(iii)(4-5).
JSLinearString* str = NewDependentString(cx, input, match.start, match.length());
if (!str)
return false;
if (!elements.append(StringValue(str)))
return false;
}
// Step 8g.
if (elements.empty()) {
args.rval().setNull();
return true;
}
// The last *successful* match updates the RegExpStatics. (Interestingly,
// this implies that String.prototype.match's semantics aren't those
// implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
res->updateLazily(cx, input, &re, lastSuccessfulStart);
// Steps 8b, 8f(iii)(5-6), 8h.
JSObject* array = NewDenseCopiedArray(cx, elements.length(), elements.begin());
if (!array)
return false;
args.rval().setObject(*array);
return true;
}
static bool
BuildFlatMatchArray(JSContext* cx, HandleString textstr, const FlatMatch& fm, CallArgs* args)
{
if (fm.match() < 0) {
args->rval().setNull();
return true;
}
/* Get the templateObject that defines the shape and type of the output object */
JSObject* templateObject = cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx);
if (!templateObject)
return false;
RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
if (!arr)
return false;
/* Store a Value for each pair. */
arr->setDenseInitializedLength(1);
arr->initDenseElement(0, StringValue(fm.pattern()));
/* Set the |index| property. (TemplateObject positions it in slot 0) */
arr->setSlot(0, Int32Value(fm.match()));
/* Set the |input| property. (TemplateObject positions it in slot 1) */
arr->setSlot(1, StringValue(textstr));
#ifdef DEBUG
RootedValue test(cx);
RootedId id(cx, NameToId(cx->names().index));
if (!NativeGetProperty(cx, arr, id, &test))
return false;
MOZ_ASSERT(test == arr->getSlot(0));
id = NameToId(cx->names().input);
if (!NativeGetProperty(cx, arr, id, &test))
return false;
MOZ_ASSERT(test == arr->getSlot(1));
#endif
args->rval().setObject(*arr);
return true;
}
/* ES5 15.5.4.10. */
bool
js::str_match(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
/* Steps 1-2. */
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
/* Steps 3-4, plus the trailing-argument "flags" extension. */
StringRegExpGuard g(cx);
if (!g.init(cx, args, true))
return false;
/* Fast path when the search pattern can be searched for as a string. */
if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length()))
return BuildFlatMatchArray(cx, str, *fm, &args);
/* Return if there was an error in tryFlatMatch. */
if (cx->isExceptionPending())
return false;
/* Create regular-expression internals as needed to perform the match. */
if (!g.normalizeRegExp(cx, false, 1, args))
return false;
RegExpStatics* res = cx->global()->getRegExpStatics(cx);
if (!res)
return false;
RootedLinearString linearStr(cx, str->ensureLinear(cx));
if (!linearStr)
return false;
/* Steps 5-6, 7. */
if (!g.regExp().global())
return DoMatchLocal(cx, args, res, linearStr, g.regExp());
/* Steps 6, 8. */
return DoMatchGlobal(cx, args, res, linearStr, g);
}
bool
js::str_search(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
RootedString str(cx, ThisToStringForStringProto(cx, args));
if (!str)
return false;
StringRegExpGuard g(cx);
if (!g.init(cx, args, true))
return false;
if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length())) {
args.rval().setInt32(fm->match());
return true;
}
if (cx->isExceptionPending()) /* from tryFlatMatch */
return false;
if (!g.normalizeRegExp(cx, false, 1, args))
return false;
RootedLinearString linearStr(cx, str->ensureLinear(cx));
if (!linearStr)
return false;
RegExpStatics* res = cx->global()->getRegExpStatics(cx);
if (!res)
return false;
/* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
ScopedMatchPairs matches(&cx->tempLifoAlloc());
RegExpRunStatus status = g.regExp().execute(cx, linearStr, 0, &matches);
if (status == RegExpRunStatus_Error)
return false;
if (status == RegExpRunStatus_Success)
res->updateLazily(cx, linearStr, &g.regExp(), 0);
args.rval().setInt32(status == RegExpRunStatus_Success_NotFound ? -1 : matches[0].start);
return true;
}
// Utility for building a rope (lazy concatenation) of strings.
class RopeBuilder {
JSContext* cx;
RootedString res;
RopeBuilder(const RopeBuilder& other) = delete;
void operator=(const RopeBuilder& other) = delete;
public:
explicit RopeBuilder(JSContext* cx)
: cx(cx), res(cx, cx->runtime()->emptyString)
{}
inline bool append(HandleString str) {
res = ConcatStrings<CanGC>(cx, res, str);
return !!res;
}
inline JSString* result() {
return res;
}
};
namespace {
template <typename CharT>
static uint32_t
FindDollarIndex(const CharT* chars, size_t length)
{
if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
uint32_t dollarIndex = p - chars;
MOZ_ASSERT(dollarIndex < length);
return dollarIndex;
}
return UINT32_MAX;
}
struct ReplaceData
{
explicit ReplaceData(JSContext* cx)
: str(cx), g(cx), lambda(cx), elembase(cx), repstr(cx),
fig(cx, NullValue()), sb(cx)
{}
inline void setReplacementString(JSLinearString* string) {
MOZ_ASSERT(string);
lambda = nullptr;
elembase = nullptr;
repstr = string;
AutoCheckCannotGC nogc;
dollarIndex = string->hasLatin1Chars()
? FindDollarIndex(string->latin1Chars(nogc), string->length())
: FindDollarIndex(string