// blob: c59a41b27f6653011095d963bb0b5ab1585829cb [file] [log] [blame]
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include 'src/builtins/builtins-regexp-gen.h'
namespace regexp {
// Builtin implemented outside this file. In this file it is called as
// SubString(string, from, to) to copy the parts of the subject string that
// lie between matches.
extern builtin
SubString(implicit context: Context)(String, Smi, Smi): String;
// Runtime function implemented outside this file. Callers in this file treat
// a Null result as "no matches" and otherwise read the returned JSArray's
// elements as the list of match results.
extern runtime RegExpExecMultiple(implicit context: Context)(
JSRegExp, String, RegExpMatchInfo, JSArray): Null|JSArray;
// Runtime function implemented outside this file. It is the fallback used in
// this file whenever one of the fast paths cannot be taken.
extern transitioning runtime
RegExpReplaceRT(Context, JSReceiver, String, Object): String;
// Runtime function implemented outside this file. Called below with the
// results array, its length and the subject string; its return value is used
// as the final replaced string.
// NOTE(review): presumably joins the array entries and subject slices into one
// string - confirm against the runtime implementation.
extern transitioning runtime
StringBuilderConcat(implicit context: Context)(JSArray, Smi, String): String;
// Runtime function implemented outside this file. Used by RegExpReplace for a
// callable replace value when the regexp is not global.
extern transitioning runtime
StringReplaceNonGlobalRegExpWithFunction(implicit context: Context)(
String, JSRegExp, Callable): String;
// Walks the first {matchesLength} entries of {matchesElements} and, for every
// String entry (a matched substring), calls {replaceFn} with the match, the
// position of the match and the subject {string}. The stringified return
// value of the callback is written back over the entry.
//
// Smi entries describe slices of the subject; they are left untouched and are
// only decoded to keep track of the position that follows the slice. Any
// other kind of entry is treated as unreachable.
transitioning macro RegExpReplaceCallableNoExplicitCaptures(
    implicit context: Context)(
    matchesElements: FixedArray, matchesLength: intptr, string: String,
    replaceFn: Callable) {
  // Position handed to {replaceFn} for the next matched substring.
  let position: Smi = 0;
  let index: intptr = 0;
  while (index < matchesLength) {
    typeswitch (matchesElements.objects[index]) {
      case (encodedSlice: Smi): {
        // A slice is encoded in either one or two smis; a positive smi means
        // the single smi encoding is used (see
        // ReplacementStringBuilder::AddSubjectSlice()).
        if (encodedSlice > 0) {
          // Single smi encoding: two fields are packed into one value (see
          // StringBuilderSubstringLength::encode() and
          // StringBuilderSubstringPosition::encode()). Their sum is the
          // position right after the slice.
          const packed: intptr = Convert<intptr>(encodedSlice);
          const upperField: intptr = packed >> 11;
          const lowerField: intptr = packed & 0x7FF;
          position = Convert<Smi>(upperField + lowerField);
        } else {
          // Two smi encoding: this entry holds the negated length and the
          // following entry holds the start of the slice. Consume that
          // following entry here as well.
          index++;
          const sliceStart: Smi =
              UnsafeCast<Smi>(matchesElements.objects[index]);
          position = sliceStart - encodedSlice;
        }
      }
      case (matched: String): {
        // A matched substring: let the callback compute its replacement and
        // store the result in place of the match.
        const callbackResult: JSAny =
            Call(context, replaceFn, Undefined, matched, position, string);
        matchesElements.objects[index] = ToString_Inline(callbackResult);
        position += matched.length_smi;
      }
      case (Object): deferred {
        unreachable;
      }
    }
    index++;
  }
}
// Walks the first {matchesLength} entries of {matchesElements}. Each JSArray
// entry is passed to {replaceFn} as its argument list, and the entry is then
// replaced by the stringified return value of the callback. Entries that are
// not JSArrays are skipped unchanged.
transitioning macro
RegExpReplaceCallableWithExplicitCaptures(implicit context: Context)(
    matchesElements: FixedArray, matchesLength: intptr, replaceFn: Callable) {
  for (let index: intptr = 0; index < matchesLength; index++) {
    typeswitch (matchesElements.objects[index]) {
      case (callbackArgs: JSArray): {
        // Reflect.apply() spreads the JSArray into the arguments of
        // {replaceFn}.
        // TODO(jgruber): Remove indirection through Call->ReflectApply.
        const callbackResult: JSAny = Call(
            context, GetReflectApply(), Undefined, replaceFn, Undefined,
            callbackArgs);
        // The callback's result, converted to a string, takes the place of
        // the argument array in the results.
        const replacement: String = ToString_Inline(callbackResult);
        matchesElements.objects[index] = replacement;
      }
      case (Object): {
        // Not an argument array; nothing to do for this entry.
      }
    }
  }
}
// Replacement path for a fast regexp combined with a callable replace value
// (RegExpReplace only takes it when the regexp is global).
//
// All matches are collected up front via RegExpExecMultiple, {replaceFn} is
// then invoked for each of them, and the final string is assembled by
// StringBuilderConcat. Returns {string} itself when nothing matched.
// {regexp}.lastIndex is reset to 0 both before and after matching.
transitioning macro RegExpReplaceFastGlobalCallable(implicit context: Context)(
    regexp: FastJSRegExp, string: String, replaceFn: Callable): String {
  regexp.lastIndex = 0;

  // Set up the (initially empty) array that receives the match results.
  const kInitialCapacity: intptr = 16;
  const kInitialLength: Smi = 0;
  const lastMatchInfo = GetRegExpLastMatchInfo();
  const resultsArray = AllocateJSArray(
      ElementsKind::PACKED_ELEMENTS, GetFastPackedElementsJSArrayMap(),
      kInitialCapacity, kInitialLength);
  const execResult: Null|JSArray =
      RegExpExecMultiple(regexp, string, lastMatchInfo, resultsArray);

  regexp.lastIndex = 0;

  // Without any match the subject string is the result.
  if (execResult == Null) return string;

  const matches: JSArray = UnsafeCast<JSArray>(execResult);
  const matchCount: Smi = Cast<Smi>(matches.length) otherwise unreachable;
  const matchCountIntPtr: intptr = Convert<intptr>(matchCount);
  const elements: FixedArray = UnsafeCast<FixedArray>(matches.elements);

  // The last match info is fetched again because it might have changed.
  const captureCount: Smi = GetRegExpLastMatchInfo().NumberOfCaptures();

  // A capture count of two means the regexp has no explicit captures, only
  // the implicit capture of the whole match. That case gets a simpler and
  // faster treatment.
  if (captureCount != 2) {
    RegExpReplaceCallableWithExplicitCaptures(
        elements, matchCountIntPtr, replaceFn);
  } else {
    RegExpReplaceCallableNoExplicitCaptures(
        elements, matchCountIntPtr, string, replaceFn);
  }

  return StringBuilderConcat(matches, matchCount, string);
}
// Simple string replacement: every match (or only the first one for a
// non-global regexp) is replaced by {replaceString} verbatim.
//
// This path is only reached when the receiver is an unmodified JSRegExp
// instance, the replace value is not callable, and its string form contains
// no '$'.
transitioning macro RegExpReplaceFastString(implicit context: Context)(
    regexp: JSRegExp, string: String, replaceString: String): String {
  const initialFastRegexp = UnsafeCast<FastJSRegExp>(regexp);
  const isGlobal: bool = initialFastRegexp.global;
  let isUnicode: bool = false;
  if (isGlobal) {
    isUnicode = initialFastRegexp.unicode;
    initialFastRegexp.lastIndex = 0;
  }

  const replaceLength: Smi = replaceString.length_smi;
  // String built so far, and the end of the most recent match in {string}.
  let output: String = kEmptyString;
  let previousMatchEnd: Smi = 0;

  while (true) {
    const match: RegExpMatchInfo =
        RegExpPrototypeExecBodyWithoutResultFast(regexp, string)
        otherwise break;
    const matchStart: Smi = match.GetStartOfCapture(0);
    const matchEnd: Smi = match.GetEndOfCapture(0);

    // Copy the part of the subject between the previous match and this one.
    // TODO(jgruber): We could skip many of the checks that using SubString
    // here entails.
    const unmatched: String = SubString(string, previousMatchEnd, matchStart);
    output = output + unmatched;
    previousMatchEnd = matchEnd;

    if (replaceLength != 0) {
      output = output + replaceString;
    }

    // A non-global regexp is done after the first replacement.
    if (!isGlobal) break;

    // Only an empty match requires lastIndex to be advanced by hand.
    if (matchEnd != matchStart) continue;

    typeswitch (regexp) {
      case (stillFast: FastJSRegExp): {
        stillFast.lastIndex =
            AdvanceStringIndexFast(string, stillFast.lastIndex, isUnicode);
      }
      case (Object): {
        const rawLastIndex: JSAny = SlowLoadLastIndex(regexp);
        const currentIndex: Number = ToLength_Inline(rawLastIndex);
        const advancedIndex: Number =
            AdvanceStringIndexSlow(string, currentIndex, isUnicode);
        SlowStoreLastIndex(regexp, advancedIndex);
      }
    }
  }

  // Append whatever follows the last match.
  const tail: String = SubString(string, previousMatchEnd, string.length_smi);
  return output + tail;
}
// Replace on a fast regexp. Dispatches on the kind of {replaceValue}:
//  - callable and global regexp: RegExpReplaceFastGlobalCallable;
//  - callable and non-global regexp: StringReplaceNonGlobalRegExpWithFunction;
//  - anything else: converted to a string, then RegExpReplaceFastString, or
//    the RegExpReplaceRT runtime fallback if the regexp is no longer fast or
//    the replacement string contains '$'.
transitioning builtin RegExpReplace(implicit context: Context)(
    regexp: FastJSRegExp, string: String, replaceValue: JSAny): String {
  // TODO(pwong): Remove assert when all callers (StringPrototypeReplace) are
  // from Torque.
  assert(Is<FastJSRegExp>(regexp));

  // 2. Is {replace_value} callable?
  typeswitch (replaceValue) {
    case (replaceFn: Callable): {
      if (regexp.global) {
        return RegExpReplaceFastGlobalCallable(regexp, string, replaceFn);
      } else {
        return StringReplaceNonGlobalRegExpWithFunction(
            string, regexp, replaceFn);
      }
    }
    case (JSAny): {
      // Hold on to the regexp under its plain JSRegExp type before the
      // string conversion below runs.
      const plainRegexp: JSRegExp = regexp;
      const replaceString: String = ToString_Inline(replaceValue);

      try {
        // ToString(replaceValue) could potentially change the shape of the
        // RegExp object, so the fast-path check is repeated here; if it
        // fails we bail out to the runtime.
        const fastRegexp = Cast<FastJSRegExp>(plainRegexp) otherwise SlowPath;
        const dollarIndex = StringIndexOf(
            replaceString, SingleCharacterStringConstant('$'), 0);
        if (dollarIndex != -1) {
          goto SlowPath;
        }

        return RegExpReplaceFastString(fastRegexp, string, replaceString);
      } label SlowPath deferred {
        return RegExpReplaceRT(context, plainRegexp, string, replaceString);
      }
    }
  }
}
// Torque-side name for the C++ constant
// v8::Isolate::kRegExpReplaceCalledOnSlowRegExp. RegExpPrototypeReplace passes
// it to IncrementUseCounter when the receiver is not a fast regexp.
const kRegExpReplaceCalledOnSlowRegExp: constexpr int31
generates 'v8::Isolate::kRegExpReplaceCalledOnSlowRegExp';
// JavaScript-visible entry point for RegExp.prototype[@@replace].
//
// Throws a TypeError if the receiver is not a JSReceiver. Otherwise the first
// argument is converted to a string and the work is dispatched as follows:
//
//   receiver is not a fast regexp   -> runtime (RegExpReplaceRT)
//   replace value is callable:
//     regexp is global              -> RegExpReplaceFastGlobalCallable()
//                                      (called "fast path", but it still
//                                      contains several runtime calls)
//     regexp is not global          -> runtime
//                                      (StringReplaceNonGlobalRegExpWithFunction)
//   replace value is not callable:
//     replacement contains "$"      -> runtime (RegExpReplaceRT)
//     otherwise                     -> RegExpReplaceFastString()
//
// Everything below the first line of that table is handled by RegExpReplace.
transitioning javascript builtin RegExpPrototypeReplace(
    js-implicit context: NativeContext, receiver: JSAny)(...arguments): JSAny {
  const methodName: constexpr string = 'RegExp.prototype.@@replace';

  const subjectArg: JSAny = arguments[0];
  const replaceValue: JSAny = arguments[1];

  // Let rx be the this value.
  // If Type(rx) is not Object, throw a TypeError exception.
  const rx = Cast<JSReceiver>(receiver)
      otherwise ThrowTypeError(
      MessageTemplate::kIncompatibleMethodReceiver, methodName);

  // Let S be ? ToString(string).
  const subject = ToString_Inline(subjectArg);

  // Fast-path checks: 1. Is the {receiver} an unmodified JSRegExp instance?
  try {
    const fastRx: FastJSRegExp = Cast<FastJSRegExp>(rx) otherwise SlowRegExp;
    return RegExpReplace(fastRx, subject, replaceValue);
  } label SlowRegExp deferred {
    // Record that replace was called on a slow regexp, then let the runtime
    // do the work.
    IncrementUseCounter(context, SmiConstant(kRegExpReplaceCalledOnSlowRegExp));
    return RegExpReplaceRT(context, rx, subject, replaceValue);
  }
}
}