blob: 56c0596bb42ace7fe12b0ad86d3a141ef9be1bb7 [file] [log] [blame]
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/experimental/experimental.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/experimental/experimental-compiler.h"
#include "src/regexp/experimental/experimental-interpreter.h"
#include "src/regexp/regexp-parser.h"
#include "src/utils/ostreams.h"
namespace v8 {
namespace internal {
// Reports whether the experimental engine is able to execute the parsed
// pattern `tree` under `flags`. The decision itself is delegated entirely to
// ExperimentalRegExpCompiler::CanBeHandled.
bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags,
                                      int capture_count) {
  // Callers are expected to have enabled the engine through at least one of
  // these two flags.
  DCHECK(FLAG_enable_experimental_regexp_engine ||
         FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);

  const bool supported =
      ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count);
  return supported;
}
// Marks `re` as an experimental-engine regexp by storing its source, flags and
// capture count through the factory. No compilation happens here.
void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
                                    Handle<String> source,
                                    JSRegExp::Flags flags, int capture_count) {
  DCHECK(FLAG_enable_experimental_regexp_engine);

  // Optional tracing of every regexp handed to this engine.
  if (FLAG_trace_experimental_regexp_engine) {
    StdoutStream{} << "Initializing experimental regexp " << *source
                   << std::endl;
  }

  auto* factory = isolate->factory();
  factory->SetRegExpExperimentalData(re, source, flags, capture_count);
}
// Returns true once bytecode has been stored on `re`, i.e. when the Latin1
// bytecode slot no longer holds the "uninitialized" marker value.
bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
  DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP
  re->JSRegExpVerify(isolate);
#endif

  // Compile() writes the same bytecode into both the Latin1 and UC16 slots,
  // so inspecting the Latin1 slot alone is sufficient.
  auto bytecode_slot = re->DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex);
  return bytecode_slot != Smi::FromInt(JSRegExp::kUninitializedValue);
}
// Copies the raw bytes of `data` into a freshly allocated ByteArray and
// returns a handle to it. T must be a trivial type so that a plain memory
// copy is a faithful representation of the elements.
template <class T>
Handle<ByteArray> VectorToByteArray(Isolate* isolate, Vector<T> data) {
  STATIC_ASSERT(std::is_trivial<T>::value);

  int total_bytes = sizeof(T) * data.length();
  Handle<ByteArray> copy = isolate->factory()->NewByteArray(total_bytes);

  // The destination address is taken from the heap object directly, so no
  // heap allocation is permitted from here until the copy has finished.
  DisallowHeapAllocation no_gc;
  MemCopy(copy->GetDataStartAddress(), data.begin(), total_bytes);
  return copy;
}
namespace {

// The heap objects produced by one successful compilation.
struct CompilationResult {
  // Instruction sequence for the interpreter, stored as raw bytes.
  Handle<ByteArray> bytecode;
  // Capture-name map exactly as produced by the parser.
  Handle<FixedArray> capture_name_map;
};

// Compiles source pattern, but doesn't change the regexp object.
// Returns nullopt, with an exception thrown on the isolate, if the pattern
// cannot be parsed.
base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
                                              Handle<JSRegExp> regexp) {
  // All parser and compiler temporaries live in this zone and go away when
  // the function returns; only the ByteArray copy made below survives.
  Zone zone(isolate->allocator(), ZONE_NAME);

  Handle<String> source(regexp->Pattern(), isolate);
  JSRegExp::Flags flags = regexp->GetFlags();

  // Step 1: parse the pattern source.
  RegExpCompileData parse_result;
  FlatStringReader reader(isolate, source);
  DCHECK(!isolate->has_pending_exception());

  if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
                                 &parse_result)) {
    // This pattern already went through the parser successfully when the
    // regexp was initialized, so a failure at this point can only be caused
    // by a stack overflow.
    DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
    USE(RegExp::ThrowRegExpException(isolate, regexp, source,
                                     parse_result.error));
    return base::nullopt;
  }

  // Step 2: compile the parse tree to bytecode (zone-allocated).
  ZoneList<RegExpInstruction> bytecode =
      ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone);

  // Step 3: move the results onto the heap so they outlive the zone.
  CompilationResult compiled;
  compiled.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
  compiled.capture_name_map = parse_result.capture_name_map;
  return compiled;
}

}  // namespace
// Compiles the pattern of `re` and installs the result on the regexp object.
// Returns false, with an exception pending on the isolate, if compilation
// fails; in that case `re` is left untouched.
bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
  DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP
  re->JSRegExpVerify(isolate);
#endif

  Handle<String> source(re->Pattern(), isolate);
  if (FLAG_trace_experimental_regexp_engine) {
    StdoutStream{} << "Compiling experimental regexp " << *source << std::endl;
  }

  base::Optional<CompilationResult> compiled = CompileImpl(isolate, re);
  if (compiled.has_value()) {
    // One bytecode array serves both subject encodings, so it is stored in
    // the Latin1 slot as well as the UC16 slot.
    re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex, *compiled->bytecode);
    re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex, *compiled->bytecode);

    // Likewise, both code slots receive the same trampoline builtin.
    Handle<Code> trampoline =
        BUILTIN_CODE(isolate, RegExpExperimentalTrampoline);
    re->SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline);
    re->SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline);

    re->SetCaptureNameMap(compiled->capture_name_map);
    return true;
  }

  // CompileImpl has already thrown.
  DCHECK(isolate->has_pending_exception());
  return false;
}
// Reinterprets the payload of `raw_bytes` as a sequence of RegExpInstruction
// and returns a vector over that same memory. Nothing is copied.
Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
  int instruction_count = raw_bytes.length() / sizeof(RegExpInstruction);
  // The array is expected to hold a whole number of instructions.
  DCHECK_EQ(sizeof(RegExpInstruction) * instruction_count,
            raw_bytes.length());

  RegExpInstruction* first_instruction =
      reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
  return Vector<RegExpInstruction>(first_instruction, instruction_count);
}
namespace {

// Runs the interpreter over `bytecode` on `subject`, starting at
// `subject_index`, and writes match registers into `output_registers`.
// Returns whatever ExperimentalRegExpInterpreter::FindMatches returns, except
// that for runtime callers a kInternalRegExpRetry result is never returned:
// the match is simply started over. `subject` must be flat.
int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin,
                    ByteArray bytecode, String subject, int capture_count,
                    int32_t* output_registers, int32_t output_register_count,
                    int32_t subject_index) {
  DisallowHeapAllocation no_gc;

  int registers_per_match = JSRegExp::RegistersForCaptureCount(capture_count);

  // NOTE(review): every retry reuses the same raw `subject` and `bytecode`
  // values; confirm that they are guaranteed to still be valid whenever
  // FindMatches reports kInternalRegExpRetry.
  while (true) {
    DCHECK(subject.IsFlat());
    // A fresh zone per attempt; it is released when the attempt ends.
    Zone zone(isolate->allocator(), ZONE_NAME);
    int32_t result = ExperimentalRegExpInterpreter::FindMatches(
        isolate, call_origin, bytecode, registers_per_match, subject,
        subject_index, output_registers, output_register_count, &zone);

    // Only runtime calls are retried here; for any other origin the retry
    // code is handed back to the caller.
    const bool retry = result == RegExp::kInternalRegExpRetry &&
                       call_origin == RegExp::kFromRuntime;
    if (!retry) return result;
  }
}

}  // namespace
// Executes an already compiled experimental regexp on `subject` and returns
// the number of matches. Callers in this file treat a negative return value
// as a failure. Takes raw (unhandlified) objects and runs entirely inside a
// no-allocation scope.
int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
                                    RegExp::CallOrigin call_origin,
                                    JSRegExp regexp, String subject,
                                    int32_t* output_registers,
                                    int32_t output_register_count,
                                    int32_t subject_index) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
  DisallowHeapAllocation no_gc;

  if (FLAG_trace_experimental_regexp_engine) {
    String pattern = String::cast(regexp.DataAt(JSRegExp::kSourceIndex));
    StdoutStream{} << "Executing experimental regexp " << pattern << std::endl;
  }

  // Compile() stores identical bytecode in the Latin1 and UC16 slots; the
  // Latin1 slot is the one read here.
  ByteArray bytecode =
      ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex));
  int capture_count = regexp.CaptureCount();

  return ExecRawImpl(isolate, call_origin, bytecode, subject, capture_count,
                     output_registers, output_register_count, subject_index);
}
// Entry point taking raw addresses instead of handles. Converts `subject` and
// `regexp` back into objects and forwards to ExecRaw with the kFromJs origin.
// `input_start`, `input_end` and `backtrack_stack` are part of the signature
// but are not used by this implementation; `call_origin` is only checked to
// be kFromJs.
int32_t ExperimentalRegExp::MatchForCallFromJs(
    Address subject, int32_t start_position, Address input_start,
    Address input_end, int* output_registers, int32_t output_register_count,
    Address backtrack_stack, RegExp::CallOrigin call_origin, Isolate* isolate,
    Address regexp) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
  DCHECK_NOT_NULL(isolate);
  DCHECK_NOT_NULL(output_registers);
  DCHECK(call_origin == RegExp::CallOrigin::kFromJs);

  // Nothing below may allocate on the heap, run JavaScript, or create or
  // dereference handles.
  DisallowHeapAllocation no_gc;
  DisallowJavascriptExecution no_js(isolate);
  DisallowHandleAllocation no_handles;
  DisallowHandleDereference no_deref;

  JSRegExp raw_regexp = JSRegExp::cast(Object(regexp));
  String raw_subject = String::cast(Object(subject));

  return ExecRaw(isolate, RegExp::kFromJs, raw_regexp, raw_subject,
                 output_registers, output_register_count, start_position);
}
// Matches `regexp` against `subject` starting at `subject_index`, compiling
// the regexp first if that has not happened yet.
//
// Returns:
//   - the result of RegExp::SetLastMatchInfo on a match,
//   - the null value when there is no match,
//   - an empty MaybeHandle, with an exception pending, on failure.
MaybeHandle<Object> ExperimentalRegExp::Exec(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
    int subject_index, Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
  DCHECK_EQ(regexp->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP
  regexp->JSRegExpVerify(isolate);
#endif

  // Compile lazily on first execution.
  if (!IsCompiled(regexp, isolate)) {
    if (!Compile(isolate, regexp)) {
      DCHECK(isolate->has_pending_exception());
      return MaybeHandle<Object>();
    }
  }
  DCHECK(IsCompiled(regexp, isolate));

  // The raw execution path requires a flat subject string.
  subject = String::Flatten(isolate, subject);

  int capture_count = regexp->CaptureCount();
  int register_count = JSRegExp::RegistersForCaptureCount(capture_count);

  // Use the isolate's static offsets vector when it is large enough, and fall
  // back to a temporary array (released on return) when it is not.
  std::unique_ptr<int32_t[]> owned_registers;
  int32_t* registers;
  if (register_count > Isolate::kJSRegexpStaticOffsetsVectorSize) {
    registers = NewArray<int32_t>(register_count);
    owned_registers.reset(registers);
  } else {
    registers = isolate->jsregexp_static_offsets_vector();
  }

  int match_count = ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject,
                            registers, register_count, subject_index);

  if (match_count == 0) {
    return isolate->factory()->null_value();
  }
  if (match_count < 0) {
    DCHECK_LT(match_count, 0);
    DCHECK(isolate->has_pending_exception());
    return MaybeHandle<Object>();
  }

  // Room was provided for exactly one match, so exactly one is expected.
  DCHECK_EQ(match_count, 1);
  return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
                                  capture_count, registers);
}
// Compiles `regexp` from scratch and executes it once on `subject`, starting
// at `subject_index`. The compiled bytecode is used for this one execution
// only and is not stored on the regexp object.
//
// Match registers are written to `output_registers`, which has room for
// `output_register_count` entries. Returns the result of the execution, or
// RegExp::kInternalRegExpException if compilation failed (CompileImpl has
// then already thrown on the isolate).
int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate,
                                           Handle<JSRegExp> regexp,
                                           Handle<String> subject,
                                           int32_t* output_registers,
                                           int32_t output_register_count,
                                           int32_t subject_index) {
  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);

  if (FLAG_trace_experimental_regexp_engine) {
    StdoutStream{} << "Experimental execution (oneshot) of regexp "
                   << regexp->Pattern() << std::endl;
  }

  base::Optional<CompilationResult> compilation_result =
      CompileImpl(isolate, regexp);
  if (!compilation_result.has_value()) return RegExp::kInternalRegExpException;

  // ExecRawImpl requires a flat subject (it DCHECKs subject.IsFlat()). Exec
  // flattens before going down the raw path; do the same here so that this
  // entry point does not depend on every caller having flattened already.
  // This is done ahead of the no-allocation scope below, as in Exec, since
  // flattening presumably may allocate.
  subject = String::Flatten(isolate, subject);

  DisallowHeapAllocation no_gc;
  return ExecRawImpl(isolate, RegExp::kFromRuntime,
                     *compilation_result->bytecode, *subject,
                     regexp->CaptureCount(), output_registers,
                     output_register_count, subject_index);
}
// One-shot counterpart of Exec: runs `regexp` on `subject` through
// OneshotExecRaw, which compiles the pattern for this single execution.
// `subject` is passed on as given; this function does not flatten it.
//
// Returns:
//   - the result of RegExp::SetLastMatchInfo on a match,
//   - the null value when there is no match,
//   - an empty MaybeHandle, with an exception pending, on failure.
MaybeHandle<Object> ExperimentalRegExp::OneshotExec(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
    int subject_index, Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
  DCHECK_NE(regexp->TypeTag(), JSRegExp::NOT_COMPILED);

  int capture_count = regexp->CaptureCount();
  int register_count = JSRegExp::RegistersForCaptureCount(capture_count);

  // Prefer the isolate's static offsets vector; allocate a temporary array
  // (released on return) only when that vector is too small.
  std::unique_ptr<int32_t[]> owned_registers;
  int32_t* registers;
  if (register_count > Isolate::kJSRegexpStaticOffsetsVectorSize) {
    registers = NewArray<int32_t>(register_count);
    owned_registers.reset(registers);
  } else {
    registers = isolate->jsregexp_static_offsets_vector();
  }

  int match_count = OneshotExecRaw(isolate, regexp, subject, registers,
                                   register_count, subject_index);

  if (match_count == 0) {
    return isolate->factory()->null_value();
  }
  if (match_count < 0) {
    DCHECK_LT(match_count, 0);
    DCHECK(isolate->has_pending_exception());
    return MaybeHandle<Object>();
  }

  // Room was provided for exactly one match, so exactly one is expected.
  DCHECK_EQ(match_count, 1);
  return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
                                  capture_count, registers);
}
} // namespace internal
} // namespace v8