|  | // Copyright 2012 the V8 project authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "src/regexp/regexp.h" | 
|  |  | 
|  | #include "src/codegen/compilation-cache.h" | 
|  | #include "src/diagnostics/code-tracer.h" | 
|  | #include "src/heap/heap-inl.h" | 
|  | #include "src/objects/js-regexp-inl.h" | 
|  | #include "src/regexp/experimental/experimental.h" | 
|  | #include "src/regexp/regexp-bytecode-generator.h" | 
|  | #include "src/regexp/regexp-bytecodes.h" | 
|  | #include "src/regexp/regexp-compiler.h" | 
|  | #include "src/regexp/regexp-dotprinter.h" | 
|  | #include "src/regexp/regexp-interpreter.h" | 
|  | #include "src/regexp/regexp-macro-assembler-arch.h" | 
|  | #include "src/regexp/regexp-macro-assembler-tracer.h" | 
|  | #include "src/regexp/regexp-parser.h" | 
|  | #include "src/regexp/regexp-utils.h" | 
|  | #include "src/strings/string-search.h" | 
|  | #include "src/utils/ostreams.h" | 
|  |  | 
|  | namespace v8 { | 
|  | namespace internal { | 
|  |  | 
|  | using namespace regexp_compiler_constants;  // NOLINT(build/namespaces) | 
|  |  | 
|  | class RegExpImpl final : public AllStatic { | 
|  | public: | 
|  | // Returns a string representation of a regular expression. | 
|  | // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4. | 
|  | // This function calls the garbage collector if necessary. | 
|  | static Handle<String> ToString(Handle<Object> value); | 
|  |  | 
|  | // Prepares a JSRegExp object with Irregexp-specific data. | 
|  | static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> pattern, JSRegExp::Flags flags, | 
|  | int capture_count, uint32_t backtrack_limit); | 
|  |  | 
|  | // Prepare a RegExp for being executed one or more times (using | 
|  | // IrregexpExecOnce) on the subject. | 
|  | // This ensures that the regexp is compiled for the subject, and that | 
|  | // the subject is flat. | 
|  | // Returns the number of integer spaces required by IrregexpExecOnce | 
|  | // as its "registers" argument.  If the regexp cannot be compiled, | 
|  | // an exception is set as pending, and this function returns negative. | 
|  | static int IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject); | 
|  |  | 
|  | static void AtomCompile(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> pattern, JSRegExp::Flags flags, | 
|  | Handle<String> match_pattern); | 
|  |  | 
|  | static int AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject, int index, int32_t* output, | 
|  | int output_size); | 
|  |  | 
|  | static Handle<Object> AtomExec(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject, int index, | 
|  | Handle<RegExpMatchInfo> last_match_info); | 
|  |  | 
|  | // Execute a regular expression on the subject, starting from index. | 
|  | // If matching succeeds, return the number of matches.  This can be larger | 
|  | // than one in the case of global regular expressions. | 
|  | // The captures and subcaptures are stored into the registers vector. | 
|  | // If matching fails, returns RE_FAILURE. | 
|  | // If execution fails, sets a pending exception and returns RE_EXCEPTION. | 
|  | static int IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject, int index, int32_t* output, | 
|  | int output_size); | 
|  |  | 
|  | // Execute an Irregexp bytecode pattern. | 
|  | // On a successful match, the result is a JSArray containing | 
|  | // captured positions.  On a failure, the result is the null value. | 
|  | // Returns an empty handle in case of an exception. | 
|  | V8_WARN_UNUSED_RESULT static MaybeHandle<Object> IrregexpExec( | 
|  | Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, | 
|  | int index, Handle<RegExpMatchInfo> last_match_info); | 
|  |  | 
|  | static bool CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> sample_subject, bool is_one_byte); | 
|  | static inline bool EnsureCompiledIrregexp(Isolate* isolate, | 
|  | Handle<JSRegExp> re, | 
|  | Handle<String> sample_subject, | 
|  | bool is_one_byte); | 
|  |  | 
|  | // Returns true on success, false on failure. | 
|  | static bool Compile(Isolate* isolate, Zone* zone, RegExpCompileData* input, | 
|  | JSRegExp::Flags flags, Handle<String> pattern, | 
|  | Handle<String> sample_subject, bool is_one_byte, | 
|  | uint32_t& backtrack_limit); | 
|  |  | 
|  | // For acting on the JSRegExp data FixedArray. | 
|  | static int IrregexpMaxRegisterCount(FixedArray re); | 
|  | static void SetIrregexpMaxRegisterCount(FixedArray re, int value); | 
|  | static int IrregexpNumberOfCaptures(FixedArray re); | 
|  | static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte); | 
|  | static Code IrregexpNativeCode(FixedArray re, bool is_one_byte); | 
|  | }; | 
|  |  | 
|  | MaybeHandle<Object> RegExp::ThrowRegExpException(Isolate* isolate, | 
|  | Handle<JSRegExp> re, | 
|  | Handle<String> pattern, | 
|  | RegExpError error) { | 
|  | Vector<const char> error_data = CStrVector(RegExpErrorString(error)); | 
|  | Handle<String> error_text = | 
|  | isolate->factory() | 
|  | ->NewStringFromOneByte(Vector<const uint8_t>::cast(error_data)) | 
|  | .ToHandleChecked(); | 
|  | THROW_NEW_ERROR( | 
|  | isolate, | 
|  | NewSyntaxError(MessageTemplate::kMalformedRegExp, pattern, error_text), | 
|  | Object); | 
|  | } | 
|  |  | 
|  | void RegExp::ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, | 
|  | RegExpError error_text) { | 
|  | USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate), | 
|  | error_text)); | 
|  | } | 
|  |  | 
|  | bool RegExp::IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp) { | 
|  | return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp); | 
|  | } | 
|  |  | 
|  | // Identifies the sort of regexps where the regexp engine is faster | 
|  | // than the code used for atom matches. | 
|  | static bool HasFewDifferentCharacters(Handle<String> pattern) { | 
|  | int length = Min(kMaxLookaheadForBoyerMoore, pattern->length()); | 
|  | if (length <= kPatternTooShortForBoyerMoore) return false; | 
|  | const int kMod = 128; | 
|  | bool character_found[kMod]; | 
|  | int different = 0; | 
|  | memset(&character_found[0], 0, sizeof(character_found)); | 
|  | for (int i = 0; i < length; i++) { | 
|  | int ch = (pattern->Get(i) & (kMod - 1)); | 
|  | if (!character_found[ch]) { | 
|  | character_found[ch] = true; | 
|  | different++; | 
|  | // We declare a regexp low-alphabet if it has at least 3 times as many | 
|  | // characters as it has different characters. | 
|  | if (different * 3 > length) return false; | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // Generic RegExp methods. Dispatches to implementation specific methods. | 
|  |  | 
|  | // static | 
|  | MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> pattern, | 
|  | JSRegExp::Flags flags, | 
|  | uint32_t backtrack_limit) { | 
|  | DCHECK(pattern->IsFlat()); | 
|  |  | 
|  | // Caching is based only on the pattern and flags, but code also differs when | 
|  | // a backtrack limit is set. A present backtrack limit is very much *not* the | 
|  | // common case, so just skip the cache for these. | 
|  | const bool is_compilation_cache_enabled = | 
|  | (backtrack_limit == JSRegExp::kNoBacktrackLimit); | 
|  |  | 
|  | Zone zone(isolate->allocator(), ZONE_NAME); | 
|  | CompilationCache* compilation_cache = nullptr; | 
|  | if (is_compilation_cache_enabled) { | 
|  | compilation_cache = isolate->compilation_cache(); | 
|  | MaybeHandle<FixedArray> maybe_cached = | 
|  | compilation_cache->LookupRegExp(pattern, flags); | 
|  | Handle<FixedArray> cached; | 
|  | if (maybe_cached.ToHandle(&cached)) { | 
|  | re->set_data(*cached); | 
|  | return re; | 
|  | } | 
|  | } | 
|  |  | 
|  | PostponeInterruptsScope postpone(isolate); | 
|  | RegExpCompileData parse_result; | 
|  | FlatStringReader reader(isolate, pattern); | 
|  | DCHECK(!isolate->has_pending_exception()); | 
|  | if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, | 
|  | &parse_result)) { | 
|  | // Throw an exception if we fail to parse the pattern. | 
|  | return RegExp::ThrowRegExpException(isolate, re, pattern, | 
|  | parse_result.error); | 
|  | } | 
|  |  | 
|  | bool has_been_compiled = false; | 
|  |  | 
|  | if (FLAG_default_to_experimental_regexp_engine && | 
|  | ExperimentalRegExp::CanBeHandled(parse_result.tree, flags, | 
|  | parse_result.capture_count)) { | 
|  | DCHECK(FLAG_enable_experimental_regexp_engine); | 
|  | ExperimentalRegExp::Initialize(isolate, re, pattern, flags, | 
|  | parse_result.capture_count); | 
|  | has_been_compiled = true; | 
|  | } else if (flags & JSRegExp::kLinear) { | 
|  | DCHECK(FLAG_enable_experimental_regexp_engine); | 
|  | if (!ExperimentalRegExp::CanBeHandled(parse_result.tree, flags, | 
|  | parse_result.capture_count)) { | 
|  | // TODO(mbid): The error could provide a reason for why the regexp can't | 
|  | // be executed in linear time (e.g. due to back references). | 
|  | return RegExp::ThrowRegExpException(isolate, re, pattern, | 
|  | RegExpError::kNotLinear); | 
|  | } | 
|  | ExperimentalRegExp::Initialize(isolate, re, pattern, flags, | 
|  | parse_result.capture_count); | 
|  | has_been_compiled = true; | 
|  | } else if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) && | 
|  | !HasFewDifferentCharacters(pattern)) { | 
|  | // Parse-tree is a single atom that is equal to the pattern. | 
|  | RegExpImpl::AtomCompile(isolate, re, pattern, flags, pattern); | 
|  | has_been_compiled = true; | 
|  | } else if (parse_result.tree->IsAtom() && !IsSticky(flags) && | 
|  | parse_result.capture_count == 0) { | 
|  | RegExpAtom* atom = parse_result.tree->AsAtom(); | 
|  | // The pattern source might (?) contain escape sequences, but they're | 
|  | // resolved in atom_string. | 
|  | Vector<const uc16> atom_pattern = atom->data(); | 
|  | Handle<String> atom_string; | 
|  | ASSIGN_RETURN_ON_EXCEPTION( | 
|  | isolate, atom_string, | 
|  | isolate->factory()->NewStringFromTwoByte(atom_pattern), Object); | 
|  | if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) { | 
|  | RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string); | 
|  | has_been_compiled = true; | 
|  | } | 
|  | } | 
|  | if (!has_been_compiled) { | 
|  | RegExpImpl::IrregexpInitialize(isolate, re, pattern, flags, | 
|  | parse_result.capture_count, backtrack_limit); | 
|  | } | 
|  | DCHECK(re->data().IsFixedArray()); | 
|  | // Compilation succeeded so the data is set on the regexp | 
|  | // and we can store it in the cache. | 
|  | Handle<FixedArray> data(FixedArray::cast(re->data()), isolate); | 
|  | if (is_compilation_cache_enabled) { | 
|  | compilation_cache->PutRegExp(pattern, flags, data); | 
|  | } | 
|  |  | 
|  | return re; | 
|  | } | 
|  |  | 
|  | // static | 
|  | bool RegExp::EnsureFullyCompiled(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> subject) { | 
|  | switch (re->TypeTag()) { | 
|  | case JSRegExp::NOT_COMPILED: | 
|  | UNREACHABLE(); | 
|  | case JSRegExp::ATOM: | 
|  | return true; | 
|  | case JSRegExp::IRREGEXP: | 
|  | if (RegExpImpl::IrregexpPrepare(isolate, re, subject) == -1) { | 
|  | DCHECK(isolate->has_pending_exception()); | 
|  | return false; | 
|  | } | 
|  | return true; | 
|  | case JSRegExp::EXPERIMENTAL: | 
|  | if (!ExperimentalRegExp::IsCompiled(re, isolate) && | 
|  | !ExperimentalRegExp::Compile(isolate, re)) { | 
|  | DCHECK(isolate->has_pending_exception()); | 
|  | return false; | 
|  | } | 
|  | return true; | 
|  | } | 
|  | } | 
|  |  | 
|  | // static | 
|  | MaybeHandle<Object> RegExp::ExperimentalOneshotExec( | 
|  | Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, | 
|  | int index, Handle<RegExpMatchInfo> last_match_info) { | 
|  | return ExperimentalRegExp::OneshotExec(isolate, regexp, subject, index, | 
|  | last_match_info); | 
|  | } | 
|  |  | 
|  | // static | 
|  | MaybeHandle<Object> RegExp::Exec(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject, int index, | 
|  | Handle<RegExpMatchInfo> last_match_info) { | 
|  | switch (regexp->TypeTag()) { | 
|  | case JSRegExp::NOT_COMPILED: | 
|  | UNREACHABLE(); | 
|  | case JSRegExp::ATOM: | 
|  | return RegExpImpl::AtomExec(isolate, regexp, subject, index, | 
|  | last_match_info); | 
|  | case JSRegExp::IRREGEXP: | 
|  | return RegExpImpl::IrregexpExec(isolate, regexp, subject, index, | 
|  | last_match_info); | 
|  | case JSRegExp::EXPERIMENTAL: | 
|  | return ExperimentalRegExp::Exec(isolate, regexp, subject, index, | 
|  | last_match_info); | 
|  | } | 
|  | } | 
|  |  | 
|  | // RegExp Atom implementation: Simple string search using indexOf. | 
|  |  | 
|  | void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> pattern, JSRegExp::Flags flags, | 
|  | Handle<String> match_pattern) { | 
|  | isolate->factory()->SetRegExpAtomData(re, pattern, flags, match_pattern); | 
|  | } | 
|  |  | 
|  | static void SetAtomLastCapture(Isolate* isolate, | 
|  | Handle<RegExpMatchInfo> last_match_info, | 
|  | String subject, int from, int to) { | 
|  | SealHandleScope shs(isolate); | 
|  | last_match_info->SetNumberOfCaptureRegisters(2); | 
|  | last_match_info->SetLastSubject(subject); | 
|  | last_match_info->SetLastInput(subject); | 
|  | last_match_info->SetCapture(0, from); | 
|  | last_match_info->SetCapture(1, to); | 
|  | } | 
|  |  | 
|  | int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject, int index, int32_t* output, | 
|  | int output_size) { | 
|  | DCHECK_LE(0, index); | 
|  | DCHECK_LE(index, subject->length()); | 
|  |  | 
|  | subject = String::Flatten(isolate, subject); | 
|  | DisallowHeapAllocation no_gc;  // ensure vectors stay valid | 
|  |  | 
|  | String needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)); | 
|  | int needle_len = needle.length(); | 
|  | DCHECK(needle.IsFlat()); | 
|  | DCHECK_LT(0, needle_len); | 
|  |  | 
|  | if (index + needle_len > subject->length()) { | 
|  | return RegExp::RE_FAILURE; | 
|  | } | 
|  |  | 
|  | for (int i = 0; i < output_size; i += 2) { | 
|  | String::FlatContent needle_content = needle.GetFlatContent(no_gc); | 
|  | String::FlatContent subject_content = subject->GetFlatContent(no_gc); | 
|  | DCHECK(needle_content.IsFlat()); | 
|  | DCHECK(subject_content.IsFlat()); | 
|  | // dispatch on type of strings | 
|  | index = | 
|  | (needle_content.IsOneByte() | 
|  | ? (subject_content.IsOneByte() | 
|  | ? SearchString(isolate, subject_content.ToOneByteVector(), | 
|  | needle_content.ToOneByteVector(), index) | 
|  | : SearchString(isolate, subject_content.ToUC16Vector(), | 
|  | needle_content.ToOneByteVector(), index)) | 
|  | : (subject_content.IsOneByte() | 
|  | ? SearchString(isolate, subject_content.ToOneByteVector(), | 
|  | needle_content.ToUC16Vector(), index) | 
|  | : SearchString(isolate, subject_content.ToUC16Vector(), | 
|  | needle_content.ToUC16Vector(), index))); | 
|  | if (index == -1) { | 
|  | return i / 2;  // Return number of matches. | 
|  | } else { | 
|  | output[i] = index; | 
|  | output[i + 1] = index + needle_len; | 
|  | index += needle_len; | 
|  | } | 
|  | } | 
|  | return output_size / 2; | 
|  | } | 
|  |  | 
|  | Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> subject, int index, | 
|  | Handle<RegExpMatchInfo> last_match_info) { | 
|  | static const int kNumRegisters = 2; | 
|  | STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize); | 
|  | int32_t* output_registers = isolate->jsregexp_static_offsets_vector(); | 
|  |  | 
|  | int res = | 
|  | AtomExecRaw(isolate, re, subject, index, output_registers, kNumRegisters); | 
|  |  | 
|  | if (res == RegExp::RE_FAILURE) return isolate->factory()->null_value(); | 
|  |  | 
|  | DCHECK_EQ(res, RegExp::RE_SUCCESS); | 
|  | SealHandleScope shs(isolate); | 
|  | SetAtomLastCapture(isolate, last_match_info, *subject, output_registers[0], | 
|  | output_registers[1]); | 
|  | return last_match_info; | 
|  | } | 
|  |  | 
|  | // Irregexp implementation. | 
|  |  | 
|  | // Ensures that the regexp object contains a compiled version of the | 
|  | // source for either one-byte or two-byte subject strings. | 
|  | // If the compiled version doesn't already exist, it is compiled | 
|  | // from the source pattern. | 
|  | // If compilation fails, an exception is thrown and this function | 
|  | // returns false. | 
|  | bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> sample_subject, | 
|  | bool is_one_byte) { | 
|  | Object compiled_code = re->Code(is_one_byte); | 
|  | Object bytecode = re->Bytecode(is_one_byte); | 
|  | bool needs_initial_compilation = | 
|  | compiled_code == Smi::FromInt(JSRegExp::kUninitializedValue); | 
|  | // Recompile is needed when we're dealing with the first execution of the | 
|  | // regexp after the decision to tier up has been made. If the tiering up | 
|  | // strategy is not in use, this value is always false. | 
|  | bool needs_tier_up_compilation = | 
|  | re->MarkedForTierUp() && bytecode.IsByteArray(); | 
|  |  | 
|  | if (FLAG_trace_regexp_tier_up && needs_tier_up_compilation) { | 
|  | PrintF("JSRegExp object %p needs tier-up compilation\n", | 
|  | reinterpret_cast<void*>(re->ptr())); | 
|  | } | 
|  |  | 
|  | if (!needs_initial_compilation && !needs_tier_up_compilation) { | 
|  | DCHECK(compiled_code.IsCode()); | 
|  | DCHECK_IMPLIES(FLAG_regexp_interpret_all, bytecode.IsByteArray()); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | DCHECK_IMPLIES(needs_tier_up_compilation, bytecode.IsByteArray()); | 
|  |  | 
|  | return CompileIrregexp(isolate, re, sample_subject, is_one_byte); | 
|  | } | 
|  |  | 
|  | #ifdef DEBUG | 
|  | namespace { | 
|  |  | 
|  | bool RegExpCodeIsValidForPreCompilation(Handle<JSRegExp> re, bool is_one_byte) { | 
|  | Object entry = re->Code(is_one_byte); | 
|  | Object bytecode = re->Bytecode(is_one_byte); | 
|  | // If we're not using the tier-up strategy, entry can only be a smi | 
|  | // representing an uncompiled regexp here. If we're using the tier-up | 
|  | // strategy, entry can still be a smi representing an uncompiled regexp, when | 
|  | // compiling the regexp before the tier-up, or it can contain a trampoline to | 
|  | // the regexp interpreter, in which case the bytecode field contains compiled | 
|  | // bytecode, when recompiling the regexp after the tier-up. If the | 
|  | // tier-up was forced, which happens for global replaces, entry is a smi | 
|  | // representing an uncompiled regexp, even though we're "recompiling" after | 
|  | // the tier-up. | 
|  | if (re->ShouldProduceBytecode()) { | 
|  | DCHECK(entry.IsSmi()); | 
|  | DCHECK(bytecode.IsSmi()); | 
|  | int entry_value = Smi::ToInt(entry); | 
|  | int bytecode_value = Smi::ToInt(bytecode); | 
|  | DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value); | 
|  | DCHECK_EQ(JSRegExp::kUninitializedValue, bytecode_value); | 
|  | } else { | 
|  | DCHECK(entry.IsSmi() || (entry.IsCode() && bytecode.IsByteArray())); | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  | #endif | 
|  |  | 
|  | bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> sample_subject, | 
|  | bool is_one_byte) { | 
|  | // Compile the RegExp. | 
|  | Zone zone(isolate->allocator(), ZONE_NAME); | 
|  | PostponeInterruptsScope postpone(isolate); | 
|  |  | 
|  | DCHECK(RegExpCodeIsValidForPreCompilation(re, is_one_byte)); | 
|  |  | 
|  | JSRegExp::Flags flags = re->GetFlags(); | 
|  |  | 
|  | Handle<String> pattern(re->Pattern(), isolate); | 
|  | pattern = String::Flatten(isolate, pattern); | 
|  | RegExpCompileData compile_data; | 
|  | FlatStringReader reader(isolate, pattern); | 
|  | if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, | 
|  | &compile_data)) { | 
|  | // Throw an exception if we fail to parse the pattern. | 
|  | // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. | 
|  | USE(RegExp::ThrowRegExpException(isolate, re, pattern, compile_data.error)); | 
|  | return false; | 
|  | } | 
|  | // The compilation target is a kBytecode if we're interpreting all regexp | 
|  | // objects, or if we're using the tier-up strategy but the tier-up hasn't | 
|  | // happened yet. The compilation target is a kNative if we're using the | 
|  | // tier-up strategy and we need to recompile to tier-up, or if we're producing | 
|  | // native code for all regexp objects. | 
|  | compile_data.compilation_target = re->ShouldProduceBytecode() | 
|  | ? RegExpCompilationTarget::kBytecode | 
|  | : RegExpCompilationTarget::kNative; | 
|  | uint32_t backtrack_limit = re->BacktrackLimit(); | 
|  | const bool compilation_succeeded = | 
|  | Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject, | 
|  | is_one_byte, backtrack_limit); | 
|  | if (!compilation_succeeded) { | 
|  | DCHECK(compile_data.error != RegExpError::kNone); | 
|  | RegExp::ThrowRegExpException(isolate, re, compile_data.error); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | Handle<FixedArray> data = | 
|  | Handle<FixedArray>(FixedArray::cast(re->data()), isolate); | 
|  | if (compile_data.compilation_target == RegExpCompilationTarget::kNative) { | 
|  | data->set(JSRegExp::code_index(is_one_byte), *compile_data.code); | 
|  | // Reset bytecode to uninitialized. In case we use tier-up we know that | 
|  | // tier-up has happened this way. | 
|  | data->set(JSRegExp::bytecode_index(is_one_byte), | 
|  | Smi::FromInt(JSRegExp::kUninitializedValue)); | 
|  | } else { | 
|  | DCHECK_EQ(compile_data.compilation_target, | 
|  | RegExpCompilationTarget::kBytecode); | 
|  | // Store code generated by compiler in bytecode and trampoline to | 
|  | // interpreter in code. | 
|  | data->set(JSRegExp::bytecode_index(is_one_byte), *compile_data.code); | 
|  | Handle<Code> trampoline = | 
|  | BUILTIN_CODE(isolate, RegExpInterpreterTrampoline); | 
|  | data->set(JSRegExp::code_index(is_one_byte), *trampoline); | 
|  | } | 
|  | re->SetCaptureNameMap(compile_data.capture_name_map); | 
|  | int register_max = IrregexpMaxRegisterCount(*data); | 
|  | if (compile_data.register_count > register_max) { | 
|  | SetIrregexpMaxRegisterCount(*data, compile_data.register_count); | 
|  | } | 
|  | data->set(JSRegExp::kIrregexpBacktrackLimit, Smi::FromInt(backtrack_limit)); | 
|  |  | 
|  | if (FLAG_trace_regexp_tier_up) { | 
|  | PrintF("JSRegExp object %p %s size: %d\n", | 
|  | reinterpret_cast<void*>(re->ptr()), | 
|  | re->ShouldProduceBytecode() ? "bytecode" : "native code", | 
|  | re->ShouldProduceBytecode() | 
|  | ? IrregexpByteCode(*data, is_one_byte).Size() | 
|  | : IrregexpNativeCode(*data, is_one_byte).Size()); | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | int RegExpImpl::IrregexpMaxRegisterCount(FixedArray re) { | 
|  | return Smi::ToInt(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex)); | 
|  | } | 
|  |  | 
|  | void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray re, int value) { | 
|  | re.set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); | 
|  | } | 
|  |  | 
|  | int RegExpImpl::IrregexpNumberOfCaptures(FixedArray re) { | 
|  | return Smi::ToInt(re.get(JSRegExp::kIrregexpCaptureCountIndex)); | 
|  | } | 
|  |  | 
|  | ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) { | 
|  | return ByteArray::cast(re.get(JSRegExp::bytecode_index(is_one_byte))); | 
|  | } | 
|  |  | 
|  | Code RegExpImpl::IrregexpNativeCode(FixedArray re, bool is_one_byte) { | 
|  | return Code::cast(re.get(JSRegExp::code_index(is_one_byte))); | 
|  | } | 
|  |  | 
|  | void RegExpImpl::IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re, | 
|  | Handle<String> pattern, | 
|  | JSRegExp::Flags flags, int capture_count, | 
|  | uint32_t backtrack_limit) { | 
|  | // Initialize compiled code entries to null. | 
|  | isolate->factory()->SetRegExpIrregexpData(re, pattern, flags, capture_count, | 
|  | backtrack_limit); | 
|  | } | 
|  |  | 
|  | // static | 
|  | int RegExpImpl::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject) { | 
|  | DCHECK(subject->IsFlat()); | 
|  |  | 
|  | // Check representation of the underlying storage. | 
|  | bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); | 
|  | if (!RegExpImpl::EnsureCompiledIrregexp(isolate, regexp, subject, | 
|  | is_one_byte)) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | // Only reserve room for output captures. Internal registers are allocated by | 
|  | // the engine. | 
|  | return JSRegExp::RegistersForCaptureCount(regexp->CaptureCount()); | 
|  | } | 
|  |  | 
|  | int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, | 
|  | Handle<String> subject, int index, | 
|  | int32_t* output, int output_size) { | 
|  | DCHECK_LE(0, index); | 
|  | DCHECK_LE(index, subject->length()); | 
|  | DCHECK(subject->IsFlat()); | 
|  | DCHECK_GE(output_size, | 
|  | JSRegExp::RegistersForCaptureCount(regexp->CaptureCount())); | 
|  |  | 
|  | bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); | 
|  |  | 
|  | if (!regexp->ShouldProduceBytecode()) { | 
|  | do { | 
|  | EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte); | 
|  | // The stack is used to allocate registers for the compiled regexp code. | 
|  | // This means that in case of failure, the output registers array is left | 
|  | // untouched and contains the capture results from the previous successful | 
|  | // match.  We can use that to set the last match info lazily. | 
|  | int res = NativeRegExpMacroAssembler::Match(regexp, subject, output, | 
|  | output_size, index, isolate); | 
|  | if (res != NativeRegExpMacroAssembler::RETRY) { | 
|  | DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION || | 
|  | isolate->has_pending_exception()); | 
|  | STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == | 
|  | RegExp::RE_SUCCESS); | 
|  | STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == | 
|  | RegExp::RE_FAILURE); | 
|  | STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) == | 
|  | RegExp::RE_EXCEPTION); | 
|  | return res; | 
|  | } | 
|  | // If result is RETRY, the string has changed representation, and we | 
|  | // must restart from scratch. | 
|  | // In this case, it means we must make sure we are prepared to handle | 
|  | // the, potentially, different subject (the string can switch between | 
|  | // being internal and external, and even between being Latin1 and UC16, | 
|  | // but the characters are always the same). | 
|  | is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); | 
|  | } while (true); | 
|  | UNREACHABLE(); | 
|  | } else { | 
|  | DCHECK(regexp->ShouldProduceBytecode()); | 
|  |  | 
|  | do { | 
|  | IrregexpInterpreter::Result result = | 
|  | IrregexpInterpreter::MatchForCallFromRuntime( | 
|  | isolate, regexp, subject, output, output_size, index); | 
|  | DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION, | 
|  | isolate->has_pending_exception()); | 
|  |  | 
|  | switch (result) { | 
|  | case IrregexpInterpreter::SUCCESS: | 
|  | case IrregexpInterpreter::EXCEPTION: | 
|  | case IrregexpInterpreter::FAILURE: | 
|  | case IrregexpInterpreter::FALLBACK_TO_EXPERIMENTAL: | 
|  | return result; | 
|  | case IrregexpInterpreter::RETRY: | 
|  | // The string has changed representation, and we must restart the | 
|  | // match. | 
|  | // We need to reset the tier up to start over with compilation. | 
|  | if (FLAG_regexp_tier_up) regexp->ResetLastTierUpTick(); | 
|  | is_one_byte = String::IsOneByteRepresentationUnderneath(*subject); | 
|  | EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte); | 
|  | break; | 
|  | } | 
|  | } while (true); | 
|  | UNREACHABLE(); | 
|  | } | 
|  | } | 
|  |  | 
|  | MaybeHandle<Object> RegExpImpl::IrregexpExec( | 
|  | Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, | 
|  | int previous_index, Handle<RegExpMatchInfo> last_match_info) { | 
|  | DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 
|  |  | 
|  | subject = String::Flatten(isolate, subject); | 
|  |  | 
|  | #ifdef DEBUG | 
|  | if (FLAG_trace_regexp_bytecodes && regexp->ShouldProduceBytecode()) { | 
|  | String pattern = regexp->Pattern(); | 
|  | PrintF("\n\nRegexp match:   /%s/\n\n", pattern.ToCString().get()); | 
|  | PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get()); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | // For very long subject strings, the regexp interpreter is currently much | 
|  | // slower than the jitted code execution. If the tier-up strategy is turned | 
|  | // on, we want to avoid this performance penalty so we eagerly tier-up if the | 
|  | // subject string length is equal or greater than the given heuristic value. | 
|  | if (FLAG_regexp_tier_up && | 
|  | subject->length() >= JSRegExp::kTierUpForSubjectLengthValue) { | 
|  | regexp->MarkTierUpForNextExec(); | 
|  | if (FLAG_trace_regexp_tier_up) { | 
|  | PrintF( | 
|  | "Forcing tier-up for very long strings in " | 
|  | "RegExpImpl::IrregexpExec\n"); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Prepare space for the return values. | 
|  | int required_registers = | 
|  | RegExpImpl::IrregexpPrepare(isolate, regexp, subject); | 
|  | if (required_registers < 0) { | 
|  | // Compiling failed with an exception. | 
|  | DCHECK(isolate->has_pending_exception()); | 
|  | return MaybeHandle<Object>(); | 
|  | } | 
|  |  | 
|  | int32_t* output_registers = nullptr; | 
|  | if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) { | 
|  | output_registers = NewArray<int32_t>(required_registers); | 
|  | } | 
|  | std::unique_ptr<int32_t[]> auto_release(output_registers); | 
|  | if (output_registers == nullptr) { | 
|  | output_registers = isolate->jsregexp_static_offsets_vector(); | 
|  | } | 
|  |  | 
|  | int res = | 
|  | RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index, | 
|  | output_registers, required_registers); | 
|  |  | 
|  | if (res == RegExp::RE_SUCCESS) { | 
|  | int capture_count = regexp->CaptureCount(); | 
|  | return RegExp::SetLastMatchInfo(isolate, last_match_info, subject, | 
|  | capture_count, output_registers); | 
|  | } else if (res == RegExp::RE_FALLBACK_TO_EXPERIMENTAL) { | 
|  | return ExperimentalRegExp::OneshotExec(isolate, regexp, subject, | 
|  | previous_index, last_match_info); | 
|  | } else if (res == RegExp::RE_EXCEPTION) { | 
|  | DCHECK(isolate->has_pending_exception()); | 
|  | return MaybeHandle<Object>(); | 
|  | } else { | 
|  | DCHECK(res == RegExp::RE_FAILURE); | 
|  | return isolate->factory()->null_value(); | 
|  | } | 
|  | } | 
|  |  | 
|  | // static | 
|  | Handle<RegExpMatchInfo> RegExp::SetLastMatchInfo( | 
|  | Isolate* isolate, Handle<RegExpMatchInfo> last_match_info, | 
|  | Handle<String> subject, int capture_count, int32_t* match) { | 
|  | // This is the only place where match infos can grow. If, after executing the | 
|  | // regexp, RegExpExecStub finds that the match info is too small, it restarts | 
|  | // execution in RegExpImpl::Exec, which finally grows the match info right | 
|  | // here. | 
|  | Handle<RegExpMatchInfo> result = | 
|  | RegExpMatchInfo::ReserveCaptures(isolate, last_match_info, capture_count); | 
|  | if (*result != *last_match_info) { | 
|  | if (*last_match_info == *isolate->regexp_last_match_info()) { | 
|  | // This inner condition is only needed for special situations like the | 
|  | // regexp fuzzer, where we pass our own custom RegExpMatchInfo to | 
|  | // RegExpImpl::Exec; there actually want to bypass the Isolate's match | 
|  | // info and execute the regexp without side effects. | 
|  | isolate->native_context()->set_regexp_last_match_info(*result); | 
|  | } | 
|  | } | 
|  |  | 
|  | int capture_register_count = | 
|  | JSRegExp::RegistersForCaptureCount(capture_count); | 
|  | DisallowHeapAllocation no_allocation; | 
|  | if (match != nullptr) { | 
|  | for (int i = 0; i < capture_register_count; i += 2) { | 
|  | result->SetCapture(i, match[i]); | 
|  | result->SetCapture(i + 1, match[i + 1]); | 
|  | } | 
|  | } | 
|  | result->SetLastSubject(*subject); | 
|  | result->SetLastInput(*subject); | 
|  | return result; | 
|  | } | 
|  |  | 
|  | // static | 
|  | void RegExp::DotPrintForTesting(const char* label, RegExpNode* node) { | 
|  | DotPrinter::DotPrint(label, node); | 
|  | } | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | // Returns true if we've either generated too much irregex code within this | 
|  | // isolate, or the pattern string is too long. | 
|  | bool TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) { | 
|  | // Limit the space regexps take up on the heap.  In order to limit this we | 
|  | // would like to keep track of the amount of regexp code on the heap.  This | 
|  | // is not tracked, however.  As a conservative approximation we track the | 
|  | // total regexp code compiled including code that has subsequently been freed | 
|  | // and the total executable memory at any point. | 
|  | static constexpr size_t kRegExpExecutableMemoryLimit = 16 * MB; | 
|  | static constexpr size_t kRegExpCompiledLimit = 1 * MB; | 
|  |  | 
|  | Heap* heap = isolate->heap(); | 
|  | if (pattern->length() > RegExp::kRegExpTooLargeToOptimize) return true; | 
|  | return (isolate->total_regexp_code_generated() > kRegExpCompiledLimit && | 
|  | heap->CommittedMemoryExecutable() > kRegExpExecutableMemoryLimit); | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | // static | 
|  | bool RegExp::CompileForTesting(Isolate* isolate, Zone* zone, | 
|  | RegExpCompileData* data, JSRegExp::Flags flags, | 
|  | Handle<String> pattern, | 
|  | Handle<String> sample_subject, | 
|  | bool is_one_byte) { | 
|  | uint32_t backtrack_limit = JSRegExp::kNoBacktrackLimit; | 
|  | return RegExpImpl::Compile(isolate, zone, data, flags, pattern, | 
|  | sample_subject, is_one_byte, backtrack_limit); | 
|  | } | 
|  |  | 
|  | bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data, | 
|  | JSRegExp::Flags flags, Handle<String> pattern, | 
|  | Handle<String> sample_subject, bool is_one_byte, | 
|  | uint32_t& backtrack_limit) { | 
|  | if (JSRegExp::RegistersForCaptureCount(data->capture_count) > | 
|  | RegExpMacroAssembler::kMaxRegisterCount) { | 
|  | data->error = RegExpError::kTooLarge; | 
|  | return false; | 
|  | } | 
|  |  | 
|  | RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte); | 
|  |  | 
|  | if (compiler.optimize()) { | 
|  | compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern)); | 
|  | } | 
|  |  | 
|  | // Sample some characters from the middle of the string. | 
|  | static const int kSampleSize = 128; | 
|  |  | 
|  | sample_subject = String::Flatten(isolate, sample_subject); | 
|  | int chars_sampled = 0; | 
|  | int half_way = (sample_subject->length() - kSampleSize) / 2; | 
|  | for (int i = Max(0, half_way); | 
|  | i < sample_subject->length() && chars_sampled < kSampleSize; | 
|  | i++, chars_sampled++) { | 
|  | compiler.frequency_collator()->CountCharacter(sample_subject->Get(i)); | 
|  | } | 
|  |  | 
|  | data->node = compiler.PreprocessRegExp(data, flags, is_one_byte); | 
|  | data->error = AnalyzeRegExp(isolate, is_one_byte, data->node); | 
|  | if (data->error != RegExpError::kNone) { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Create the correct assembler for the architecture. | 
|  | std::unique_ptr<RegExpMacroAssembler> macro_assembler; | 
|  | if (data->compilation_target == RegExpCompilationTarget::kNative) { | 
|  | // Native regexp implementation. | 
|  | DCHECK(!FLAG_jitless); | 
|  |  | 
|  | NativeRegExpMacroAssembler::Mode mode = | 
|  | is_one_byte ? NativeRegExpMacroAssembler::LATIN1 | 
|  | : NativeRegExpMacroAssembler::UC16; | 
|  |  | 
|  | const int output_register_count = | 
|  | JSRegExp::RegistersForCaptureCount(data->capture_count); | 
|  | #if V8_TARGET_ARCH_IA32 | 
|  | macro_assembler.reset(new RegExpMacroAssemblerIA32(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #elif V8_TARGET_ARCH_X64 | 
|  | macro_assembler.reset(new RegExpMacroAssemblerX64(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #elif V8_TARGET_ARCH_ARM | 
|  | macro_assembler.reset(new RegExpMacroAssemblerARM(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #elif V8_TARGET_ARCH_ARM64 | 
|  | macro_assembler.reset(new RegExpMacroAssemblerARM64(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #elif V8_TARGET_ARCH_S390 | 
|  | macro_assembler.reset(new RegExpMacroAssemblerS390(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64 | 
|  | macro_assembler.reset(new RegExpMacroAssemblerPPC(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #elif V8_TARGET_ARCH_MIPS | 
|  | macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #elif V8_TARGET_ARCH_MIPS64 | 
|  | macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode, | 
|  | output_register_count)); | 
|  | #else | 
|  | #error "Unsupported architecture" | 
|  | #endif | 
|  | } else { | 
|  | DCHECK_EQ(data->compilation_target, RegExpCompilationTarget::kBytecode); | 
|  | // Interpreted regexp implementation. | 
|  | macro_assembler.reset(new RegExpBytecodeGenerator(isolate, zone)); | 
|  | } | 
|  |  | 
|  | macro_assembler->set_slow_safe(TooMuchRegExpCode(isolate, pattern)); | 
|  | if (FLAG_enable_experimental_regexp_engine_on_excessive_backtracks && | 
|  | ExperimentalRegExp::CanBeHandled(data->tree, flags, | 
|  | data->capture_count)) { | 
|  | if (backtrack_limit == JSRegExp::kNoBacktrackLimit) { | 
|  | backtrack_limit = FLAG_regexp_backtracks_before_fallback; | 
|  | } else { | 
|  | backtrack_limit = | 
|  | std::min(backtrack_limit, FLAG_regexp_backtracks_before_fallback); | 
|  | } | 
|  | macro_assembler->set_backtrack_limit(backtrack_limit); | 
|  | macro_assembler->set_can_fallback(true); | 
|  | } else { | 
|  | macro_assembler->set_backtrack_limit(backtrack_limit); | 
|  | macro_assembler->set_can_fallback(false); | 
|  | } | 
|  |  | 
|  | // Inserted here, instead of in Assembler, because it depends on information | 
|  | // in the AST that isn't replicated in the Node structure. | 
|  | bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 
|  | bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 
|  | int max_length = data->tree->max_match(); | 
|  | static const int kMaxBacksearchLimit = 1024; | 
|  | if (is_end_anchored && !is_start_anchored && !IsSticky(flags) && | 
|  | max_length < kMaxBacksearchLimit) { | 
|  | macro_assembler->SetCurrentPositionFromEnd(max_length); | 
|  | } | 
|  |  | 
|  | if (IsGlobal(flags)) { | 
|  | RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL; | 
|  | if (data->tree->min_match() > 0) { | 
|  | mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK; | 
|  | } else if (IsUnicode(flags)) { | 
|  | mode = RegExpMacroAssembler::GLOBAL_UNICODE; | 
|  | } | 
|  | macro_assembler->set_global_mode(mode); | 
|  | } | 
|  |  | 
|  | RegExpMacroAssembler* macro_assembler_ptr = macro_assembler.get(); | 
|  | #ifdef DEBUG | 
|  | std::unique_ptr<RegExpMacroAssembler> tracer_macro_assembler; | 
|  | if (FLAG_trace_regexp_assembler) { | 
|  | tracer_macro_assembler.reset( | 
|  | new RegExpMacroAssemblerTracer(isolate, macro_assembler_ptr)); | 
|  | macro_assembler_ptr = tracer_macro_assembler.get(); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | RegExpCompiler::CompilationResult result = compiler.Assemble( | 
|  | isolate, macro_assembler_ptr, data->node, data->capture_count, pattern); | 
|  |  | 
|  | // Code / bytecode printing. | 
|  | { | 
|  | #ifdef ENABLE_DISASSEMBLER | 
|  | if (FLAG_print_regexp_code && | 
|  | data->compilation_target == RegExpCompilationTarget::kNative) { | 
|  | CodeTracer::Scope trace_scope(isolate->GetCodeTracer()); | 
|  | OFStream os(trace_scope.file()); | 
|  | Handle<Code> c = Handle<Code>::cast(result.code); | 
|  | auto pattern_cstring = pattern->ToCString(); | 
|  | c->Disassemble(pattern_cstring.get(), os, isolate); | 
|  | } | 
|  | #endif | 
|  | if (FLAG_print_regexp_bytecode && | 
|  | data->compilation_target == RegExpCompilationTarget::kBytecode) { | 
|  | Handle<ByteArray> bytecode = Handle<ByteArray>::cast(result.code); | 
|  | auto pattern_cstring = pattern->ToCString(); | 
|  | RegExpBytecodeDisassemble(bytecode->GetDataStartAddress(), | 
|  | bytecode->length(), pattern_cstring.get()); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (result.error != RegExpError::kNone) { | 
|  | if (FLAG_correctness_fuzzer_suppressions && | 
|  | result.error == RegExpError::kStackOverflow) { | 
|  | FATAL("Aborting on stack overflow"); | 
|  | } | 
|  | data->error = result.error; | 
|  | } | 
|  |  | 
|  | data->code = result.code; | 
|  | data->register_count = result.num_registers; | 
|  |  | 
|  | return result.Succeeded(); | 
|  | } | 
|  |  | 
|  | RegExpGlobalCache::RegExpGlobalCache(Handle<JSRegExp> regexp, | 
|  | Handle<String> subject, Isolate* isolate) | 
|  | : register_array_(nullptr), | 
|  | register_array_size_(0), | 
|  | regexp_(regexp), | 
|  | subject_(subject), | 
|  | isolate_(isolate) { | 
|  | DCHECK(IsGlobal(regexp->GetFlags())); | 
|  |  | 
|  | switch (regexp_->TypeTag()) { | 
|  | case JSRegExp::NOT_COMPILED: | 
|  | UNREACHABLE(); | 
|  | case JSRegExp::ATOM: { | 
|  | // ATOM regexps do not have a global loop, so we search for one match at | 
|  | // a time. | 
|  | static const int kAtomRegistersPerMatch = 2; | 
|  | registers_per_match_ = kAtomRegistersPerMatch; | 
|  | register_array_size_ = registers_per_match_; | 
|  | break; | 
|  | } | 
|  | case JSRegExp::IRREGEXP: { | 
|  | registers_per_match_ = | 
|  | RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_); | 
|  | if (registers_per_match_ < 0) { | 
|  | num_matches_ = -1;  // Signal exception. | 
|  | return; | 
|  | } | 
|  | if (regexp->ShouldProduceBytecode()) { | 
|  | // Global loop in interpreted regexp is not implemented.  We choose the | 
|  | // size of the offsets vector so that it can only store one match. | 
|  | register_array_size_ = registers_per_match_; | 
|  | max_matches_ = 1; | 
|  | } else { | 
|  | register_array_size_ = Max(registers_per_match_, | 
|  | Isolate::kJSRegexpStaticOffsetsVectorSize); | 
|  | } | 
|  | break; | 
|  | } | 
|  | case JSRegExp::EXPERIMENTAL: { | 
|  | if (!ExperimentalRegExp::IsCompiled(regexp, isolate_) && | 
|  | !ExperimentalRegExp::Compile(isolate_, regexp)) { | 
|  | DCHECK(isolate->has_pending_exception()); | 
|  | num_matches_ = -1;  // Signal exception. | 
|  | return; | 
|  | } | 
|  | registers_per_match_ = | 
|  | JSRegExp::RegistersForCaptureCount(regexp->CaptureCount()); | 
|  | register_array_size_ = | 
|  | Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | max_matches_ = register_array_size_ / registers_per_match_; | 
|  |  | 
|  | if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | 
|  | register_array_ = NewArray<int32_t>(register_array_size_); | 
|  | } else { | 
|  | register_array_ = isolate->jsregexp_static_offsets_vector(); | 
|  | } | 
|  |  | 
|  | // Set state so that fetching the results the first time triggers a call | 
|  | // to the compiled regexp. | 
|  | current_match_index_ = max_matches_ - 1; | 
|  | num_matches_ = max_matches_; | 
|  | DCHECK_LE(2, registers_per_match_);  // Each match has at least one capture. | 
|  | DCHECK_GE(register_array_size_, registers_per_match_); | 
|  | int32_t* last_match = | 
|  | ®ister_array_[current_match_index_ * registers_per_match_]; | 
|  | last_match[0] = -1; | 
|  | last_match[1] = 0; | 
|  | } | 
|  |  | 
|  | RegExpGlobalCache::~RegExpGlobalCache() { | 
|  | // Deallocate the register array if we allocated it in the constructor | 
|  | // (as opposed to using the existing jsregexp_static_offsets_vector). | 
|  | if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | 
|  | DeleteArray(register_array_); | 
|  | } | 
|  | } | 
|  |  | 
|  | int RegExpGlobalCache::AdvanceZeroLength(int last_index) { | 
|  | if (IsUnicode(regexp_->GetFlags()) && last_index + 1 < subject_->length() && | 
|  | unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) && | 
|  | unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) { | 
|  | // Advance over the surrogate pair. | 
|  | return last_index + 2; | 
|  | } | 
|  | return last_index + 1; | 
|  | } | 
|  |  | 
|  | int32_t* RegExpGlobalCache::FetchNext() { | 
|  | current_match_index_++; | 
|  |  | 
|  | if (current_match_index_ >= num_matches_) { | 
|  | // Current batch of results exhausted. | 
|  | // Fail if last batch was not even fully filled. | 
|  | if (num_matches_ < max_matches_) { | 
|  | num_matches_ = 0;  // Signal failed match. | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | int32_t* last_match = | 
|  | ®ister_array_[(current_match_index_ - 1) * registers_per_match_]; | 
|  | int last_end_index = last_match[1]; | 
|  |  | 
|  | switch (regexp_->TypeTag()) { | 
|  | case JSRegExp::NOT_COMPILED: | 
|  | UNREACHABLE(); | 
|  | case JSRegExp::ATOM: | 
|  | num_matches_ = | 
|  | RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, last_end_index, | 
|  | register_array_, register_array_size_); | 
|  | break; | 
|  | case JSRegExp::EXPERIMENTAL: { | 
|  | DCHECK(ExperimentalRegExp::IsCompiled(regexp_, isolate_)); | 
|  | DisallowHeapAllocation no_gc; | 
|  | num_matches_ = ExperimentalRegExp::ExecRaw( | 
|  | isolate_, RegExp::kFromRuntime, *regexp_, *subject_, | 
|  | register_array_, register_array_size_, last_end_index); | 
|  | break; | 
|  | } | 
|  | case JSRegExp::IRREGEXP: { | 
|  | int last_start_index = last_match[0]; | 
|  | if (last_start_index == last_end_index) { | 
|  | // Zero-length match. Advance by one code point. | 
|  | last_end_index = AdvanceZeroLength(last_end_index); | 
|  | } | 
|  | if (last_end_index > subject_->length()) { | 
|  | num_matches_ = 0;  // Signal failed match. | 
|  | return nullptr; | 
|  | } | 
|  | num_matches_ = RegExpImpl::IrregexpExecRaw( | 
|  | isolate_, regexp_, subject_, last_end_index, register_array_, | 
|  | register_array_size_); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Fall back to experimental engine if needed and possible. | 
|  | if (num_matches_ == RegExp::kInternalRegExpFallbackToExperimental) { | 
|  | num_matches_ = ExperimentalRegExp::OneshotExecRaw( | 
|  | isolate_, regexp_, subject_, register_array_, register_array_size_, | 
|  | last_end_index); | 
|  | } | 
|  |  | 
|  | if (num_matches_ <= 0) { | 
|  | return nullptr; | 
|  | } | 
|  | current_match_index_ = 0; | 
|  | return register_array_; | 
|  | } else { | 
|  | return ®ister_array_[current_match_index_ * registers_per_match_]; | 
|  | } | 
|  | } | 
|  |  | 
|  | int32_t* RegExpGlobalCache::LastSuccessfulMatch() { | 
|  | int index = current_match_index_ * registers_per_match_; | 
|  | if (num_matches_ == 0) { | 
|  | // After a failed match we shift back by one result. | 
|  | index -= registers_per_match_; | 
|  | } | 
|  | return ®ister_array_[index]; | 
|  | } | 
|  |  | 
|  | Object RegExpResultsCache::Lookup(Heap* heap, String key_string, | 
|  | Object key_pattern, | 
|  | FixedArray* last_match_cache, | 
|  | ResultsCacheType type) { | 
|  | FixedArray cache; | 
|  | if (!key_string.IsInternalizedString()) return Smi::zero(); | 
|  | if (type == STRING_SPLIT_SUBSTRINGS) { | 
|  | DCHECK(key_pattern.IsString()); | 
|  | if (!key_pattern.IsInternalizedString()) return Smi::zero(); | 
|  | cache = heap->string_split_cache(); | 
|  | } else { | 
|  | DCHECK(type == REGEXP_MULTIPLE_INDICES); | 
|  | DCHECK(key_pattern.IsFixedArray()); | 
|  | cache = heap->regexp_multiple_cache(); | 
|  | } | 
|  |  | 
|  | uint32_t hash = key_string.Hash(); | 
|  | uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) & | 
|  | ~(kArrayEntriesPerCacheEntry - 1)); | 
|  | if (cache.get(index + kStringOffset) != key_string || | 
|  | cache.get(index + kPatternOffset) != key_pattern) { | 
|  | index = | 
|  | ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1)); | 
|  | if (cache.get(index + kStringOffset) != key_string || | 
|  | cache.get(index + kPatternOffset) != key_pattern) { | 
|  | return Smi::zero(); | 
|  | } | 
|  | } | 
|  |  | 
|  | *last_match_cache = FixedArray::cast(cache.get(index + kLastMatchOffset)); | 
|  | return cache.get(index + kArrayOffset); | 
|  | } | 
|  |  | 
|  | void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string, | 
|  | Handle<Object> key_pattern, | 
|  | Handle<FixedArray> value_array, | 
|  | Handle<FixedArray> last_match_cache, | 
|  | ResultsCacheType type) { | 
|  | Factory* factory = isolate->factory(); | 
|  | Handle<FixedArray> cache; | 
|  | if (!key_string->IsInternalizedString()) return; | 
|  | if (type == STRING_SPLIT_SUBSTRINGS) { | 
|  | DCHECK(key_pattern->IsString()); | 
|  | if (!key_pattern->IsInternalizedString()) return; | 
|  | cache = factory->string_split_cache(); | 
|  | } else { | 
|  | DCHECK(type == REGEXP_MULTIPLE_INDICES); | 
|  | DCHECK(key_pattern->IsFixedArray()); | 
|  | cache = factory->regexp_multiple_cache(); | 
|  | } | 
|  |  | 
|  | uint32_t hash = key_string->Hash(); | 
|  | uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) & | 
|  | ~(kArrayEntriesPerCacheEntry - 1)); | 
|  | if (cache->get(index + kStringOffset) == Smi::zero()) { | 
|  | cache->set(index + kStringOffset, *key_string); | 
|  | cache->set(index + kPatternOffset, *key_pattern); | 
|  | cache->set(index + kArrayOffset, *value_array); | 
|  | cache->set(index + kLastMatchOffset, *last_match_cache); | 
|  | } else { | 
|  | uint32_t index2 = | 
|  | ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1)); | 
|  | if (cache->get(index2 + kStringOffset) == Smi::zero()) { | 
|  | cache->set(index2 + kStringOffset, *key_string); | 
|  | cache->set(index2 + kPatternOffset, *key_pattern); | 
|  | cache->set(index2 + kArrayOffset, *value_array); | 
|  | cache->set(index2 + kLastMatchOffset, *last_match_cache); | 
|  | } else { | 
|  | cache->set(index2 + kStringOffset, Smi::zero()); | 
|  | cache->set(index2 + kPatternOffset, Smi::zero()); | 
|  | cache->set(index2 + kArrayOffset, Smi::zero()); | 
|  | cache->set(index2 + kLastMatchOffset, Smi::zero()); | 
|  | cache->set(index + kStringOffset, *key_string); | 
|  | cache->set(index + kPatternOffset, *key_pattern); | 
|  | cache->set(index + kArrayOffset, *value_array); | 
|  | cache->set(index + kLastMatchOffset, *last_match_cache); | 
|  | } | 
|  | } | 
|  | // If the array is a reasonably short list of substrings, convert it into a | 
|  | // list of internalized strings. | 
|  | if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) { | 
|  | for (int i = 0; i < value_array->length(); i++) { | 
|  | Handle<String> str(String::cast(value_array->get(i)), isolate); | 
|  | Handle<String> internalized_str = factory->InternalizeString(str); | 
|  | value_array->set(i, *internalized_str); | 
|  | } | 
|  | } | 
|  | // Convert backing store to a copy-on-write array. | 
|  | value_array->set_map_no_write_barrier( | 
|  | ReadOnlyRoots(isolate).fixed_cow_array_map()); | 
|  | } | 
|  |  | 
|  | void RegExpResultsCache::Clear(FixedArray cache) { | 
|  | for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 
|  | cache.set(i, Smi::zero()); | 
|  | } | 
|  | } | 
|  |  | 
|  | }  // namespace internal | 
|  | }  // namespace v8 |