blob: b1d1399eab13166d5dc10be3e1bf8b4329076791 [file] [log] [blame]
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_JS_REGEXP_H_
#define V8_OBJECTS_JS_REGEXP_H_
#include "src/objects/js-array.h"
#include "torque-generated/bit-fields.h"
// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"
namespace v8 {
namespace internal {
#include "torque-generated/src/objects/js-regexp-tq.inc"
// Regular expressions
// The regular expression holds a single reference to a FixedArray in
// the kDataOffset field.
// The FixedArray contains the following data:
// - tag : type of regexp implementation (not compiled yet, atom, irregexp or
//   experimental)
// - reference to the original source string
// - reference to the original flag string
// If it is an atom regexp:
// - a reference to a literal string to search for
// If it is an irregexp regexp:
// - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
//   used for tracking the last usage (used for regexp code flushing).
// - a reference to code for UC16 inputs (bytecode or compiled), or a smi
//   used for tracking the last usage (used for regexp code flushing).
// - max number of registers used by irregexp implementations.
// - number of capture registers (output values) of the regexp.
// This list is not exhaustive; the k*Index constants in JSRegExp give the
// exact slot layout of the data array.
class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
 public:
  // Meaning of Type:
  // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
  // ATOM: A simple string to match against using an indexOf operation.
  // IRREGEXP: Compiled with Irregexp.
  // EXPERIMENTAL: Compiled to use the new linear time engine.
  enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL };

  DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()

  // Maps a single flag character to the corresponding Flag. Returns an empty
  // Optional for characters that do not name a flag. The 'l' (linear) flag is
  // only recognized while FLAG_enable_experimental_regexp_engine is set.
  static base::Optional<Flag> FlagFromChar(char c) {
    // Update the switch below whenever a flag is added or removed.
    STATIC_ASSERT(kFlagCount == 7);
    switch (c) {
      case 'g':
        return base::Optional<Flag>(kGlobal);
      case 'i':
        return base::Optional<Flag>(kIgnoreCase);
      case 'm':
        return base::Optional<Flag>(kMultiline);
      case 'y':
        return base::Optional<Flag>(kSticky);
      case 'u':
        return base::Optional<Flag>(kUnicode);
      case 's':
        return base::Optional<Flag>(kDotAll);
      case 'l':
        return FLAG_enable_experimental_regexp_engine
                   ? base::Optional<Flag>(kLinear)
                   : base::Optional<Flag>();
      default:
        return base::Optional<Flag>();
    }
  }

  // The internal flag values must stay in sync with the public API's
  // v8::RegExp flag values.
  STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
  STATIC_ASSERT(static_cast<int>(kGlobal) == v8::RegExp::kGlobal);
  STATIC_ASSERT(static_cast<int>(kIgnoreCase) == v8::RegExp::kIgnoreCase);
  STATIC_ASSERT(static_cast<int>(kMultiline) == v8::RegExp::kMultiline);
  STATIC_ASSERT(static_cast<int>(kSticky) == v8::RegExp::kSticky);
  STATIC_ASSERT(static_cast<int>(kUnicode) == v8::RegExp::kUnicode);
  STATIC_ASSERT(static_cast<int>(kDotAll) == v8::RegExp::kDotAll);
  STATIC_ASSERT(static_cast<int>(kLinear) == v8::RegExp::kLinear);
  STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);

  DECL_ACCESSORS(last_index, Object)

  // If the backtrack limit is set to this marker value, no limit is applied.
  static constexpr uint32_t kNoBacktrackLimit = 0;

  // Creation and initialization. When `backtrack_limit` is omitted it defaults
  // to kNoBacktrackLimit, i.e. no limit is applied.
  V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(
      Isolate* isolate, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);
  static Handle<JSRegExp> Copy(Handle<JSRegExp> regexp);

  static MaybeHandle<JSRegExp> Initialize(
      Handle<JSRegExp> regexp, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);
  static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
                                          Handle<String> source,
                                          Handle<String> flags_string);

  // Converts a flags string into Flags. `success` is an out-parameter
  // (presumably set to false for an invalid flags string -- confirm against
  // the out-of-line definition).
  static Flags FlagsFromString(Isolate* isolate, Handle<String> flags,
                               bool* success);

  // Tier-up bookkeeping, defined out of line. See
  // kIrregexpTicksUntilTierUpIndex below for the tick counter slot.
  bool CanTierUp();
  bool MarkedForTierUp();
  void ResetLastTierUpTick();
  void TierUpTick();
  void MarkTierUpForNextExec();

  inline Type TypeTag() const;

  // True for the regexp types that support captures (IRREGEXP and
  // EXPERIMENTAL).
  static constexpr bool TypeSupportsCaptures(Type t) {
    return t == IRREGEXP || t == EXPERIMENTAL;
  }

  // Maximum number of captures allowed.
  static constexpr int kMaxCaptures = 1 << 16;

  // Number of captures (without the match itself).
  inline int CaptureCount() const;

  // Each capture (including the match itself) needs two registers.
  static constexpr int RegistersForCaptureCount(int count) {
    return (count + 1) * 2;
  }

  inline int MaxRegisterCount() const;
  inline Flags GetFlags();
  inline String Pattern();
  inline Object CaptureNameMap();
  inline Object DataAt(int index) const;

  // Set implementation data after the object has been prepared.
  inline void SetDataAt(int index, Object value);
  inline void SetCaptureNameMap(Handle<FixedArray> capture_name_map);

  // Data array index of the code slot for the given subject encoding.
  static constexpr int code_index(bool is_latin1) {
    return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;
  }

  // Data array index of the bytecode slot for the given subject encoding.
  static constexpr int bytecode_index(bool is_latin1) {
    return is_latin1 ? kIrregexpLatin1BytecodeIndex
                     : kIrregexpUC16BytecodeIndex;
  }

  // This could be a Smi kUninitializedValue or Code.
  V8_EXPORT_PRIVATE Object Code(bool is_latin1) const;
  // This could be a Smi kUninitializedValue or ByteArray.
  V8_EXPORT_PRIVATE Object Bytecode(bool is_latin1) const;

  bool ShouldProduceBytecode();
  inline bool HasCompiledCode() const;
  inline void DiscardCompiledCodeForSerialization();

  uint32_t BacktrackLimit() const;

  // Dispatched behavior.
  DECL_PRINTER(JSRegExp)
  DECL_VERIFIER(JSRegExp)

  // This is already an in-object field.
  // TODO(v8:8944): improve handling of in-object fields
  static constexpr int kLastIndexOffset = kHeaderSize;

  // Indices in the data array.
  static constexpr int kTagIndex = 0;
  static constexpr int kSourceIndex = kTagIndex + 1;
  static constexpr int kFlagsIndex = kSourceIndex + 1;
  static constexpr int kDataIndex = kFlagsIndex + 1;

  // TODO(jgruber): Rename kDataIndex to something more appropriate.
  // There is no 'data' field, kDataIndex is just a marker for the
  // first non-generic index.
  static constexpr int kMinDataArrayLength = kDataIndex;

  // The data fields are used in different ways depending on the
  // value of the tag.

  // Atom regexps (literal strings).
  static constexpr int kAtomPatternIndex = kDataIndex;

  static constexpr int kAtomDataSize = kAtomPatternIndex + 1;

  // Irregexp compiled code or trampoline to interpreter for Latin1. If
  // compilation fails, this field holds an exception object that should be
  // thrown if the regexp is used again.
  static constexpr int kIrregexpLatin1CodeIndex = kDataIndex;
  // Irregexp compiled code or trampoline to interpreter for UC16. If
  // compilation fails, this field holds an exception object that should be
  // thrown if the regexp is used again.
  static constexpr int kIrregexpUC16CodeIndex = kDataIndex + 1;
  // Bytecode to interpret the regexp for Latin1. Contains kUninitializedValue
  // if we haven't compiled the regexp yet, if regexps are always compiled, or
  // if tier-up has happened (i.e. when kIrregexpLatin1CodeIndex contains
  // native irregexp code).
  static constexpr int kIrregexpLatin1BytecodeIndex = kDataIndex + 2;
  // Bytecode to interpret the regexp for UC16. Contains kUninitializedValue
  // if we haven't compiled the regexp yet, if regexps are always compiled, or
  // if tier-up has happened (i.e. when kIrregexpUC16CodeIndex contains native
  // irregexp code).
  static constexpr int kIrregexpUC16BytecodeIndex = kDataIndex + 3;
  // Maximal number of registers used by either Latin1 or UC16.
  // Only used to check that there is enough stack space
  static constexpr int kIrregexpMaxRegisterCountIndex = kDataIndex + 4;
  // Number of captures in the compiled regexp.
  static constexpr int kIrregexpCaptureCountIndex = kDataIndex + 5;
  // Maps names of named capture groups (at indices 2i) to their corresponding
  // (1-based) capture group indices (at indices 2i + 1).
  static constexpr int kIrregexpCaptureNameMapIndex = kDataIndex + 6;
  // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
  // decremented on each execution of the bytecode, so that the tier-up
  // happens once the ticks reach zero.
  // This value is ignored if the regexp-tier-up flag isn't turned on.
  static constexpr int kIrregexpTicksUntilTierUpIndex = kDataIndex + 7;
  // A smi containing either the backtracking limit or kNoBacktrackLimit.
  // TODO(jgruber): If needed, this limit could be packed into other fields
  // above to save space.
  static constexpr int kIrregexpBacktrackLimit = kDataIndex + 8;
  static constexpr int kIrregexpDataSize = kDataIndex + 9;

  // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms
  // to the format of an IRREGEXP data array, with most fields set to some
  // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
  // regexps take the same code path in `RegExpExecInternal`, which reads off
  // various fields from the data array. `RegExpExecInternal` should probably
  // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
  // all the IRREGEXP only fields.
  static constexpr int kExperimentalDataSize = kIrregexpDataSize;

  // In-object fields.
  static constexpr int kLastIndexFieldIndex = 0;
  static constexpr int kInObjectFieldCount = 1;

  // Descriptor array index to important methods in the prototype.
  static constexpr int kExecFunctionDescriptorIndex = 1;
  static constexpr int kSymbolMatchFunctionDescriptorIndex = 13;
  static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 14;
  static constexpr int kSymbolReplaceFunctionDescriptorIndex = 15;
  static constexpr int kSymbolSearchFunctionDescriptorIndex = 16;
  static constexpr int kSymbolSplitFunctionDescriptorIndex = 17;

  // The uninitialized value for a regexp code object.
  static constexpr int kUninitializedValue = -1;

  // The heuristic value for the length of the subject string for which we
  // tier-up to the compiler immediately, instead of using the interpreter.
  static constexpr int kTierUpForSubjectLengthValue = 1000;

  TQ_OBJECT_CONSTRUCTORS(JSRegExp)
};
// Defines operators for JSRegExp::Flags via the DEFINE_OPERATORS_FOR_FLAGS
// macro (presumably the bitwise flag operators; see the macro's definition).
DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)
// JSRegExpResult is just a JSArray with a specific initial map.
// This initial map adds in-object properties for "index" and "input",
// as assigned by RegExp.prototype.exec, which allows faster creation of
// RegExp exec results.
// This class just holds constants used when creating the result.
// After creation the result must be treated as a JSArray in all regards.
class JSRegExpResult : public JSArray {
 public:
  DECL_CAST(JSRegExpResult)

  // TODO(joshualitt): We would like to add printers and verifiers to
  // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same
  // instance type as JSArray.

  // Layout description.
  DEFINE_FIELD_OFFSET_CONSTANTS(JSArray::kHeaderSize,
                                TORQUE_GENERATED_JS_REG_EXP_RESULT_FIELDS)

  // Declared here, defined out of line. Takes the result object and yields a
  // JSArray on success (an empty MaybeHandle otherwise).
  static MaybeHandle<JSArray> GetAndCacheIndices(
      Isolate* isolate, Handle<JSRegExpResult> regexp_result);

  // Indices of in-object properties.
  static constexpr int kIndexIndex = 0;
  static constexpr int kInputIndex = 1;
  static constexpr int kGroupsIndex = 2;

  // Private internal only fields.
  static constexpr int kCachedIndicesOrRegExpIndex = 3;
  static constexpr int kNamesIndex = 4;
  static constexpr int kRegExpInputIndex = 5;
  static constexpr int kRegExpLastIndex = 6;

  // Total number of in-object properties (public and internal) listed above.
  static constexpr int kInObjectPropertyCount = 7;

  OBJECT_CONSTRUCTORS(JSRegExpResult, JSArray);
};
// JSRegExpResultIndices is just a JSArray with a specific initial map.
// This initial map adds an in-object property for "groups", as assigned by
// RegExp.prototype.exec, which allows faster creation of RegExp exec
// results.
// This class just holds constants used when creating the result.
// After creation the result must be treated as a JSArray in all regards.
class JSRegExpResultIndices : public JSArray {
 public:
  DECL_CAST(JSRegExpResultIndices)

  // Layout description.
  DEFINE_FIELD_OFFSET_CONSTANTS(
      JSArray::kHeaderSize, TORQUE_GENERATED_JS_REG_EXP_RESULT_INDICES_FIELDS)

  // Declared here, defined out of line. Builds the indices object from the
  // given match info; `maybe_names` is passed as a generic Object handle
  // (presumably the capture-name data or undefined -- confirm at the
  // definition).
  static Handle<JSRegExpResultIndices> BuildIndices(
      Isolate* isolate, Handle<RegExpMatchInfo> match_info,
      Handle<Object> maybe_names);

  // Indices of in-object properties.
  static constexpr int kGroupsIndex = 0;
  static constexpr int kInObjectPropertyCount = 1;

  // Descriptor index of groups.
  static constexpr int kGroupsDescriptorIndex = 1;

  OBJECT_CONSTRUCTORS(JSRegExpResultIndices, JSArray);
};
} // namespace internal
} // namespace v8
#include "src/objects/object-macros-undef.h"
#endif // V8_OBJECTS_JS_REGEXP_H_