| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
| * vim: set ts=8 sts=4 et sw=4 tw=99: |
| * This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| #ifndef vm_RegExpObject_h |
| #define vm_RegExpObject_h |
| |
| #include "mozilla/Attributes.h" |
| |
| #include <stddef.h> |
| #include "jscntxt.h" |
| #include "jsobj.h" |
| |
| #include "gc/Barrier.h" |
| #include "gc/Marking.h" |
| #include "js/TemplateLib.h" |
| #include "vm/MatchPairs.h" |
| |
| #include "yarr/MatchResult.h" |
| #include "yarr/Yarr.h" |
| #if ENABLE_YARR_JIT |
| #include "yarr/YarrJIT.h" |
| #endif |
| #include "yarr/YarrSyntaxChecker.h" |
| |
| /* |
| * JavaScript Regular Expressions |
| * |
| * There are several engine concepts associated with a single logical regexp: |
| * |
| * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp" |
| * |
| * RegExpShared - The compiled representation of the regexp. |
| * |
| * RegExpCompartment - Owns all RegExpShared instances in a compartment. |
| * |
| * To save memory, a RegExpShared is not created for a RegExpObject until it is |
| * needed for execution. When a RegExpShared needs to be created, it is looked |
| * up in a per-compartment table to allow reuse between objects. Lastly, on |
| * GC, every RegExpShared (that is not active on the callstack) is discarded. |
| * Because of the last point, any code using a RegExpShared (viz., by executing |
| * a regexp) must indicate the RegExpShared is active via RegExpGuard. |
| */ |
| namespace js { |
| |
| enum RegExpRunStatus /* Result code returned by RegExpShared::execute and executeMatchOnly. */ |
| { |
| RegExpRunStatus_Error, /* NOTE(review): presumably the run itself failed -- confirm how the error is reported. */ |
| RegExpRunStatus_Success, /* NOTE(review): presumably ran to completion and matched -- confirm. */ |
| RegExpRunStatus_Success_NotFound /* NOTE(review): presumably ran to completion without a match -- confirm. */ |
| }; |
| |
| class RegExpObjectBuilder |
| { /* Helper for producing RegExpObjects; see build() and clone() below. */ |
| JSContext *cx; |
| Rooted<RegExpObject*> reobj_; /* Rooted object pointer exposed through reobj(). */ |
| /* Private helpers. NOTE(review): presumably make sure reobj_ is populated before use -- confirm. */ |
| bool getOrCreate(); |
| bool getOrCreateClone(RegExpObject *proto); |
| |
| public: |
| RegExpObjectBuilder(JSContext *cx, RegExpObject *reobj = NULL); /* |reobj| is optional and defaults to NULL. */ |
| |
| RegExpObject *reobj() { return reobj_; } /* Current value of the rooted reobj_. */ |
| /* Build from a source atom plus either explicit flags or an existing RegExpShared. */ |
| RegExpObject *build(HandleAtom source, RegExpFlag flags); |
| RegExpObject *build(HandleAtom source, RegExpShared &shared); |
| |
| /* Perform a VM-internal clone. */ |
| RegExpObject *clone(Handle<RegExpObject*> other, Handle<RegExpObject*> proto); |
| }; |
| |
| JSObject * |
| CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *proto); /* NOTE(review): presumably clones the RegExp |obj| using |proto|; defined out of view -- confirm. */ |
| |
| /* |
| * A RegExpShared is the compiled representation of a regexp. A RegExpShared is |
| * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may |
| * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a |
| * cache so that they can be reused when compiling the same regex string. |
| * |
| * During a GC, the trace hook for RegExpObject clears any pointers to |
| * RegExpShareds so that there will be no dangling pointers when they are |
| * deleted. However, some RegExpShareds are not deleted: |
| * |
| * 1. Any RegExpShared with pointers from the C++ stack is not deleted. |
| * 2. Any RegExpShared which has been embedded into jitcode is not deleted. |
| * This rarely comes into play, as jitcode is usually purged before the |
| * RegExpShareds are swept. |
| * 3. Any RegExpShared that was installed in a RegExpObject during an |
| * incremental GC is not deleted. This is because the RegExpObject may have |
| * been traced through before the new RegExpShared was installed, in which |
| * case deleting the RegExpShared would turn the RegExpObject's reference |
| * into a dangling pointer. |
| * |
| * The activeUseCount and gcNumberWhenUsed fields are used to track these |
| * conditions. |
| * |
| * There are two tables used to track RegExpShareds. map_ implements the cache |
| * and is cleared on every GC. inUse_ logically owns all RegExpShareds in the |
| * compartment and attempts to delete all RegExpShareds that aren't kept alive |
| * by the above conditions on every GC sweep phase. Two separate tables are |
| * necessary because map_ *must* be fully cleared on each GC: its Key points |
| * to a JSAtom that can become garbage. |
| */ |
| class RegExpShared |
| { |
| friend class RegExpCompartment; |
| friend class RegExpStatics; |
| friend class RegExpGuard; |
| |
| /* Local shorthands for the token stream and Yarr types used below. */ |
| typedef frontend::TokenStream TokenStream; |
| typedef JSC::Yarr::BytecodePattern BytecodePattern; |
| typedef JSC::Yarr::ErrorCode ErrorCode; |
| typedef JSC::Yarr::YarrPattern YarrPattern; |
| #if ENABLE_YARR_JIT |
| typedef JSC::Yarr::JSGlobalData JSGlobalData; |
| typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock; |
| typedef JSC::Yarr::YarrJITCompileMode YarrJITCompileMode; |
| #endif |
| |
| /* |
| * Source to the RegExp, for lazy compilation. |
| * The source must be rooted while activeUseCount is non-zero |
| * via RegExpGuard or explicit calls to trace(). |
| */ |
| JSAtom * source; |
| |
| RegExpFlag flags; |
| /* Only meaningful once compiled: getParenCount() asserts isCompiled(). */ |
| unsigned parenCount; |
| |
| #if ENABLE_YARR_JIT |
| /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */ |
| YarrCodeBlock codeBlock; |
| #endif |
| /* Yarr bytecode pattern; NULL when absent (see hasBytecode()). */ |
| BytecodePattern *bytecode; |
| |
| /* Lifetime-preserving variables: see class-level comment above. */ |
| size_t activeUseCount; |
| uint64_t gcNumberWhenUsed; |
| |
| /* Internal functions. */ |
| bool compile(JSContext *cx, bool matchOnly); |
| bool compile(JSContext *cx, JSLinearString &pattern, bool matchOnly); |
| |
| bool compileIfNecessary(JSContext *cx); |
| bool compileMatchOnlyIfNecessary(JSContext *cx); |
| |
| public: |
| RegExpShared(JSRuntime *rt, JSAtom *source, RegExpFlag flags); |
| ~RegExpShared(); |
| |
| /* Explicit trace function for use by the RegExpStatics and JITs. */ |
| void trace(JSTracer *trc) { |
| /* Marks only |source|; no other member is traced here. */ |
| MarkStringUnbarriered(trc, &source, "regexpshared source"); |
| } |
| |
| /* Static functions to expose some Yarr logic. */ |
| static inline bool isJITRuntimeEnabled(JSContext *cx); |
| static void reportYarrError(JSContext *cx, TokenStream *ts, ErrorCode error); |
| static bool checkSyntax(JSContext *cx, TokenStream *tokenStream, JSLinearString *source); |
| |
| /* Called when a RegExpShared is installed into a RegExpObject. */ |
| inline void prepareForUse(JSContext *cx); |
| |
| /* Primary interface: run this regular expression on the given string. */ |
| RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length, |
| size_t *lastIndex, MatchPairs &matches); |
| |
| /* Run the regular expression without collecting matches, for test(). */ |
| RegExpRunStatus executeMatchOnly(JSContext *cx, const jschar *chars, size_t length, |
| size_t *lastIndex, MatchPair &match); |
| |
| /* Accessors */ |
| |
| size_t getParenCount() const { JS_ASSERT(isCompiled()); return parenCount; } |
| /* Adjust activeUseCount; decRef asserts that the count is still positive. */ |
| void incRef() { activeUseCount++; } |
| void decRef() { JS_ASSERT(activeUseCount > 0); activeUseCount--; } |
| |
| /* Accounts for the "0" (whole match) pair. */ |
| size_t pairCount() const { return getParenCount() + 1; } |
| |
| RegExpFlag getFlags() const { return flags; } |
| bool ignoreCase() const { return flags & IgnoreCaseFlag; } |
| bool global() const { return flags & GlobalFlag; } |
| bool multiline() const { return flags & MultilineFlag; } |
| bool sticky() const { return flags & StickyFlag; } |
| |
| /* |
| * Guarded with #if (not #ifdef) so that it agrees with the guards around |
| * the |codeBlock| member and its typedefs: were ENABLE_YARR_JIT defined |
| * as 0, #ifdef would select accessors that use the undeclared member. |
| */ |
| #if ENABLE_YARR_JIT |
| bool hasCode() const { return codeBlock.has16BitCode(); } |
| bool hasMatchOnlyCode() const { return codeBlock.has16BitCodeMatchOnly(); } |
| #else |
| bool hasCode() const { return false; } |
| bool hasMatchOnlyCode() const { return false; } |
| #endif |
| bool hasBytecode() const { return bytecode != NULL; } |
| /* Compiled means any of: bytecode, JIT code, or match-only JIT code. */ |
| bool isCompiled() const { return hasBytecode() || hasCode() || hasMatchOnlyCode(); } |
| }; |
| |
| /* |
| * Extend the lifetime of a given RegExpShared to at least the lifetime of |
| * the guard object. See Regular Expression comment at the top. |
| */ |
| class RegExpGuard |
| { |
| RegExpShared *re_; /* Guarded RegExpShared; NULL while uninitialized (see initialized()). */ |
| |
| /* |
| * Prevent the RegExp source from being collected: |
| * because RegExpShared objects compile at execution time, the source |
| * must remain rooted for the active lifetime of the RegExpShared. |
| */ |
| RootedAtom source_; |
| /* Copying is disabled. */ |
| RegExpGuard(const RegExpGuard &) MOZ_DELETE; |
| void operator=(const RegExpGuard &) MOZ_DELETE; |
| |
| public: |
| inline RegExpGuard(JSContext *cx); /* NOTE(review): presumably starts out uninitialized -- confirm. */ |
| inline RegExpGuard(JSContext *cx, RegExpShared &re); /* NOTE(review): presumably guards |re| immediately -- confirm. */ |
| inline ~RegExpGuard(); |
| |
| public: |
| inline void init(RegExpShared &re); /* Defined out of view; presumably starts guarding |re| -- confirm. */ |
| inline void release(); /* Defined out of view; presumably stops guarding -- confirm. */ |
| /* re(), operator-> and operator* all assert (through re()) that the guard is initialized. */ |
| bool initialized() const { return !!re_; } |
| RegExpShared *re() const { JS_ASSERT(initialized()); return re_; } |
| RegExpShared *operator->() { return re(); } |
| RegExpShared &operator*() { return *re(); } |
| }; |
| |
| class RegExpCompartment |
| { |
| struct Key { /* Cache key (source atom + flags); also the third HashMap argument below, supplying Lookup/hash/match. */ |
| JSAtom *atom; |
| uint16_t flag; /* Holds the RegExpFlag value given to the constructor. */ |
| |
| Key() {} /* Leaves both fields uninitialized. */ |
| Key(JSAtom *atom, RegExpFlag flag) |
| : atom(atom), flag(flag) |
| { } |
| |
| typedef Key Lookup; |
| static HashNumber hash(const Lookup &l) { /* Atom-pointer hash XORed with the flags shifted left by one. */ |
| return DefaultHasher<JSAtom *>::hash(l.atom) ^ (l.flag << 1); |
| } |
| static bool match(Key l, Key r) { /* Equal when both the atom pointer and the flags are equal. */ |
| return l.atom == r.atom && l.flag == r.flag; |
| } |
| }; |
| |
| /* |
| * Cache to reuse RegExpShareds with the same source/flags/etc. The cache |
| * is entirely cleared on each GC. |
| */ |
| typedef HashMap<Key, RegExpShared *, Key, RuntimeAllocPolicy> Map; |
| Map map_; |
| |
| /* |
| * The set of all RegExpShareds in the compartment. On every GC, every |
| * RegExpShared that is not actively being used is deleted and removed from |
| * the set. |
| */ |
| typedef HashSet<RegExpShared *, DefaultHasher<RegExpShared*>, RuntimeAllocPolicy> PendingSet; |
| PendingSet inUse_; |
| |
| public: |
| RegExpCompartment(JSRuntime *rt); |
| ~RegExpCompartment(); |
| |
| bool init(JSContext *cx); |
| void sweep(JSRuntime *rt); /* NOTE(review): presumably performs the per-GC clearing/deletion described for map_ and inUse_ -- confirm. */ |
| /* NOTE(review): presumably finds or creates the RegExpShared for (source, flags) and hands it to |g| -- confirm. */ |
| bool get(JSContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g); |
| |
| /* Like 'get', but compile 'maybeOpt' (if non-null). */ |
| bool get(JSContext *cx, HandleAtom source, JSString *maybeOpt, RegExpGuard *g); |
| |
| size_t sizeOfExcludingThis(JSMallocSizeOfFun mallocSizeOf); |
| }; |
| |
| class RegExpObject : public JSObject |
| { |
| static const unsigned LAST_INDEX_SLOT = 0; /* Slot indices passed to getSlot() by the accessors below. */ |
| static const unsigned SOURCE_SLOT = 1; |
| static const unsigned GLOBAL_FLAG_SLOT = 2; |
| static const unsigned IGNORE_CASE_FLAG_SLOT = 3; |
| static const unsigned MULTILINE_FLAG_SLOT = 4; |
| static const unsigned STICKY_FLAG_SLOT = 5; |
| |
| public: |
| static const unsigned RESERVED_SLOTS = 6; /* One more than the highest slot index above. */ |
| |
| static Class class_; |
| |
| /* |
| * Note: The regexp statics flags are OR'd into the provided flags, |
| * so this function is really meant for object creation during code |
| * execution, as opposed to during something like XDR. |
| */ |
| static RegExpObject * |
| create(JSContext *cx, RegExpStatics *res, const jschar *chars, size_t length, |
| RegExpFlag flags, frontend::TokenStream *ts); |
| /* Variants of create() that take no RegExpStatics argument. */ |
| static RegExpObject * |
| createNoStatics(JSContext *cx, const jschar *chars, size_t length, RegExpFlag flags, |
| frontend::TokenStream *ts); |
| |
| static RegExpObject * |
| createNoStatics(JSContext *cx, HandleAtom atom, RegExpFlag flags, frontend::TokenStream *ts); |
| |
| /* Accessors. */ |
| |
| static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; } |
| |
| const Value &getLastIndex() const { return getSlot(LAST_INDEX_SLOT); } /* Raw Value held in LAST_INDEX_SLOT. */ |
| inline void setLastIndex(double d); |
| inline void zeroLastIndex(); |
| |
| JSFlatString *toString(JSContext *cx) const; |
| |
| JSAtom *getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); } /* SOURCE_SLOT's string, viewed as an atom. */ |
| inline void setSource(JSAtom *source); |
| |
| RegExpFlag getFlags() const { /* Rebuilds the flag bitmask from the four boolean flag slots. */ |
| unsigned flags = 0; |
| flags |= global() ? GlobalFlag : 0; |
| flags |= ignoreCase() ? IgnoreCaseFlag : 0; |
| flags |= multiline() ? MultilineFlag : 0; |
| flags |= sticky() ? StickyFlag : 0; |
| return RegExpFlag(flags); |
| } |
| |
| /* Flags: each getter reads the matching boolean slot; the setters are defined out of view. */ |
| |
| inline void setIgnoreCase(bool enabled); |
| inline void setGlobal(bool enabled); |
| inline void setMultiline(bool enabled); |
| inline void setSticky(bool enabled); |
| bool ignoreCase() const { return getSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); } |
| bool global() const { return getSlot(GLOBAL_FLAG_SLOT).toBoolean(); } |
| bool multiline() const { return getSlot(MULTILINE_FLAG_SLOT).toBoolean(); } |
| bool sticky() const { return getSlot(STICKY_FLAG_SLOT).toBoolean(); } |
| /* RegExpShared access; definitions are out of view. NOTE(review): results are presumably delivered through |g| -- confirm. */ |
| inline void shared(RegExpGuard *g) const; |
| inline bool getShared(JSContext *cx, RegExpGuard *g); |
| inline void setShared(JSContext *cx, RegExpShared &shared); |
| |
| private: |
| friend class RegExpObjectBuilder; |
| |
| /* |
| * Compute the initial shape to associate with fresh RegExp objects, |
| * encoding their initial properties. Return the shape after |
| * changing this regular expression object's last property to it. |
| */ |
| Shape *assignInitialShape(JSContext *cx); |
| |
| bool init(JSContext *cx, HandleAtom source, RegExpFlag flags); |
| |
| /* |
| * Precondition: the syntax for |source| has already been validated. |
| * Side effect: sets the private field. |
| */ |
| bool createShared(JSContext *cx, RegExpGuard *g); |
| RegExpShared *maybeShared() const; /* NOTE(review): by its name, may return NULL when no RegExpShared is set -- confirm. */ |
| |
| /* Call setShared in preference to setPrivate. */ |
| void setPrivate(void *priv) MOZ_DELETE; |
| }; |
| |
| /* |
| * Parse regexp flags. Report an error and return false if an invalid |
| * sequence of flags is encountered (repeat/invalid flag). |
| * |
| * NOTE(review): on success the parsed flags are presumably written to |
| * |*flagsOut| -- confirm against the definition. |
| * |
| * N.B. flagStr must be rooted. |
| */ |
| bool |
| ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut); |
| |
| /* |
| * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared. |
| * |
| * NOTE(review): the function itself returns bool, so the RegExpShared is |
| * presumably handed back through |g| -- confirm against the definition. |
| * |
| * Beware: this RegExpShared can be owned by a compartment other than |
| * cx->compartment. Normal RegExpGuard (which is necessary anyway) |
| * will protect the object but it is important not to assign the return value |
| * to be the private of any RegExpObject. |
| */ |
| inline bool |
| RegExpToShared(JSContext *cx, HandleObject obj, RegExpGuard *g); |
| |
| template<XDRMode mode> /* NOTE(review): presumably encodes or decodes |*objp| through |xdr| depending on |mode| -- confirm. */ |
| bool |
| XDRScriptRegExpObject(XDRState<mode> *xdr, HeapPtrObject *objp); |
| |
| extern JSObject * |
| CloneScriptRegExpObject(JSContext *cx, RegExpObject &re); /* NOTE(review): presumably returns a clone of |re|; defined out of view -- confirm. */ |
| |
| } /* namespace js */ |
| |
| #endif /* vm_RegExpObject_h */ |