blob: 2a4bfb20a6afafed7a89a910c38c3797e3c3e247 [file] [log] [blame]
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99: */
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// A simple interpreter for the Irregexp byte code.
#include "irregexp/RegExpBytecode.h"
#include "irregexp/RegExpMacroAssembler.h"
#include "vm/MatchPairs.h"
using namespace js;
using namespace js::irregexp;
static const size_t kBitsPerByte = 8;
static const size_t kBitsPerByteLog2 = 3;
class MOZ_STACK_CLASS RegExpStackCursor
{
public:
explicit RegExpStackCursor(JSContext* cx)
: cx(cx), cursor(nullptr)
{}
bool init() {
if (!stack.init()) {
ReportOutOfMemory(cx);
return false;
}
cursor = base();
return true;
}
bool push(int32_t value) {
*cursor++ = value;
if (cursor >= stack.limit()) {
int32_t pos = position();
if (!stack.grow()) {
ReportOverRecursed(cx);
return false;
}
setPosition(pos);
}
return true;
}
int32_t pop() {
MOZ_ASSERT(cursor > base());
return *--cursor;
}
int32_t peek() {
MOZ_ASSERT(cursor > base());
return *(cursor - 1);
}
int32_t position() {
MOZ_ASSERT(cursor >= base());
return cursor - base();
}
void setPosition(int32_t position) {
cursor = base() + position;
MOZ_ASSERT(cursor < stack.limit());
}
private:
JSContext* cx;
RegExpStack stack;
int32_t* cursor;
int32_t* base() { return (int32_t*) stack.base(); }
};
static int32_t
Load32Aligned(const uint8_t* pc)
{
MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 3) == 0);
return *reinterpret_cast<const int32_t*>(pc);
}
static int32_t
Load16Aligned(const uint8_t* pc)
{
MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 1) == 0);
return *reinterpret_cast<const uint16_t*>(pc);
}
#define BYTECODE(name) case BC_##name:
template <typename CharT>
RegExpRunStatus
irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t current,
size_t length, MatchPairs* matches)
{
const uint8_t* pc = byteCode;
uint32_t current_char = current ? chars[current - 1] : '\n';
RegExpStackCursor stack(cx);
if (!stack.init())
return RegExpRunStatus_Error;
int32_t numRegisters = Load32Aligned(pc);
pc += 4;
Vector<int32_t, 0, SystemAllocPolicy> registers;
if (!registers.growByUninitialized(numRegisters))
return RegExpRunStatus_Error;
for (size_t i = 0; i < (size_t) numRegisters; i++)
registers[i] = -1;
while (true) {
int32_t insn = Load32Aligned(pc);
switch (insn & BYTECODE_MASK) {
BYTECODE(BREAK)
MOZ_CRASH("Bad bytecode: BREAK");
BYTECODE(PUSH_CP)
if (!stack.push(current))
return RegExpRunStatus_Error;
pc += BC_PUSH_CP_LENGTH;
break;
BYTECODE(PUSH_BT)
if (!stack.push(Load32Aligned(pc + 4)))
return RegExpRunStatus_Error;
pc += BC_PUSH_BT_LENGTH;
break;
BYTECODE(PUSH_REGISTER)
if (!stack.push(registers[insn >> BYTECODE_SHIFT]))
return RegExpRunStatus_Error;
pc += BC_PUSH_REGISTER_LENGTH;
break;
BYTECODE(SET_REGISTER)
registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
pc += BC_SET_REGISTER_LENGTH;
break;
BYTECODE(ADVANCE_REGISTER)
registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
pc += BC_ADVANCE_REGISTER_LENGTH;
break;
BYTECODE(SET_REGISTER_TO_CP)
registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
pc += BC_SET_REGISTER_TO_CP_LENGTH;
break;
BYTECODE(SET_CP_TO_REGISTER)
current = registers[insn >> BYTECODE_SHIFT];
pc += BC_SET_CP_TO_REGISTER_LENGTH;
break;
BYTECODE(SET_REGISTER_TO_SP)
registers[insn >> BYTECODE_SHIFT] = stack.position();
pc += BC_SET_REGISTER_TO_SP_LENGTH;
break;
BYTECODE(SET_SP_TO_REGISTER)
stack.setPosition(registers[insn >> BYTECODE_SHIFT]);
pc += BC_SET_SP_TO_REGISTER_LENGTH;
break;
BYTECODE(POP_CP)
current = stack.pop();
pc += BC_POP_CP_LENGTH;
break;
BYTECODE(POP_BT)
if (!CheckForInterrupt(cx))
return RegExpRunStatus_Error;
pc = byteCode + stack.pop();
break;
BYTECODE(POP_REGISTER)
registers[insn >> BYTECODE_SHIFT] = stack.pop();
pc += BC_POP_REGISTER_LENGTH;
break;
BYTECODE(FAIL)
return RegExpRunStatus_Success_NotFound;
BYTECODE(SUCCEED)
if (matches)
memcpy(matches->pairsRaw(), registers.begin(), matches->length() * 2 * sizeof(int32_t));
return RegExpRunStatus_Success;
BYTECODE(ADVANCE_CP)
current += insn >> BYTECODE_SHIFT;
pc += BC_ADVANCE_CP_LENGTH;
break;
BYTECODE(GOTO)
pc = byteCode + Load32Aligned(pc + 4);
break;
BYTECODE(ADVANCE_CP_AND_GOTO)
current += insn >> BYTECODE_SHIFT;
pc = byteCode + Load32Aligned(pc + 4);
break;
BYTECODE(CHECK_GREEDY)
if ((int32_t)current == stack.peek()) {
stack.pop();
pc = byteCode + Load32Aligned(pc + 4);
} else {
pc += BC_CHECK_GREEDY_LENGTH;
}
break;
BYTECODE(LOAD_CURRENT_CHAR) {
size_t pos = current + (insn >> BYTECODE_SHIFT);
if (pos >= length) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
current_char = chars[pos];
pc += BC_LOAD_CURRENT_CHAR_LENGTH;
}
break;
}
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
int pos = current + (insn >> BYTECODE_SHIFT);
current_char = chars[pos];
pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
break;
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
size_t pos = current + (insn >> BYTECODE_SHIFT);
if (pos + 2 > length) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
CharT next = chars[pos + 1];
current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(CharT))));
pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
}
break;
}
BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
int pos = current + (insn >> BYTECODE_SHIFT);
char16_t next = chars[pos + 1];
current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(char16_t))));
pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
break;
}
BYTECODE(LOAD_4_CURRENT_CHARS)
MOZ_CRASH("ASCII handling implemented");
BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED)
MOZ_CRASH("ASCII handling implemented");
BYTECODE(CHECK_4_CHARS) {
uint32_t c = Load32Aligned(pc + 4);
if (c == current_char)
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_CHECK_4_CHARS_LENGTH;
break;
}
BYTECODE(CHECK_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
if (c == current_char)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_CHAR_LENGTH;
break;
}
BYTECODE(CHECK_NOT_4_CHARS) {
uint32_t c = Load32Aligned(pc + 4);
if (c != current_char)
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_CHECK_NOT_4_CHARS_LENGTH;
break;
}
BYTECODE(CHECK_NOT_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
if (c != current_char)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_NOT_CHAR_LENGTH;
break;
}
BYTECODE(AND_CHECK_4_CHARS) {
uint32_t c = Load32Aligned(pc + 4);
if (c == (current_char & Load32Aligned(pc + 8)))
pc = byteCode + Load32Aligned(pc + 12);
else
pc += BC_AND_CHECK_4_CHARS_LENGTH;
break;
}
BYTECODE(AND_CHECK_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
if (c == (current_char & Load32Aligned(pc + 4)))
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_AND_CHECK_CHAR_LENGTH;
break;
}
BYTECODE(AND_CHECK_NOT_4_CHARS) {
uint32_t c = Load32Aligned(pc + 4);
if (c != (current_char & Load32Aligned(pc + 8)))
pc = byteCode + Load32Aligned(pc + 12);
else
pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
break;
}
BYTECODE(AND_CHECK_NOT_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
if (c != (current_char & Load32Aligned(pc + 4)))
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
break;
}
BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
uint32_t minus = Load16Aligned(pc + 4);
uint32_t mask = Load16Aligned(pc + 6);
if (c != ((current_char - minus) & mask))
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
break;
}
BYTECODE(CHECK_CHAR_IN_RANGE) {
uint32_t from = Load16Aligned(pc + 4);
uint32_t to = Load16Aligned(pc + 6);
if (from <= current_char && current_char <= to)
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
break;
}
BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
uint32_t from = Load16Aligned(pc + 4);
uint32_t to = Load16Aligned(pc + 6);
if (from > current_char || current_char > to)
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
break;
}
BYTECODE(CHECK_BIT_IN_TABLE) {
int mask = RegExpMacroAssembler::kTableMask;
uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
int bit = (current_char & (kBitsPerByte - 1));
if ((b & (1 << bit)) != 0)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
break;
}
BYTECODE(CHECK_LT) {
uint32_t limit = (insn >> BYTECODE_SHIFT);
if (current_char < limit)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_LT_LENGTH;
break;
}
BYTECODE(CHECK_GT) {
uint32_t limit = (insn >> BYTECODE_SHIFT);
if (current_char > limit)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_GT_LENGTH;
break;
}
BYTECODE(CHECK_REGISTER_LT)
if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4))
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_CHECK_REGISTER_LT_LENGTH;
break;
BYTECODE(CHECK_REGISTER_GE)
if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4))
pc = byteCode + Load32Aligned(pc + 8);
else
pc += BC_CHECK_REGISTER_GE_LENGTH;
break;
BYTECODE(CHECK_REGISTER_EQ_POS)
if (registers[insn >> BYTECODE_SHIFT] == (int32_t) current)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
break;
BYTECODE(CHECK_NOT_REGS_EQUAL)
if (registers[insn >> BYTECODE_SHIFT] == registers[Load32Aligned(pc + 4)])
pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
else
pc = byteCode + Load32Aligned(pc + 8);
break;
BYTECODE(CHECK_NOT_BACK_REF) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
if (current + len > length) {
pc = byteCode + Load32Aligned(pc + 4);
break;
} else {
int i;
for (i = 0; i < len; i++) {
if (chars[from + i] != chars[current + i]) {
pc = byteCode + Load32Aligned(pc + 4);
break;
}
}
if (i < len) break;
current += len;
}
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
break;
}
if (current + len > length) {
pc = byteCode + Load32Aligned(pc + 4);
break;
}
if (CaseInsensitiveCompareStrings(chars + from, chars + current, len * sizeof(CharT))) {
current += len;
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
} else {
pc = byteCode + Load32Aligned(pc + 4);
}
break;
}
BYTECODE(CHECK_AT_START)
if (current == 0)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_AT_START_LENGTH;
break;
BYTECODE(CHECK_NOT_AT_START)
if (current == 0)
pc += BC_CHECK_NOT_AT_START_LENGTH;
else
pc = byteCode + Load32Aligned(pc + 4);
break;
BYTECODE(SET_CURRENT_POSITION_FROM_END) {
size_t by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
if (length - current > by) {
current = length - by;
current_char = chars[current - 1];
}
pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
break;
}
default:
MOZ_CRASH("Bad bytecode");
}
}
}
template RegExpRunStatus
irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const Latin1Char* chars, size_t current,
size_t length, MatchPairs* matches);
template RegExpRunStatus
irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const char16_t* chars, size_t current,
size_t length, MatchPairs* matches);