// blob: 4c43172ff4008e147e565c15cbc23c2419bd8d34 [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_
#define V8_STRINGS_CHAR_PREDICATES_INL_H_
#include "src/base/bounds.h"
#include "src/strings/char-predicates.h"
#include "src/utils/utils.h"
namespace v8 {
namespace internal {
// Forces bit 0x20 on. For c in 'A'-'Z' or 'a'-'z' this yields the lower-case
// letter; for every other c the result falls outside 'A'-'Z' and 'a'-'z'.
// Note: LOCALE is not taken into account.
inline constexpr int AsciiAlphaToLower(uc32 c) {
  constexpr int kAsciiCaseBit = 0x20;
  return c | kAsciiCaseBit;
}
// True iff c is U+000D (carriage return).
inline constexpr bool IsCarriageReturn(uc32 c) {
  return c == '\r';
}
// True iff c is U+000A (line feed).
inline constexpr bool IsLineFeed(uc32 c) {
  return c == '\n';
}
// True for '$', '_' and every character accepted by IsAlphaNumeric().
inline constexpr bool IsAsciiIdentifier(uc32 c) {
  if (c == '$' || c == '_') return true;
  return IsAlphaNumeric(c);
}
// True for '0'-'9', 'A'-'Z' and 'a'-'z'. Letters are tested after folding
// with AsciiAlphaToLower(), so a single 'a'-'z' range check covers both cases.
inline constexpr bool IsAlphaNumeric(uc32 c) {
  if (IsDecimalDigit(c)) return true;
  return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z');
}
// True for '0'-'9'.
// ECMA-262, 3rd, 7.8.3 (p 16)
inline constexpr bool IsDecimalDigit(uc32 c) {
  constexpr char kFirstDigit = '0';
  constexpr char kLastDigit = '9';
  return base::IsInRange(c, kFirstDigit, kLastDigit);
}
// True for '0'-'9', 'A'-'F' and 'a'-'f'.
// ECMA-262, 3rd, 7.6 (p 15)
inline constexpr bool IsHexDigit(uc32 c) {
  if (IsDecimalDigit(c)) return true;
  // Fold the case first so one 'a'-'f' check handles both 'A'-'F' and 'a'-'f'.
  return base::IsInRange(AsciiAlphaToLower(c), 'a', 'f');
}
// True for '0'-'7'.
// ECMA-262, 6th, 7.8.3
inline constexpr bool IsOctalDigit(uc32 c) {
  constexpr char kFirstOctal = '0';
  constexpr char kLastOctal = '7';
  return base::IsInRange(c, kFirstOctal, kLastOctal);
}
// True for the decimal digits that are not octal digits, i.e. '8' and '9'.
inline constexpr bool IsNonOctalDecimalDigit(uc32 c) {
  constexpr char kFirstNonOctal = '8';
  constexpr char kLastNonOctal = '9';
  return base::IsInRange(c, kFirstNonOctal, kLastNonOctal);
}
// True for '0' and '1'.
// ECMA-262, 6th, 7.8.3
inline constexpr bool IsBinaryDigit(uc32 c) {
  if (c == '0') return true;
  return c == '1';
}
// True for 'a'-'z'.
inline constexpr bool IsAsciiLower(uc32 c) {
  constexpr char kFirstLower = 'a';
  constexpr char kLastLower = 'z';
  return base::IsInRange(c, kFirstLower, kLastLower);
}
// True for 'A'-'Z'.
inline constexpr bool IsAsciiUpper(uc32 c) {
  constexpr char kFirstUpper = 'A';
  constexpr char kLastUpper = 'Z';
  return base::IsInRange(c, kFirstUpper, kLastUpper);
}
// Maps 'a'-'z' to 'A'-'Z' and returns every other value unchanged.
inline constexpr uc32 ToAsciiUpper(uc32 c) {
  // Branch-free: the mask is 0x20 (1 << 5) for 'a'-'z' and 0 otherwise, so
  // clearing the masked bits is a no-op for non-lower-case input.
  const int case_bit = IsAsciiLower(c) << 5;
  return c & ~case_bit;
}
// Maps 'A'-'Z' to 'a'-'z' and returns every other value unchanged.
inline constexpr uc32 ToAsciiLower(uc32 c) {
  // Branch-free: the mask is 0x20 (1 << 5) for 'A'-'Z' and 0 otherwise, so
  // OR-ing it in is a no-op for non-upper-case input.
  const int case_bit = IsAsciiUpper(c) << 5;
  return c | case_bit;
}
// True for '_' and every character accepted by IsAlphaNumeric().
inline constexpr bool IsRegExpWord(uc32 c) {
  if (c == '_') return true;
  return IsAlphaNumeric(c);
}
// Bit flags kept per character in the constexpr cache table.
// Every enumerator occupies its own bit so that flags can be OR-ed together.
enum OneByteCharFlags {
  kIsIdentifierStart = 0x01,
  kIsIdentifierPart = 0x02,
  kIsWhiteSpace = 0x04,
  kIsWhiteSpaceOrLineTerminator = 0x08,
  kMaybeLineEnd = 0x10,
};
// One-byte subset of ID_Start, additionally including '_' and '$'.
// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
constexpr bool IsOneByteIDStart(uc32 c) {
  // '$' and '_'.
  if (c == 0x0024 || c == 0x005F) return true;
  // 'A'-'Z'.
  if (c >= 0x0041 && c <= 0x005A) return true;
  // 'a'-'z'.
  if (c >= 0x0061 && c <= 0x007A) return true;
  // Isolated code points U+00AA, U+00B5 and U+00BA.
  if (c == 0x00AA || c == 0x00B5 || c == 0x00BA) return true;
  // U+00C0-U+00FF with the two gaps U+00D7 and U+00F7.
  if (c >= 0x00C0 && c <= 0x00FF) return c != 0x00D7 && c != 0x00F7;
  return false;
}
// One-byte subset of ID_Continue, additionally including '_' and '$'.
// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
constexpr bool IsOneByteIDContinue(uc32 c) {
  // '$' and '_'.
  if (c == 0x0024 || c == 0x005F) return true;
  // '0'-'9'.
  if (c >= 0x0030 && c <= 0x0039) return true;
  // 'A'-'Z'.
  if (c >= 0x0041 && c <= 0x005A) return true;
  // 'a'-'z'.
  if (c >= 0x0061 && c <= 0x007A) return true;
  // Isolated code points U+00AA, U+00B5, U+00B7 and U+00BA.
  if (c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA) return true;
  // U+00C0-U+00FF with the two gaps U+00D7 and U+00F7.
  if (c >= 0x00C0 && c <= 0x00FF) return c != 0x00D7 && c != 0x00F7;
  return false;
}
// True for tab, vertical tab, form feed, space and U+00A0.
// '\r' and '\n' are deliberately not part of this set.
constexpr bool IsOneByteWhitespace(uc32 c) {
  switch (c) {
    case '\t':
    case '\v':
    case '\f':
    case ' ':
    case u'\xa0':
      return true;
    default:
      return false;
  }
}
// Computes the OneByteCharFlags bit set for the character c.
//   kIsIdentifierStart:            IsOneByteIDStart(c) or '\\'.
//   kIsIdentifierPart:             IsOneByteIDContinue(c) or '\\'.
//   kIsWhiteSpace:                 IsOneByteWhitespace(c).
//   kIsWhiteSpaceOrLineTerminator: the above, plus '\r' and '\n'.
//   kMaybeLineEnd:                 '\r', '\n', and the values equal to the
//                                  low byte of 0x2028 / 0x2029.
constexpr uint8_t BuildOneByteCharFlags(uc32 c) {
  const bool is_backslash = (c == '\\');
  const bool is_whitespace = IsOneByteWhitespace(c);
  const bool is_cr_or_lf = (c == '\r' || c == '\n');
  // Markers to identify 0x2028 and 0x2029: their truncated one-byte values
  // are flagged so a lookup by low byte can pre-filter them.
  const bool is_truncated_line_separator =
      (c == static_cast<uint8_t>(0x2028) || c == static_cast<uint8_t>(0x2029));

  uint8_t flags = 0;
  if (is_backslash || IsOneByteIDStart(c)) flags |= kIsIdentifierStart;
  if (is_backslash || IsOneByteIDContinue(c)) flags |= kIsIdentifierPart;
  if (is_whitespace) flags |= kIsWhiteSpace;
  if (is_whitespace || is_cr_or_lf) flags |= kIsWhiteSpaceOrLineTerminator;
  if (is_cr_or_lf || is_truncated_line_separator) flags |= kMaybeLineEnd;
  return flags;
}
const constexpr uint8_t kOneByteCharFlags[256] = {
#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N),
INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
#undef BUILD_CHAR_FLAGS
#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128),
INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
#undef BUILD_CHAR_FLAGS
};
// Returns whether c carries the kIsIdentifierStart flag. Values in 0-255 are
// answered from kOneByteCharFlags; everything else is delegated to
// IsIdentifierStartSlow().
bool IsIdentifierStart(uc32 c) {
  if (base::IsInRange(c, 0, 255)) {
    const bool cached = (kOneByteCharFlags[c] & kIsIdentifierStart) != 0;
    // The table must agree with the slow path.
    DCHECK_EQ(IsIdentifierStartSlow(c), cached);
    return cached;
  }
  return IsIdentifierStartSlow(c);
}
// Returns whether c carries the kIsIdentifierPart flag. Values in 0-255 are
// answered from kOneByteCharFlags; everything else is delegated to
// IsIdentifierPartSlow().
bool IsIdentifierPart(uc32 c) {
  if (base::IsInRange(c, 0, 255)) {
    const bool cached = (kOneByteCharFlags[c] & kIsIdentifierPart) != 0;
    // The table must agree with the slow path.
    DCHECK_EQ(IsIdentifierPartSlow(c), cached);
    return cached;
  }
  return IsIdentifierPartSlow(c);
}
// Returns whether c carries the kIsWhiteSpace flag. Values in 0-255 are
// answered from kOneByteCharFlags; everything else is delegated to
// IsWhiteSpaceSlow().
bool IsWhiteSpace(uc32 c) {
  if (base::IsInRange(c, 0, 255)) {
    const bool cached = (kOneByteCharFlags[c] & kIsWhiteSpace) != 0;
    // The table must agree with the slow path.
    DCHECK_EQ(IsWhiteSpaceSlow(c), cached);
    return cached;
  }
  return IsWhiteSpaceSlow(c);
}
// Returns whether c carries the kIsWhiteSpaceOrLineTerminator flag. Values in
// 0-255 are answered from kOneByteCharFlags; everything else is delegated to
// IsWhiteSpaceOrLineTerminatorSlow().
bool IsWhiteSpaceOrLineTerminator(uc32 c) {
  if (base::IsInRange(c, 0, 255)) {
    const bool cached =
        (kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator) != 0;
    // The table must agree with the slow path.
    DCHECK_EQ(IsWhiteSpaceOrLineTerminatorSlow(c), cached);
    return cached;
  }
  return IsWhiteSpaceOrLineTerminatorSlow(c);
}
// Returns true when c ends a line, given that `next` is the character after
// it:
//   - '\n' always does;
//   - '\r' does unless `next` is '\n', so a "\r\n" pair yields true only at
//     the '\n';
//   - 0x2028 and 0x2029 do.
// Everything else returns false.
bool IsLineTerminatorSequence(uc32 c, uc32 next) {
  // Cheap pre-filter keyed on the low byte of c; kMaybeLineEnd is also set for
  // the low bytes of 0x2028 / 0x2029, hence the exact range check at the end.
  const uint8_t flags = kOneByteCharFlags[static_cast<uint8_t>(c)];
  if ((flags & kMaybeLineEnd) == 0) return false;

  if (c == '\n') return true;
  if (c == '\r') return next != '\n';
  return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u);
}
} // namespace internal
} // namespace v8
#endif // V8_STRINGS_CHAR_PREDICATES_INL_H_