|
Lines 732-750
ALWAYS_INLINE void Lexer<T>::skipWhitespace()
a/Source/JavaScriptCore/parser/Lexer.cpp_sec1
|
| 732 |
shift(); |
732 |
shift(); |
| 733 |
} |
733 |
} |
| 734 |
|
734 |
|
| 735 |
static NEVER_INLINE bool isNonLatin1IdentStart(UChar c) |
735 |
static bool isNonLatin1IdentStart(UChar32 c) |
| 736 |
{ |
736 |
{ |
| 737 |
return u_hasBinaryProperty(c, UCHAR_ID_START); |
737 |
return u_hasBinaryProperty(c, UCHAR_ID_START); |
| 738 |
} |
738 |
} |
| 739 |
|
739 |
|
| 740 |
static inline bool isIdentStart(LChar c) |
740 |
template<typename CharacterType> |
|
|
741 |
static ALWAYS_INLINE bool isIdentStart(CharacterType c) |
| 741 |
{ |
742 |
{ |
| 742 |
return typesOfLatin1Characters[c] == CharacterIdentifierStart; |
743 |
static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentStart for UChars that don't need to check for surrogate pairs"); |
|
|
744 |
if (!isLatin1(c)) |
| 745 |
return isNonLatin1IdentStart(c); |
| 746 |
return typesOfLatin1Characters[static_cast<LChar>(c)] == CharacterIdentifierStart; |
| 743 |
} |
747 |
} |
| 744 |
|
748 |
|
| 745 |
static inline bool isIdentStart(UChar32 c) |
749 |
static ALWAYS_INLINE bool isSingleCharacterIdentStart(UChar c) |
| 746 |
{ |
750 |
{ |
| 747 |
return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c); |
751 |
if (LIKELY(isLatin1(c))) |
|
|
752 |
return isIdentStart(static_cast<LChar>(c)); |
| 753 |
return !U16_IS_SURROGATE(c) && isIdentStart(static_cast<UChar32>(c)); |
| 754 |
} |
| 755 |
|
| 756 |
static ALWAYS_INLINE bool cannotBeIdentStart(LChar c) |
| 757 |
{ |
| 758 |
return !isIdentStart(c) && c != '\\'; |
| 759 |
} |
| 760 |
|
| 761 |
static ALWAYS_INLINE bool cannotBeIdentStart(UChar c) |
| 762 |
{ |
| 763 |
if (LIKELY(isLatin1(c))) |
| 764 |
return cannotBeIdentStart(static_cast<LChar>(c)); |
| 765 |
return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c); |
| 748 |
} |
766 |
} |
| 749 |
|
767 |
|
| 750 |
static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c) |
768 |
static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c) |
|
Lines 752-818
static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
a/Source/JavaScriptCore/parser/Lexer.cpp_sec2
|
| 752 |
return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || c == 0x200C || c == 0x200D; |
770 |
return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || c == 0x200C || c == 0x200D; |
| 753 |
} |
771 |
} |
| 754 |
|
772 |
|
| 755 |
static ALWAYS_INLINE bool isIdentPart(LChar c) |
773 |
template<typename CharacterType> |
|
|
774 |
static ALWAYS_INLINE bool isIdentPart(CharacterType c) |
| 756 |
{ |
775 |
{ |
|
|
776 |
static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentPart for UChars that don't need to check for surrogate pairs"); |
| 777 |
if (!isLatin1(c)) |
| 778 |
return isNonLatin1IdentPart(c); |
| 779 |
|
| 757 |
// Character types are divided into two groups depending on whether they can be part of an |
780 |
// Character types are divided into two groups depending on whether they can be part of an |
| 758 |
// identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be |
781 |
// identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be |
| 759 |
// part of an identifier. (See the CharacterType definition for more details.) |
782 |
// part of an identifier. (See the CharacterType definition for more details.) |
| 760 |
return typesOfLatin1Characters[c] <= CharacterOtherIdentifierPart; |
783 |
return typesOfLatin1Characters[static_cast<LChar>(c)] <= CharacterOtherIdentifierPart; |
| 761 |
} |
784 |
} |
| 762 |
|
785 |
|
| 763 |
static ALWAYS_INLINE bool isIdentPart(UChar32 c) |
786 |
static ALWAYS_INLINE bool isSingleCharacterIdentPart(UChar c) |
| 764 |
{ |
787 |
{ |
| 765 |
return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c); |
788 |
if (LIKELY(isLatin1(c))) |
|
|
789 |
return isIdentPart(static_cast<LChar>(c)); |
| 790 |
return !U16_IS_SURROGATE(c) && isIdentPart(static_cast<UChar32>(c)); |
| 766 |
} |
791 |
} |
| 767 |
|
792 |
|
| 768 |
static ALWAYS_INLINE bool isIdentPart(UChar c) |
793 |
static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(LChar c) |
| 769 |
{ |
794 |
{ |
| 770 |
return isIdentPart(static_cast<UChar32>(c)); |
795 |
return !isIdentPart(c) && c != '\\'; |
| 771 |
} |
796 |
} |
| 772 |
|
797 |
|
| 773 |
template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd) |
798 |
// NOTE: This may give false negatives (for non-ascii) but won't give false positives. |
|
|
799 |
// This means it can be used to detect the end of a keyword (all keywords are ascii) |
| 800 |
static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(UChar c) |
| 774 |
{ |
801 |
{ |
| 775 |
if (isIdentPart(code[0])) |
802 |
if (LIKELY(isLatin1(c))) |
| 776 |
return true; |
803 |
return cannotBeIdentPartOrEscapeStart(static_cast<LChar>(c)); |
| 777 |
|
804 |
return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c); |
| 778 |
// Shortest sequence handled below is \u{0}, which is 5 characters. |
|
|
| 779 |
if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u')) |
| 780 |
return false; |
| 781 |
|
| 782 |
if (code[2] == '{') { |
| 783 |
UChar32 codePoint = 0; |
| 784 |
const CharacterType* pointer; |
| 785 |
for (pointer = &code[3]; pointer < codeEnd; ++pointer) { |
| 786 |
auto digit = *pointer; |
| 787 |
if (!isASCIIHexDigit(digit)) |
| 788 |
break; |
| 789 |
codePoint = (codePoint << 4) | toASCIIHexValue(digit); |
| 790 |
if (codePoint > UCHAR_MAX_VALUE) |
| 791 |
return false; |
| 792 |
} |
| 793 |
return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}'; |
| 794 |
} |
| 795 |
|
| 796 |
// Shortest sequence handled below is \uXXXX, which is 6 characters. |
| 797 |
if (codeEnd - code < 6) |
| 798 |
return false; |
| 799 |
|
| 800 |
auto character1 = code[2]; |
| 801 |
auto character2 = code[3]; |
| 802 |
auto character3 = code[4]; |
| 803 |
auto character4 = code[5]; |
| 804 |
return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4) |
| 805 |
&& isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4)); |
| 806 |
} |
805 |
} |
| 807 |
|
806 |
|
| 808 |
static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd) |
807 |
|
|
|
808 |
template<> |
| 809 |
ALWAYS_INLINE UChar32 Lexer<LChar>::currentCodePoint() const |
| 809 |
{ |
810 |
{ |
| 810 |
return isIdentPartIncludingEscapeTemplate(code, codeEnd); |
811 |
return m_current; |
| 811 |
} |
812 |
} |
| 812 |
|
813 |
|
| 813 |
static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd) |
814 |
template<> |
|
|
815 |
ALWAYS_INLINE UChar32 Lexer<UChar>::currentCodePoint() const |
| 814 |
{ |
816 |
{ |
| 815 |
return isIdentPartIncludingEscapeTemplate(code, codeEnd); |
817 |
ASSERT_WITH_MESSAGE(!isIdentStart(static_cast<UChar32>(U_SENTINEL)), "error values shouldn't appear as a valid identifier start code point"); |
|
|
818 |
if (!U16_IS_SURROGATE(m_current)) |
| 819 |
return m_current; |
| 820 |
|
| 821 |
UChar trail = peek(1); |
| 822 |
if (UNLIKELY(!U16_IS_LEAD(m_current) || !U16_IS_SURROGATE_TRAIL(trail))) |
| 823 |
return U_SENTINEL; |
| 824 |
|
| 825 |
UChar32 codePoint = U16_GET_SUPPLEMENTARY(m_current, trail); |
| 826 |
return codePoint; |
| 816 |
} |
827 |
} |
| 817 |
|
828 |
|
| 818 |
template<typename CharacterType> |
829 |
template<typename CharacterType> |
|
Lines 952-966
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::p
a/Source/JavaScriptCore/parser/Lexer.cpp_sec3
|
| 952 |
} |
963 |
} |
| 953 |
|
964 |
|
| 954 |
const LChar* identifierStart = currentSourcePtr(); |
965 |
const LChar* identifierStart = currentSourcePtr(); |
| 955 |
unsigned identifierLineStart = currentLineStartOffset(); |
966 |
ASSERT(isIdentStart(m_current) || m_current == '\\'); |
| 956 |
|
|
|
| 957 |
while (isIdentPart(m_current)) |
967 |
while (isIdentPart(m_current)) |
| 958 |
shift(); |
968 |
shift(); |
| 959 |
|
969 |
|
| 960 |
if (UNLIKELY(m_current == '\\')) { |
970 |
if (UNLIKELY(m_current == '\\')) |
| 961 |
setOffsetFromSourcePtr(identifierStart, identifierLineStart); |
971 |
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart); |
| 962 |
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode); |
|
|
| 963 |
} |
| 964 |
|
972 |
|
| 965 |
const Identifier* ident = nullptr; |
973 |
const Identifier* ident = nullptr; |
| 966 |
|
974 |
|
|
Lines 1007-1012
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::p
a/Source/JavaScriptCore/parser/Lexer.cpp_sec4
|
| 1007 |
template <> |
1015 |
template <> |
| 1008 |
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode) |
1016 |
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode) |
| 1009 |
{ |
1017 |
{ |
|
|
1018 |
ASSERT(!m_parsingBuiltinFunction); |
| 1010 |
tokenData->escaped = false; |
1019 |
tokenData->escaped = false; |
| 1011 |
const ptrdiff_t remaining = m_codeEnd - m_code; |
1020 |
const ptrdiff_t remaining = m_codeEnd - m_code; |
| 1012 |
if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) { |
1021 |
if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) { |
|
Lines 1016-1085
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p
a/Source/JavaScriptCore/parser/Lexer.cpp_sec5
|
| 1016 |
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword; |
1025 |
return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword; |
| 1017 |
} |
1026 |
} |
| 1018 |
} |
1027 |
} |
| 1019 |
|
|
|
| 1020 |
bool isPrivateName = m_current == '@' && m_parsingBuiltinFunction; |
| 1021 |
bool isWellKnownSymbol = false; |
| 1022 |
if (isPrivateName) { |
| 1023 |
ASSERT(m_parsingBuiltinFunction); |
| 1024 |
shift(); |
| 1025 |
if (m_current == '@') { |
| 1026 |
isWellKnownSymbol = true; |
| 1027 |
shift(); |
| 1028 |
} |
| 1029 |
} |
| 1030 |
|
| 1031 |
|
1028 |
|
| 1032 |
const UChar* identifierStart = currentSourcePtr(); |
1029 |
const UChar* identifierStart = currentSourcePtr(); |
| 1033 |
int identifierLineStart = currentLineStartOffset(); |
|
|
| 1034 |
|
| 1035 |
UChar orAllChars = 0; |
1030 |
UChar orAllChars = 0; |
| 1036 |
|
1031 |
ASSERT(isSingleCharacterIdentStart(m_current) || U16_IS_SURROGATE(m_current) || m_current == '\\'); |
| 1037 |
while (isIdentPart(m_current)) { |
1032 |
while (isSingleCharacterIdentPart(m_current)) { |
| 1038 |
orAllChars |= m_current; |
1033 |
orAllChars |= m_current; |
| 1039 |
shift(); |
1034 |
shift(); |
| 1040 |
} |
1035 |
} |
| 1041 |
|
1036 |
|
| 1042 |
if (UNLIKELY(m_current == '\\')) { |
1037 |
if (UNLIKELY(U16_IS_SURROGATE(m_current) || m_current == '\\')) |
| 1043 |
ASSERT(!isPrivateName); |
1038 |
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart); |
| 1044 |
setOffsetFromSourcePtr(identifierStart, identifierLineStart); |
|
|
| 1045 |
return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode); |
| 1046 |
} |
| 1047 |
|
| 1048 |
bool isAll8Bit = false; |
| 1049 |
|
| 1050 |
if (!(orAllChars & ~0xff)) |
| 1051 |
isAll8Bit = true; |
| 1052 |
|
1039 |
|
|
|
1040 |
bool isAll8Bit = !(orAllChars & ~0xff); |
| 1053 |
const Identifier* ident = nullptr; |
1041 |
const Identifier* ident = nullptr; |
| 1054 |
|
1042 |
|
| 1055 |
if (shouldCreateIdentifier || m_parsingBuiltinFunction) { |
1043 |
if (shouldCreateIdentifier) { |
| 1056 |
int identifierLength = currentSourcePtr() - identifierStart; |
1044 |
int identifierLength = currentSourcePtr() - identifierStart; |
| 1057 |
if (m_parsingBuiltinFunction && isPrivateName) { |
1045 |
if (isAll8Bit) |
| 1058 |
if (isWellKnownSymbol) |
1046 |
ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength); |
| 1059 |
ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpWellKnownSymbol(identifierStart, identifierLength)); |
1047 |
else |
| 1060 |
else |
1048 |
ident = makeIdentifier(identifierStart, identifierLength); |
| 1061 |
ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpPrivateName(identifierStart, identifierLength)); |
|
|
| 1062 |
if (!ident) |
| 1063 |
return INVALID_PRIVATE_NAME_ERRORTOK; |
| 1064 |
} else { |
| 1065 |
if (isAll8Bit) |
| 1066 |
ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength); |
| 1067 |
else |
| 1068 |
ident = makeIdentifier(identifierStart, identifierLength); |
| 1069 |
if (m_parsingBuiltinFunction) { |
| 1070 |
if (!isSafeBuiltinIdentifier(m_vm, ident)) { |
| 1071 |
m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions."); |
| 1072 |
return ERRORTOK; |
| 1073 |
} |
| 1074 |
if (*ident == m_vm.propertyNames->undefinedKeyword) |
| 1075 |
tokenData->ident = &m_vm.propertyNames->undefinedPrivateName; |
| 1076 |
} |
| 1077 |
} |
| 1078 |
tokenData->ident = ident; |
1049 |
tokenData->ident = ident; |
| 1079 |
} else |
1050 |
} else |
| 1080 |
tokenData->ident = nullptr; |
1051 |
tokenData->ident = nullptr; |
| 1081 |
|
1052 |
|
| 1082 |
if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isPrivateName) { |
1053 |
if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords))) { |
| 1083 |
ASSERT(shouldCreateIdentifier); |
1054 |
ASSERT(shouldCreateIdentifier); |
| 1084 |
if (remaining < maxTokenLength) { |
1055 |
if (remaining < maxTokenLength) { |
| 1085 |
const HashTableValue* entry = JSC::mainTable.entry(*ident); |
1056 |
const HashTableValue* entry = JSC::mainTable.entry(*ident); |
|
Lines 1095-1143
template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::p
a/Source/JavaScriptCore/parser/Lexer.cpp_sec6
|
| 1095 |
return IDENT; |
1066 |
return IDENT; |
| 1096 |
} |
1067 |
} |
| 1097 |
|
1068 |
|
| 1098 |
template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode) |
1069 |
template<typename CharacterType> |
|
|
1070 |
template<bool shouldCreateIdentifier> |
| 1071 |
JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode, const CharacterType* identifierStart) |
| 1099 |
{ |
1072 |
{ |
| 1100 |
tokenData->escaped = true; |
1073 |
ASSERT(U16_IS_SURROGATE(m_current) || m_current == '\\'); |
| 1101 |
auto identifierStart = currentSourcePtr(); |
1074 |
ASSERT(m_buffer16.isEmpty()); |
| 1102 |
bool bufferRequired = false; |
1075 |
ASSERT(!tokenData->escaped); |
| 1103 |
|
1076 |
|
| 1104 |
while (true) { |
1077 |
auto fillBuffer = [&] (bool isStart = false) { |
| 1105 |
if (LIKELY(isIdentPart(m_current))) { |
1078 |
// \uXXXX unicode characters or Surrogate pairs. |
|
|
1079 |
if (identifierStart != currentSourcePtr()) |
| 1080 |
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart); |
| 1081 |
|
| 1082 |
if (m_current == '\\') { |
| 1083 |
tokenData->escaped = true; |
| 1106 |
shift(); |
1084 |
shift(); |
| 1107 |
continue; |
1085 |
if (UNLIKELY(m_current != 'u')) |
|
|
1086 |
return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK; |
| 1087 |
shift(); |
| 1088 |
auto character = parseUnicodeEscape(); |
| 1089 |
if (UNLIKELY(!character.isValid())) |
| 1090 |
return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK; |
| 1091 |
if (UNLIKELY(isStart ? !isIdentStart(character.value()) : !isIdentPart(character.value()))) |
| 1092 |
return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK; |
| 1093 |
if (shouldCreateIdentifier) |
| 1094 |
recordUnicodeCodePoint(character.value()); |
| 1095 |
identifierStart = currentSourcePtr(); |
| 1096 |
return IDENT; |
| 1108 |
} |
1097 |
} |
| 1109 |
if (LIKELY(m_current != '\\')) |
|
|
| 1110 |
break; |
| 1111 |
|
1098 |
|
| 1112 |
// \uXXXX unicode characters. |
1099 |
ASSERT(U16_IS_SURROGATE(m_current)); |
| 1113 |
bufferRequired = true; |
1100 |
if (UNLIKELY(!U16_IS_SURROGATE_LEAD(m_current))) |
| 1114 |
if (identifierStart != currentSourcePtr()) |
1101 |
return INVALID_UNICODE_ENCODING_ERRORTOK; |
| 1115 |
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart); |
1102 |
|
|
|
1103 |
UChar32 codePoint = currentCodePoint(); |
| 1104 |
if (UNLIKELY(codePoint == U_SENTINEL)) |
| 1105 |
return INVALID_UNICODE_ENCODING_ERRORTOK; |
| 1106 |
if (UNLIKELY(isStart ? !isNonLatin1IdentStart(codePoint) : !isNonLatin1IdentPart(codePoint))) |
| 1107 |
return INVALID_IDENTIFIER_UNICODE_ERRORTOK; |
| 1108 |
append16(m_code, 2); |
| 1116 |
shift(); |
1109 |
shift(); |
| 1117 |
if (UNLIKELY(m_current != 'u')) |
|
|
| 1118 |
return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK; |
| 1119 |
shift(); |
1110 |
shift(); |
| 1120 |
auto character = parseUnicodeEscape(); |
|
|
| 1121 |
if (UNLIKELY(!character.isValid())) |
| 1122 |
return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK; |
| 1123 |
if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character.value()) : !isIdentStart(character.value()))) |
| 1124 |
return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK; |
| 1125 |
if (shouldCreateIdentifier) |
| 1126 |
recordUnicodeCodePoint(character.value()); |
| 1127 |
identifierStart = currentSourcePtr(); |
1111 |
identifierStart = currentSourcePtr(); |
|
|
1112 |
return IDENT; |
| 1113 |
}; |
| 1114 |
|
| 1115 |
JSTokenType type = fillBuffer(identifierStart == currentSourcePtr()); |
| 1116 |
if (UNLIKELY(type & ErrorTokenFlag)) |
| 1117 |
return type; |
| 1118 |
|
| 1119 |
while (true) { |
| 1120 |
if (LIKELY(isSingleCharacterIdentPart(m_current))) { |
| 1121 |
shift(); |
| 1122 |
continue; |
| 1123 |
} |
| 1124 |
if (!U16_IS_SURROGATE(m_current) && m_current != '\\') |
| 1125 |
break; |
| 1126 |
|
| 1127 |
type = fillBuffer(); |
| 1128 |
if (UNLIKELY(type & ErrorTokenFlag)) |
| 1129 |
return type; |
| 1128 |
} |
1130 |
} |
| 1129 |
|
1131 |
|
| 1130 |
int identifierLength; |
|
|
| 1131 |
const Identifier* ident = nullptr; |
1132 |
const Identifier* ident = nullptr; |
| 1132 |
if (shouldCreateIdentifier) { |
1133 |
if (shouldCreateIdentifier) { |
| 1133 |
if (!bufferRequired) { |
1134 |
if (identifierStart != currentSourcePtr()) |
| 1134 |
identifierLength = currentSourcePtr() - identifierStart; |
1135 |
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart); |
| 1135 |
ident = makeIdentifier(identifierStart, identifierLength); |
1136 |
ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); |
| 1136 |
} else { |
|
|
| 1137 |
if (identifierStart != currentSourcePtr()) |
| 1138 |
m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart); |
| 1139 |
ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); |
| 1140 |
} |
| 1141 |
|
1137 |
|
| 1142 |
tokenData->ident = ident; |
1138 |
tokenData->ident = ident; |
| 1143 |
} else |
1139 |
} else |
|
Lines 1152-1158
template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenTy
a/Source/JavaScriptCore/parser/Lexer.cpp_sec7
|
| 1152 |
return IDENT; |
1148 |
return IDENT; |
| 1153 |
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue()); |
1149 |
JSTokenType token = static_cast<JSTokenType>(entry->lexerValue()); |
| 1154 |
if ((token != RESERVED_IF_STRICT) || strictMode) |
1150 |
if ((token != RESERVED_IF_STRICT) || strictMode) |
| 1155 |
return bufferRequired ? UNEXPECTED_ESCAPE_ERRORTOK : token; |
1151 |
return UNEXPECTED_ESCAPE_ERRORTOK; |
| 1156 |
} |
1152 |
} |
| 1157 |
|
1153 |
|
| 1158 |
return IDENT; |
1154 |
return IDENT; |
|
Lines 1912-1923
start:
a/Source/JavaScriptCore/parser/Lexer.cpp_sec8
|
| 1912 |
CharacterType type; |
1908 |
CharacterType type; |
| 1913 |
if (LIKELY(isLatin1(m_current))) |
1909 |
if (LIKELY(isLatin1(m_current))) |
| 1914 |
type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]); |
1910 |
type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]); |
| 1915 |
else if (isNonLatin1IdentStart(m_current)) |
1911 |
else { |
| 1916 |
type = CharacterIdentifierStart; |
1912 |
UChar32 codePoint; |
| 1917 |
else if (isLineTerminator(m_current)) |
1913 |
U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint); |
| 1918 |
type = CharacterLineTerminator; |
1914 |
if (isNonLatin1IdentStart(codePoint)) |
| 1919 |
else |
1915 |
type = CharacterIdentifierStart; |
| 1920 |
type = CharacterInvalid; |
1916 |
else if (isLineTerminator(m_current)) |
|
|
1917 |
type = CharacterLineTerminator; |
| 1918 |
else |
| 1919 |
type = CharacterInvalid; |
| 1920 |
} |
| 1921 |
|
1921 |
|
| 1922 |
switch (type) { |
1922 |
switch (type) { |
| 1923 |
case CharacterGreater: |
1923 |
case CharacterGreater: |
|
Lines 2231-2237
start:
a/Source/JavaScriptCore/parser/Lexer.cpp_sec9
|
| 2231 |
if (token == INTEGER) |
2231 |
if (token == INTEGER) |
| 2232 |
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); |
2232 |
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); |
| 2233 |
|
2233 |
|
| 2234 |
if (UNLIKELY(isIdentStart(m_current))) { |
2234 |
if (LIKELY(cannotBeIdentStart(m_current))) { |
|
|
2235 |
m_buffer8.shrink(0); |
| 2236 |
break; |
| 2237 |
} |
| 2238 |
|
| 2239 |
if (UNLIKELY(isIdentStart(currentCodePoint()))) { |
| 2235 |
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s; |
2240 |
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s; |
| 2236 |
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK; |
2241 |
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK; |
| 2237 |
goto returnError; |
2242 |
goto returnError; |
|
Lines 2262-2268
start:
a/Source/JavaScriptCore/parser/Lexer.cpp_sec10
|
| 2262 |
tokenData->radix = 16; |
2267 |
tokenData->radix = 16; |
| 2263 |
} |
2268 |
} |
| 2264 |
|
2269 |
|
| 2265 |
if (UNLIKELY(isIdentStart(m_current))) { |
2270 |
if (LIKELY(cannotBeIdentStart(m_current))) { |
|
|
2271 |
if (LIKELY(token != BIGINT)) |
| 2272 |
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); |
| 2273 |
m_buffer8.shrink(0); |
| 2274 |
break; |
| 2275 |
} |
| 2276 |
|
| 2277 |
if (UNLIKELY(isIdentStart(currentCodePoint()))) { |
| 2266 |
m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s; |
2278 |
m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s; |
| 2267 |
token = UNTERMINATED_HEX_NUMBER_ERRORTOK; |
2279 |
token = UNTERMINATED_HEX_NUMBER_ERRORTOK; |
| 2268 |
goto returnError; |
2280 |
goto returnError; |
|
Lines 2294-2300
start:
a/Source/JavaScriptCore/parser/Lexer.cpp_sec11
|
| 2294 |
tokenData->radix = 2; |
2306 |
tokenData->radix = 2; |
| 2295 |
} |
2307 |
} |
| 2296 |
|
2308 |
|
| 2297 |
if (UNLIKELY(isIdentStart(m_current))) { |
2309 |
if (LIKELY(cannotBeIdentStart(m_current))) { |
|
|
2310 |
if (LIKELY(token != BIGINT)) |
| 2311 |
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); |
| 2312 |
m_buffer8.shrink(0); |
| 2313 |
break; |
| 2314 |
} |
| 2315 |
|
| 2316 |
if (UNLIKELY(isIdentStart(currentCodePoint()))) { |
| 2298 |
m_lexErrorMessage = "No space between binary literal and identifier"_s; |
2317 |
m_lexErrorMessage = "No space between binary literal and identifier"_s; |
| 2299 |
token = UNTERMINATED_BINARY_NUMBER_ERRORTOK; |
2318 |
token = UNTERMINATED_BINARY_NUMBER_ERRORTOK; |
| 2300 |
goto returnError; |
2319 |
goto returnError; |
|
Lines 2327-2333
start:
a/Source/JavaScriptCore/parser/Lexer.cpp_sec12
|
| 2327 |
tokenData->radix = 8; |
2346 |
tokenData->radix = 8; |
| 2328 |
} |
2347 |
} |
| 2329 |
|
2348 |
|
| 2330 |
if (UNLIKELY(isIdentStart(m_current))) { |
2349 |
if (LIKELY(cannotBeIdentStart(m_current))) { |
|
|
2350 |
if (LIKELY(token != BIGINT)) |
| 2351 |
token = tokenTypeForIntegerLikeToken(tokenData->doubleValue); |
| 2352 |
m_buffer8.shrink(0); |
| 2353 |
break; |
| 2354 |
} |
| 2355 |
|
| 2356 |
if (UNLIKELY(isIdentStart(currentCodePoint()))) { |
| 2331 |
m_lexErrorMessage = "No space between octal literal and identifier"_s; |
2357 |
m_lexErrorMessage = "No space between octal literal and identifier"_s; |
| 2332 |
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK; |
2358 |
token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK; |
| 2333 |
goto returnError; |
2359 |
goto returnError; |
|
Lines 2394-2400
start:
a/Source/JavaScriptCore/parser/Lexer.cpp_sec13
|
| 2394 |
} |
2420 |
} |
| 2395 |
} |
2421 |
} |
| 2396 |
|
2422 |
|
| 2397 |
if (UNLIKELY(isIdentStart(m_current))) { |
2423 |
if (LIKELY(cannotBeIdentStart(m_current))) { |
|
|
2424 |
m_buffer8.shrink(0); |
| 2425 |
break; |
| 2426 |
} |
| 2427 |
|
| 2428 |
if (UNLIKELY(isIdentStart(currentCodePoint()))) { |
| 2398 |
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s; |
2429 |
m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s; |
| 2399 |
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK; |
2430 |
token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK; |
| 2400 |
goto returnError; |
2431 |
goto returnError; |
|
Lines 2416-2424
start:
a/Source/JavaScriptCore/parser/Lexer.cpp_sec14
|
| 2416 |
token = STRING; |
2447 |
token = STRING; |
| 2417 |
break; |
2448 |
break; |
| 2418 |
} |
2449 |
} |
| 2419 |
case CharacterIdentifierStart: |
2450 |
case CharacterIdentifierStart: { |
| 2420 |
ASSERT(isIdentStart(m_current)); |
2451 |
if constexpr (ASSERT_ENABLED) { |
|
|
2452 |
UChar32 codePoint; |
| 2453 |
U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint); |
| 2454 |
ASSERT(isIdentStart(codePoint)); |
| 2455 |
} |
| 2421 |
FALLTHROUGH; |
2456 |
FALLTHROUGH; |
|
|
2457 |
} |
| 2422 |
case CharacterBackSlash: |
2458 |
case CharacterBackSlash: |
| 2423 |
parseIdent: |
2459 |
parseIdent: |
| 2424 |
if (lexerFlags.contains(LexerFlags::DontBuildKeywords)) |
2460 |
if (lexerFlags.contains(LexerFlags::DontBuildKeywords)) |
|
Lines 2578-2595
JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
a/Source/JavaScriptCore/parser/Lexer.cpp_sec15
|
| 2578 |
} |
2614 |
} |
| 2579 |
|
2615 |
|
| 2580 |
tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether); |
2616 |
tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether); |
| 2581 |
|
|
|
| 2582 |
m_buffer16.shrink(0); |
2617 |
m_buffer16.shrink(0); |
| 2583 |
charactersOredTogether = 0; |
|
|
| 2584 |
|
2618 |
|
| 2585 |
while (isIdentPart(m_current)) { |
2619 |
ASSERT(m_buffer8.isEmpty()); |
| 2586 |
record16(m_current); |
2620 |
while (LIKELY(isLatin1(m_current)) && isIdentPart(static_cast<LChar>(m_current))) { |
| 2587 |
orCharacter<T>(charactersOredTogether, m_current); |
2621 |
record8(static_cast<LChar>(m_current)); |
| 2588 |
shift(); |
2622 |
shift(); |
| 2589 |
} |
2623 |
} |
| 2590 |
|
2624 |
|
| 2591 |
tokenData->flags = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether); |
2625 |
// FIXME: This should probably not be a lex error but dealing with surrogate pairs here is annoying and it's going to be an error anyway... |
| 2592 |
m_buffer16.shrink(0); |
2626 |
if (UNLIKELY(!isLatin1(m_current))) { |
|
|
2627 |
m_buffer8.shrink(0); |
| 2628 |
JSTokenType token = INVALID_IDENTIFIER_UNICODE_ERRORTOK; |
| 2629 |
fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition()); |
| 2630 |
m_error = true; |
| 2631 |
String codePoint = String::fromCodePoint(currentCodePoint()); |
| 2632 |
if (!codePoint) |
| 2633 |
codePoint = "`invalid unicode character`"; |
| 2634 |
m_lexErrorMessage = makeString("Invalid non-latin character in RexExp literal's flags '", getToken(*tokenRecord), codePoint, "'"); |
| 2635 |
return token; |
| 2636 |
} |
| 2637 |
|
| 2638 |
tokenData->flags = makeIdentifier(m_buffer8.data(), m_buffer8.size()); |
| 2639 |
m_buffer8.shrink(0); |
| 2593 |
|
2640 |
|
| 2594 |
// Since RegExp always ends with /, m_atLineStart always becomes false. |
2641 |
// Since RegExp always ends with /, m_atLineStart always becomes false. |
| 2595 |
m_atLineStart = false; |
2642 |
m_atLineStart = false; |