WebKit Bugzilla
Attachment 341881 Details for
Bug 181989
: [YARR] Extend size of fixed characters bulk matching in 64bit platform
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch
bug-181989-20180604100222.patch (text/plain), 19.47 KB, created by
Yusuke Suzuki
on 2018-06-03 18:02:23 PDT
(
hide
)
Description:
Patch
Filename:
MIME Type:
Creator:
Yusuke Suzuki
Created:
2018-06-03 18:02:23 PDT
Size:
19.47 KB
patch
obsolete
>Subversion Revision: 232449 >diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog >index 637d7de2652e32a85652afbd8fa1bbb413d21149..0899c11dc14d8aa8d9f9727dd35fa1bb788a6467 100644 >--- a/Source/JavaScriptCore/ChangeLog >+++ b/Source/JavaScriptCore/ChangeLog >@@ -1,3 +1,20 @@ >+2018-06-03 Yusuke Suzuki <utatane.tea@gmail.com> >+ >+ [YARR] Extend size of fixed characters bulk matching in 64bit platform >+ https://bugs.webkit.org/show_bug.cgi?id=181989 >+ >+ Reviewed by NOBODY (OOPS!). >+ >+ This patch extends bulk matching style for fixed-sized characters. >+ In 64bit environment, the GPR can hold up to 8 characters. This change >+ reduces the code size since we can fuse multiple `mov` operations into one. >+ >+ * assembler/LinkBuffer.h: >+ * runtime/Options.h: >+ * yarr/YarrJIT.cpp: >+ (JSC::Yarr::YarrGenerator::generatePatternCharacterOnce): >+ (JSC::Yarr::YarrGenerator::compile): >+ > 2018-06-02 Caio Lima <ticaiolima@gmail.com> > > [ESNext][BigInt] Implement support for addition operations >diff --git a/Source/JavaScriptCore/assembler/LinkBuffer.h b/Source/JavaScriptCore/assembler/LinkBuffer.h >index 792a8c952261d640c17bfc2350ef8ecb42ee7143..4748b07092e473e3ee9f2d04caaf44355b248940 100644 >--- a/Source/JavaScriptCore/assembler/LinkBuffer.h >+++ b/Source/JavaScriptCore/assembler/LinkBuffer.h >@@ -382,6 +382,9 @@ bool shouldDumpDisassemblyFor(CodeBlock*); > #define FINALIZE_DFG_CODE(linkBufferReference, resultPtrTag, ...) \ > FINALIZE_CODE_IF((JSC::Options::asyncDisassembly() || JSC::Options::dumpDisassembly() || Options::dumpDFGDisassembly()), linkBufferReference, resultPtrTag, __VA_ARGS__) > >+#define FINALIZE_REGEXP_CODE(linkBufferReference, resultPtrTag, dataLogFArgumentsForHeading) \ >+ FINALIZE_CODE_IF(JSC::Options::asyncDisassembly() || JSC::Options::dumpDisassembly() || Options::dumpRegExpDisassembly(), linkBufferReference, resultPtrTag, dataLogFArgumentsForHeading) >+ > } // namespace JSC > > #endif // ENABLE(ASSEMBLER) >diff --git a/Source/JavaScriptCore/runtime/Options.h b/Source/JavaScriptCore/runtime/Options.h >index c7f192496e8768740ccc05b788cdc6df752cdd75..b490062aeee85687070e6696800d7a8b4ed59781 100644 >--- a/Source/JavaScriptCore/runtime/Options.h >+++ b/Source/JavaScriptCore/runtime/Options.h >@@ -177,6 +177,7 @@ constexpr bool enableWebAssemblyStreamingApi = false; > v(bool, asyncDisassembly, false, Normal, nullptr) \ > v(bool, dumpDFGDisassembly, false, Normal, "dumps disassembly of DFG function upon compilation") \ > v(bool, dumpFTLDisassembly, false, Normal, "dumps disassembly of FTL function upon compilation") \ >+ v(bool, dumpRegExpDisassembly, false, Normal, "dumps disassembly of RegExp upon compilation") \ > v(bool, dumpAllDFGNodes, false, Normal, nullptr) \ > v(optionRange, bytecodeRangeToJITCompile, 0, Normal, "bytecode size range to allow compilation on, e.g. 1:100") \ > v(optionRange, bytecodeRangeToDFGCompile, 0, Normal, "bytecode size range to allow DFG compilation on, e.g. 1:100") \ >diff --git a/Source/JavaScriptCore/yarr/YarrJIT.cpp b/Source/JavaScriptCore/yarr/YarrJIT.cpp >index 26fcd69d0b0cb225030c0ffb128b5b66cd7c444d..569030cd0e02c54eca1225a2d8ba5ca043b2bd4f 100644 >--- a/Source/JavaScriptCore/yarr/YarrJIT.cpp >+++ b/Source/JavaScriptCore/yarr/YarrJIT.cpp >@@ -1128,12 +1128,16 @@ class YarrGenerator : private MacroAssembler { > } > > const RegisterID character = regT0; >+#if CPU(X86_64) || CPU(ARM64) >+ unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4; >+#else > unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; >- unsigned ignoreCaseMask = 0; >+#endif >+ uint64_t ignoreCaseMask = 0; > #if CPU(BIG_ENDIAN) >- int allCharacters = ch << (m_charSize == Char8 ? 24 : 16); >+ uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16); > #else >- int allCharacters = ch; >+ uint64_t allCharacters = ch; > #endif > unsigned numberCharacters; > unsigned startTermPosition = term->inputPosition; >@@ -1179,49 +1183,129 @@ class YarrGenerator : private MacroAssembler { > // upper & lower case representations are converted to a character class. > ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode)); > >- allCharacters |= (currentCharacter << shiftAmount); >+ allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount); > > if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter))) >- ignoreCaseMask |= 32 << shiftAmount; >+ ignoreCaseMask |= 32ULL << shiftAmount; > } > > if (m_charSize == Char8) { >+ auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) { >+ op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character)); >+ }; >+ >+ auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) { >+ load16Unaligned(negativeOffsetIndexedAddress(offset, character), character); >+ if (mask) >+ or32(Imm32(mask), character); >+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask))); >+ }; >+ >+ auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) { >+ if (mask) { >+ load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character); >+ if (mask) >+ or32(Imm32(mask), character); >+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask))); >+ return; >+ } >+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters))); >+ }; >+ >+#if CPU(X86_64) || CPU(ARM64) >+ auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) { >+ load64(negativeOffsetIndexedAddress(offset, character), character); >+ if (mask) >+ or64(TrustedImm64(mask), character); >+ op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask))); >+ }; >+#endif >+ > switch (numberCharacters) { > case 1: >- op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - startTermPosition, character)); >+ // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag. >+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff); > return; > case 2: { >- load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character); >- break; >+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff); >+ return; > } > case 3: { >- load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character); >- if (ignoreCaseMask) >- or32(Imm32(ignoreCaseMask), character); >- op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask))); >- op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, m_checkedOffset - startTermPosition - 2, character)); >+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff); >+ check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff); > return; > } > case 4: { >- load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- startTermPosition, character), character); >- break; >+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); >+ return; >+ } >+#if CPU(X86_64) || CPU(ARM64) >+ case 5: { >+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); >+ check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff); >+ return; >+ } >+ case 6: { >+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); >+ check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff); >+ return; > } >+ case 7: { >+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); >+ check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff); >+ check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff); >+ return; >+ } >+ case 8: { >+ check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask); >+ return; >+ } >+#endif > } > } else { >+ auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) { >+ op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character)); >+ }; >+ >+ auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) { >+ if (mask) { >+ load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character); >+ if (mask) >+ or32(Imm32(mask), character); >+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask))); >+ return; >+ } >+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters))); >+ }; >+ >+#if CPU(X86_64) || CPU(ARM64) >+ auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) { >+ load64(negativeOffsetIndexedAddress(offset, character), character); >+ if (mask) >+ or64(TrustedImm64(mask), character); >+ op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask))); >+ }; >+#endif >+ > switch (numberCharacters) { > case 1: >- op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character)); >+ // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag. >+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff); > return; > case 2: >- load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- term->inputPosition, character), character); >- break; >+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); >+ return; >+#if CPU(X86_64) || CPU(ARM64) >+ case 3: >+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff); >+ check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff); >+ return; >+ case 4: >+ check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask); >+ return; >+#endif > } > } >- >- if (ignoreCaseMask) >- or32(Imm32(ignoreCaseMask), character); >- op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask))); >- return; > } > void backtrackPatternCharacterOnce(size_t opIndex) > { >@@ -3522,14 +3606,14 @@ class YarrGenerator : private MacroAssembler { > > if (compileMode == MatchOnly) { > if (m_charSize == Char8) >- codeBlock.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, YarrMatchOnly8BitPtrTag, "Match-only 8-bit regular expression")); >+ codeBlock.set8BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly8BitPtrTag, "Match-only 8-bit regular expression")); > else >- codeBlock.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, YarrMatchOnly16BitPtrTag, "Match-only 16-bit regular expression")); >+ codeBlock.set16BitCodeMatchOnly(FINALIZE_REGEXP_CODE(linkBuffer, YarrMatchOnly16BitPtrTag, "Match-only 16-bit regular expression")); > } else { > if (m_charSize == Char8) >- codeBlock.set8BitCode(FINALIZE_CODE(linkBuffer, Yarr8BitPtrTag, "8-bit regular expression")); >+ codeBlock.set8BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr8BitPtrTag, "8-bit regular expression")); > else >- codeBlock.set16BitCode(FINALIZE_CODE(linkBuffer, Yarr16BitPtrTag, "16-bit regular expression")); >+ codeBlock.set16BitCode(FINALIZE_REGEXP_CODE(linkBuffer, Yarr16BitPtrTag, "16-bit regular expression")); > } > if (m_failureReason) > codeBlock.setFallBackWithFailureReason(*m_failureReason); >diff --git a/JSTests/ChangeLog b/JSTests/ChangeLog >index f4d2b34ab9c030827a5a15b1e5dd988b90d657ba..bd28bed76ab60d4a57b46e7a1aac114f0c90058d 100644 >--- a/JSTests/ChangeLog >+++ b/JSTests/ChangeLog >@@ -1,3 +1,35 @@ >+2018-06-03 Yusuke Suzuki <utatane.tea@gmail.com> >+ >+ [YARR] Extend size of fixed characters bulk matching in 64bit platform >+ https://bugs.webkit.org/show_bug.cgi?id=181989 >+ >+ Reviewed by NOBODY (OOPS!). >+ >+ * stress/characters-regexp-ignore-case.js: Added. >+ (shouldBe): >+ (testH): >+ (testHe): >+ (testHel): >+ (testHell): >+ (testHello): >+ (testHelloW): >+ (testHelloWo): >+ (testHelloWor): >+ (testHelloWorl): >+ (testHelloWorld): >+ * stress/characters-regexp.js: Added. >+ (shouldBe): >+ (testH): >+ (testHe): >+ (testHel): >+ (testHell): >+ (testHello): >+ (testHelloW): >+ (testHelloWo): >+ (testHelloWor): >+ (testHelloWorl): >+ (testHelloWorld): >+ > 2018-06-02 Caio Lima <ticaiolima@gmail.com> > > [ESNext][BigInt] Implement support for addition operations >diff --git a/JSTests/stress/characters-regexp-ignore-case.js b/JSTests/stress/characters-regexp-ignore-case.js >new file mode 100644 >index 0000000000000000000000000000000000000000..a587f95871b6d91b7acb787d8d8db20e9b6bf050 >--- /dev/null >+++ b/JSTests/stress/characters-regexp-ignore-case.js >@@ -0,0 +1,77 @@ >+function shouldBe(actual, expected) { >+ if (actual !== expected) >+ throw new Error('bad value: ' + actual); >+} >+ >+function testH(string) { >+ return string.match(/h/i); >+} >+noInline(testH); >+ >+function testHe(string) { >+ return string.match(/he/i); >+} >+noInline(testHe); >+ >+function testHel(string) { >+ return string.match(/hel/i); >+} >+noInline(testHel); >+ >+function testHell(string) { >+ return string.match(/hell/i); >+} >+noInline(testHell); >+ >+function testHello(string) { >+ return string.match(/hello/i); >+} >+noInline(testHello); >+ >+function testHelloW(string) { >+ return string.match(/hellow/i); >+} >+noInline(testHelloW); >+ >+function testHelloWo(string) { >+ return string.match(/hellowo/i); >+} >+noInline(testHelloWo); >+ >+function testHelloWor(string) { >+ return string.match(/hellowor/i); >+} >+noInline(testHelloWor); >+ >+function testHelloWorl(string) { >+ return string.match(/helloworl/i); >+} >+noInline(testHelloWorl); >+ >+function testHelloWorld(string) { >+ return string.match(/helloworld/i); >+} >+noInline(testHelloWorld); >+ >+for (var i = 0; i < 1e4; ++i) { >+ shouldBe(testH("HelloWorld")[0], `H`); >+ shouldBe(testHe("HelloWorld")[0], `He`); >+ shouldBe(testHel("HelloWorld")[0], `Hel`); >+ shouldBe(testHell("HelloWorld")[0], `Hell`); >+ shouldBe(testHello("HelloWorld")[0], `Hello`); >+ shouldBe(testHelloW("HelloWorld")[0], `HelloW`); >+ shouldBe(testHelloWo("HelloWorld")[0], `HelloWo`); >+ shouldBe(testHelloWor("HelloWorld")[0], `HelloWor`); >+ shouldBe(testHelloWorl("HelloWorld")[0], `HelloWorl`); >+ shouldBe(testHelloWorld("HelloWorld")[0], `HelloWorld`); >+ shouldBe(testH("HelloWorldããã«ã¡ã¯")[0], `H`); >+ shouldBe(testHe("HelloWorldããã«ã¡ã¯")[0], `He`); >+ shouldBe(testHel("HelloWorldããã«ã¡ã¯")[0], `Hel`); >+ shouldBe(testHell("HelloWorldããã«ã¡ã¯")[0], `Hell`); >+ shouldBe(testHello("HelloWorldããã«ã¡ã¯")[0], `Hello`); >+ shouldBe(testHelloW("HelloWorldããã«ã¡ã¯")[0], `HelloW`); >+ shouldBe(testHelloWo("HelloWorldããã«ã¡ã¯")[0], `HelloWo`); >+ shouldBe(testHelloWor("HelloWorldããã«ã¡ã¯")[0], `HelloWor`); >+ shouldBe(testHelloWorl("HelloWorldããã«ã¡ã¯")[0], `HelloWorl`); >+ shouldBe(testHelloWorld("HelloWorldããã«ã¡ã¯")[0], `HelloWorld`); >+} >diff --git a/JSTests/stress/characters-regexp.js b/JSTests/stress/characters-regexp.js >new file mode 100644 >index 0000000000000000000000000000000000000000..16b060e3b5063a95c23d4d1f669f6e6382643df7 >--- /dev/null >+++ b/JSTests/stress/characters-regexp.js >@@ -0,0 +1,77 @@ >+function shouldBe(actual, expected) { >+ if (actual !== expected) >+ throw new Error('bad value: ' + actual); >+} >+ >+function testH(string) { >+ return string.match(/H/); >+} >+noInline(testH); >+ >+function testHe(string) { >+ return string.match(/He/); >+} >+noInline(testHe); >+ >+function testHel(string) { >+ return string.match(/Hel/); >+} >+noInline(testHel); >+ >+function testHell(string) { >+ return string.match(/Hell/); >+} >+noInline(testHell); >+ >+function testHello(string) { >+ return string.match(/Hello/); >+} >+noInline(testHello); >+ >+function testHelloW(string) { >+ return string.match(/HelloW/); >+} >+noInline(testHelloW); >+ >+function testHelloWo(string) { >+ return string.match(/HelloWo/); >+} >+noInline(testHelloWo); >+ >+function testHelloWor(string) { >+ return string.match(/HelloWor/); >+} >+noInline(testHelloWor); >+ >+function testHelloWorl(string) { >+ return string.match(/HelloWorl/); >+} >+noInline(testHelloWorl); >+ >+function testHelloWorld(string) { >+ return string.match(/HelloWorld/); >+} >+noInline(testHelloWorld); >+ >+for (var i = 0; i < 1e4; ++i) { >+ shouldBe(testH("HelloWorld")[0], `H`); >+ shouldBe(testHe("HelloWorld")[0], `He`); >+ shouldBe(testHel("HelloWorld")[0], `Hel`); >+ shouldBe(testHell("HelloWorld")[0], `Hell`); >+ shouldBe(testHello("HelloWorld")[0], `Hello`); >+ shouldBe(testHelloW("HelloWorld")[0], `HelloW`); >+ shouldBe(testHelloWo("HelloWorld")[0], `HelloWo`); >+ shouldBe(testHelloWor("HelloWorld")[0], `HelloWor`); >+ shouldBe(testHelloWorl("HelloWorld")[0], `HelloWorl`); >+ shouldBe(testHelloWorld("HelloWorld")[0], `HelloWorld`); >+ shouldBe(testH("HelloWorldããã«ã¡ã¯")[0], `H`); >+ shouldBe(testHe("HelloWorldããã«ã¡ã¯")[0], `He`); >+ shouldBe(testHel("HelloWorldããã«ã¡ã¯")[0], `Hel`); >+ shouldBe(testHell("HelloWorldããã«ã¡ã¯")[0], `Hell`); >+ shouldBe(testHello("HelloWorldããã«ã¡ã¯")[0], `Hello`); >+ shouldBe(testHelloW("HelloWorldããã«ã¡ã¯")[0], `HelloW`); >+ shouldBe(testHelloWo("HelloWorldããã«ã¡ã¯")[0], `HelloWo`); >+ shouldBe(testHelloWor("HelloWorldããã«ã¡ã¯")[0], `HelloWor`); >+ shouldBe(testHelloWorl("HelloWorldããã«ã¡ã¯")[0], `HelloWorl`); >+ shouldBe(testHelloWorld("HelloWorldããã«ã¡ã¯")[0], `HelloWorld`); >+}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Formatted Diff
|
Diff
Attachments on
bug 181989
:
332036
|
332046
|
332047
|
332050
|
332083
|
332505
|
332506
|
332507
|
341881
|
341883