Attachment 289952 Details for Bug 162593 – Patch

[patch] Patch

bug-162593-20160927092036.patch (text/plain), 73.43 KB, created by Alex Christensen on 2016-09-27 09:23:32 PDT

(hide)

Description:

Filename:

MIME Type:

Creator: Alex Christensen

Created: 2016-09-27 09:23:32 PDT

Size: 73.43 KB

patch

obsolete

>Index: Source/WebCore/ChangeLog
>===================================================================
>--- Source/WebCore/ChangeLog	(revision 206417)
>+++ Source/WebCore/ChangeLog	(working copy)
>@@ -1,3 +1,63 @@
>+2016-09-26  Alex Christensen  <achristensen@webkit.org>
>+
>+        Implement URLParser::syntaxViolation
>+        https://bugs.webkit.org/show_bug.cgi?id=162593
>+
>+        Reviewed by NOBODY (OOPS!).
>+
>+        Most of the time when parsing URLs, we just look at the URL, find offsets of the host, path, query, etc., 
>+        and the String can be used untouched.  When this happens, we do not want to allocate and copy the String.
>+        We want to just add a reference to an existing String.
>+
>+        Sometimes we need to canonicalize the String because there has been a syntaxViolation,
>+        defined as any place where the input String differs from its canonicalized URL String.  In such cases we need to
>+        allocate a new String and fill it with the canonicalized URL String.  When a syntaxViolation happens for the
>+        first time, we assume that everything in the input String up to that point is equal to what it would have been
>+        if we had canonicalized the beginning of the URL, copy it into a buffer, and continue parsing in a mode where
>+        instead of just looking at the input URL String, we canonicalize each code point into the buffer.
>+
>+        Changes to behavior involve additional spec compliance with tabs and newlines in different places in URLs,
>+        as well as additional spec compliance when parsing empty and null URLs relative to other URLs.
>+        Both are covered by new API tests.  Existing behavior is covered by existing API tests.
>+
>+        This is about a 15% speed improvement on my URL parsing benchmark.
>+
>+        * platform/URL.cpp:
>+        (WebCore::assertProtocolIsGood):
>+        (WebCore::URL::protocolIs):
>+        (WebCore::protocolIs):
>+        * platform/URL.h:
>+        * platform/URLParser.cpp:
>+        (WebCore::isTabOrNewline):
>+        (WebCore::URLParser::incrementIteratorSkippingTabsAndNewlines):
>+        (WebCore::URLParser::isWindowsDriveLetter):
>+        (WebCore::URLParser::appendToASCIIBuffer):
>+        (WebCore::URLParser::checkWindowsDriveLetter):
>+        (WebCore::URLParser::shouldCopyFileURL):
>+        (WebCore::URLParser::utf8PercentEncode):
>+        (WebCore::URLParser::utf8QueryEncode):
>+        (WebCore::URLParser::copyURLPartsUntil):
>+        (WebCore::URLParser::syntaxViolation):
>+        (WebCore::URLParser::fragmentSyntaxViolation):
>+        (WebCore::URLParser::parsedDataView):
>+        (WebCore::URLParser::currentPosition):
>+        (WebCore::URLParser::URLParser):
>+        (WebCore::URLParser::parse):
>+        (WebCore::URLParser::parseAuthority):
>+        (WebCore::URLParser::parseIPv4Number):
>+        (WebCore::URLParser::parseIPv4Host):
>+        (WebCore::URLParser::parseIPv6Host):
>+        (WebCore::URLParser::parsePort):
>+        (WebCore::URLParser::parseHostAndPort):
>+        (WebCore::serializeURLEncodedForm):
>+        (WebCore::URLParser::allValuesEqual):
>+        (WebCore::URLParser::internalValuesConsistent):
>+        (WebCore::URLParser::incrementIteratorSkippingTabAndNewLine): Deleted.
>+        (WebCore::URLParser::syntaxError): Deleted.
>+        (WebCore::parseIPv4Number): Deleted.
>+        * platform/URLParser.h:
>+        (WebCore::URLParser::incrementIteratorSkippingTabsAndNewlines):
>+
> 2016-09-26  Wenson Hsieh  <wenson_hsieh@apple.com>
> 
>         If you play a youtube video from now playing after it finished in Safari, controls disappear
>Index: Source/WebCore/platform/URL.cpp
>===================================================================
>--- Source/WebCore/platform/URL.cpp	(revision 206412)
>+++ Source/WebCore/platform/URL.cpp	(working copy)
>@@ -788,15 +788,15 @@ String URL::fileSystemPath() const
> 
> #ifdef NDEBUG
> 
>-static inline void assertProtocolIsGood(const char*, size_t)
>+static inline void assertProtocolIsGood(StringView)
> {
> }
> 
> #else
> 
>-static void assertProtocolIsGood(const char* protocol, size_t length)
>+static void assertProtocolIsGood(StringView protocol)
> {
>-    for (size_t i = 0; i < length; ++i) {
>+    for (size_t i = 0; i < protocol.length(); ++i) {
>         const char c = protocol[i];
>         ASSERT(c > ' ' && c < 0x7F && !(c >= 'A' && c <= 'Z'));
>     }
>@@ -806,7 +806,7 @@ static void assertProtocolIsGood(const c
> 
> bool URL::protocolIs(const char* protocol) const
> {
>-    assertProtocolIsGood(protocol, strlen(protocol));
>+    assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
> 
>     // JavaScript URLs are "valid" and should be executed even if URL decides they are invalid.
>     // The free function protocolIsJavaScript() should be used instead. 
>@@ -823,14 +823,14 @@ bool URL::protocolIs(const char* protoco
>     return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument.
> }
> 
>-bool URL::protocolIs(const LChar* protocol, size_t length) const
>+bool URL::protocolIs(StringView protocol) const
> {
>-    assertProtocolIsGood(reinterpret_cast<const char*>(protocol), length);
>+    assertProtocolIsGood(protocol);
> 
>     if (!m_isValid)
>         return false;
>     
>-    if (m_schemeEnd != length)
>+    if (m_schemeEnd != protocol.length())
>         return false;
> 
>     // Do the comparison without making a new string object.
>@@ -1914,7 +1914,7 @@ String encodeWithURLEscapeSequences(cons
> 
> static bool protocolIs(StringView stringURL, const char* protocol)
> {
>-    assertProtocolIsGood(protocol, strlen(protocol));
>+    assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
>     unsigned length = stringURL.length();
>     for (unsigned i = 0; i < length; ++i) {
>         if (!protocol[i])
>@@ -2141,7 +2141,7 @@ void URL::copyToBuffer(Vector<char, 512>
> bool protocolIs(const String& url, const char* protocol)
> {
>     // Do the comparison without making a new string object.
>-    assertProtocolIsGood(protocol, strlen(protocol));
>+    assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
>     bool isLeading = true;
>     for (unsigned i = 0, j = 0; url[i]; ++i) {
>         // skip leading whitespace and control characters.
>Index: Source/WebCore/platform/URL.h
>===================================================================
>--- Source/WebCore/platform/URL.h	(revision 206412)
>+++ Source/WebCore/platform/URL.h	(working copy)
>@@ -129,7 +129,7 @@ public:
>     // Returns true if the current URL's protocol is the same as the null-
>     // terminated ASCII argument. The argument must be lower-case.
>     WEBCORE_EXPORT bool protocolIs(const char*) const;
>-    bool protocolIs(const LChar*, size_t) const;
>+    bool protocolIs(StringView) const;
>     bool protocolIsBlob() const { return protocolIs("blob"); }
>     bool protocolIsData() const { return protocolIs("data"); }
>     bool protocolIsInHTTPFamily() const;
>Index: Source/WebCore/platform/URLParser.cpp
>===================================================================
>--- Source/WebCore/platform/URLParser.cpp	(revision 206412)
>+++ Source/WebCore/platform/URLParser.cpp	(working copy)
>@@ -410,11 +410,11 @@ template<typename CharacterType> inline 
> static bool shouldPercentEncodeQueryByte(uint8_t byte) { return characterClassTable[byte] & QueryPercent; }
> 
> template<typename CharacterType>
>-void URLParser::incrementIteratorSkippingTabAndNewLine(CodePointIterator<CharacterType>& iterator)
>+void URLParser::incrementIteratorSkippingTabsAndNewlines(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)
> {
>     ++iterator;
>-    while (!iterator.atEnd() && isTabOrNewline(*iterator)) {
>-        syntaxError(iterator);
>+    while (UNLIKELY(!iterator.atEnd() && isTabOrNewline(*iterator))) {
>+        syntaxViolation(iteratorForSyntaxViolationPosition);
>         ++iterator;
>     }
> }
>@@ -424,13 +424,13 @@ bool URLParser::isWindowsDriveLetter(Cod
> {
>     if (iterator.atEnd() || !isASCIIAlpha(*iterator))
>         return false;
>-    incrementIteratorSkippingTabAndNewLine(iterator);
>+    incrementIteratorSkippingTabsAndNewlines(iterator);
>     if (iterator.atEnd())
>         return false;
>     if (*iterator == ':')
>         return true;
>-    if (*iterator == '|') {
>-        syntaxError(iterator);
>+    if (UNLIKELY(*iterator == '|')) {
>+        syntaxViolation(iterator);
>         return true;
>     }
>     return false;
>@@ -447,14 +447,14 @@ void URLParser::appendToASCIIBuffer(UCha
> {
>     ASSERT(m_unicodeFragmentBuffer.isEmpty());
>     ASSERT(isASCII(codePoint));
>-    if (m_seenSyntaxError)
>+    if (UNLIKELY(m_seenSyntaxViolation))
>         m_asciiBuffer.append(codePoint);
> }
> 
> void URLParser::appendToASCIIBuffer(const char* characters, size_t length)
> {
>     ASSERT(m_unicodeFragmentBuffer.isEmpty());
>-    if (m_seenSyntaxError)
>+    if (UNLIKELY(m_seenSyntaxViolation))
>         m_asciiBuffer.append(characters, length);
> }
> 
>@@ -463,11 +463,11 @@ void URLParser::checkWindowsDriveLetter(
> {
>     if (isWindowsDriveLetter(iterator)) {
>         appendToASCIIBuffer(*iterator);
>-        incrementIteratorSkippingTabAndNewLine(iterator);
>+        incrementIteratorSkippingTabsAndNewlines(iterator);
>         ASSERT(!iterator.atEnd());
>         ASSERT(*iterator == ':' || *iterator == '|');
>         appendToASCIIBuffer(':');
>-        incrementIteratorSkippingTabAndNewLine(iterator);
>+        incrementIteratorSkippingTabsAndNewlines(iterator);
>     }
> }
> 
>@@ -478,10 +478,10 @@ bool URLParser::shouldCopyFileURL(CodePo
>         return true;
>     if (iterator.atEnd())
>         return false;
>-    incrementIteratorSkippingTabAndNewLine(iterator);
>+    incrementIteratorSkippingTabsAndNewlines(iterator);
>     if (iterator.atEnd())
>         return true;
>-    incrementIteratorSkippingTabAndNewLine(iterator);
>+    incrementIteratorSkippingTabsAndNewlines(iterator);
>     if (iterator.atEnd())
>         return true;
>     return !isSlashQuestionOrHash(*iterator);
>@@ -504,17 +504,21 @@ void URLParser::percentEncodeByte(uint8_
> const char replacementCharacterUTF8PercentEncoded[10] = "%EF%BF%BD";
> const size_t replacementCharacterUTF8PercentEncodedLength = sizeof(replacementCharacterUTF8PercentEncoded) - 1;
> 
>-template<bool(*isInCodeSet)(UChar32)>
>-void URLParser::utf8PercentEncode(UChar32 codePoint)
>+template<bool(*isInCodeSet)(UChar32), typename CharacterType>
>+void URLParser::utf8PercentEncode(const CodePointIterator<CharacterType>& iterator)
> {
>-    if (isASCII(codePoint)) {
>-        if (isInCodeSet(codePoint))
>+    ASSERT(!iterator.atEnd());
>+    UChar32 codePoint = *iterator;
>+    if (LIKELY(isASCII(codePoint))) {
>+        if (UNLIKELY(isInCodeSet(codePoint))) {
>+            syntaxViolation(iterator);
>             percentEncodeByte(codePoint);
>-        else
>+        } else
>             appendToASCIIBuffer(codePoint);
>         return;
>     }
>     ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");
>+    syntaxViolation(iterator);
>     
>     if (!U_IS_UNICODE_CHAR(codePoint)) {
>         appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
>@@ -528,17 +532,22 @@ void URLParser::utf8PercentEncode(UChar3
>         percentEncodeByte(buffer[i]);
> }
> 
>-
>-void URLParser::utf8QueryEncode(UChar32 codePoint)
>+template<typename CharacterType>
>+void URLParser::utf8QueryEncode(const CodePointIterator<CharacterType>& iterator)
> {
>-    if (isASCII(codePoint)) {
>-        if (shouldPercentEncodeQueryByte(codePoint))
>+    ASSERT(!iterator.atEnd());
>+    UChar32 codePoint = *iterator;
>+    if (LIKELY(isASCII(codePoint))) {
>+        if (UNLIKELY(shouldPercentEncodeQueryByte(codePoint))) {
>+            syntaxViolation(iterator);
>             percentEncodeByte(codePoint);
>-        else
>+        } else
>             appendToASCIIBuffer(codePoint);
>         return;
>     }
>     
>+    syntaxViolation(iterator);
>+    
>     if (!U_IS_UNICODE_CHAR(codePoint)) {
>         appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
>         return;
>@@ -748,8 +757,11 @@ void URLParser::copyASCIIStringUntil(con
>     }
> }
> 
>-void URLParser::copyURLPartsUntil(const URL& base, URLPart part)
>+template<typename CharacterType>
>+void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator)
> {
>+    syntaxViolation(iterator);
>+
>     m_asciiBuffer.clear();
>     m_unicodeFragmentBuffer.clear();
>     if (part == URLPart::FragmentEnd) {
>@@ -933,9 +945,45 @@ void URLParser::popPath()
> }
> 
> template<typename CharacterType>
>-void URLParser::syntaxError(const CodePointIterator<CharacterType>&)
>+void URLParser::syntaxViolation(const CodePointIterator<CharacterType>& iterator)
>+{
>+    if (m_seenSyntaxViolation)
>+        return;
>+    m_seenSyntaxViolation = true;
>+    
>+    ASSERT(m_asciiBuffer.isEmpty());
>+    ASSERT(m_unicodeFragmentBuffer.isEmpty());
>+    ASSERT_WITH_MESSAGE(!m_url.m_queryEnd, "syntaxViolation should not be used in the fragment, which might contain non-ASCII code points when serialized");
>+    size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
>+    RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
>+    m_asciiBuffer.reserveCapacity(m_inputString.length());
>+    for (size_t i = 0; i < codeUnitsToCopy; ++i) {
>+        ASSERT(isASCII(m_inputString[i]));
>+        m_asciiBuffer.uncheckedAppend(m_inputString[i]);
>+    }
>+}
>+
>+template<typename CharacterType>
>+void URLParser::fragmentSyntaxViolation(const CodePointIterator<CharacterType>& iterator)
> {
>-    // FIXME: Implement.
>+    if (m_seenSyntaxViolation)
>+        return;
>+    m_seenSyntaxViolation = true;
>+
>+    ASSERT(m_asciiBuffer.isEmpty());
>+    ASSERT(m_unicodeFragmentBuffer.isEmpty());
>+    size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
>+    size_t asciiCodeUnitsToCopy = m_url.m_queryEnd;
>+    size_t unicodeCodeUnitsToCopy = codeUnitsToCopy - asciiCodeUnitsToCopy;
>+    RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
>+    m_asciiBuffer.reserveCapacity(asciiCodeUnitsToCopy);
>+    for (size_t i = 0; i < asciiCodeUnitsToCopy; ++i) {
>+        ASSERT(isASCII(m_inputString[i]));
>+        m_asciiBuffer.uncheckedAppend(m_inputString[i]);
>+    }
>+    m_unicodeFragmentBuffer.reserveCapacity(m_inputString.length() - asciiCodeUnitsToCopy);
>+    for (size_t i = 0; i < unicodeCodeUnitsToCopy; ++i)
>+        m_unicodeFragmentBuffer.uncheckedAppend(m_inputString[i + asciiCodeUnitsToCopy]);
> }
> 
> void URLParser::failure()
>@@ -944,11 +992,23 @@ void URLParser::failure()
>     m_url.m_string = m_inputString;
> }
> 
>+StringView URLParser::parsedDataView(size_t start, size_t length)
>+{
>+    if (UNLIKELY(m_seenSyntaxViolation)) {
>+        ASSERT(start + length <= m_asciiBuffer.size());
>+        return StringView(m_asciiBuffer.data() + start, length);
>+    }
>+    ASSERT(start + length <= m_inputString.length());
>+    return StringView(m_inputString).substring(start, length);
>+}
>+
> template<typename CharacterType>
> size_t URLParser::currentPosition(const CodePointIterator<CharacterType>& iterator)
> {
>-    if (m_seenSyntaxError)
>+    if (UNLIKELY(m_seenSyntaxViolation)) {
>+        ASSERT(m_unicodeFragmentBuffer.isEmpty());
>         return m_asciiBuffer.size();
>+    }
>     
>     return iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
> }
>@@ -956,8 +1016,11 @@ size_t URLParser::currentPosition(const 
> URLParser::URLParser(const String& input, const URL& base, const TextEncoding& encoding)
>     : m_inputString(input)
> {
>-    if (input.isNull())
>+    if (input.isNull()) {
>+        if (base.isValid() && !base.m_cannotBeABaseURL)
>+            m_url = base;
>         return;
>+    }
> 
>     if (input.is8Bit()) {
>         m_inputBegin = input.characters8();
>@@ -966,6 +1029,9 @@ URLParser::URLParser(const String& input
>         m_inputBegin = input.characters16();
>         parse(input.characters16(), input.length(), base, encoding);
>     }
>+    ASSERT(!m_url.m_isValid
>+        || m_seenSyntaxViolation == (m_url.string() != input)
>+        || (input.isEmpty() && m_url.m_string == base.m_string));
> }
> 
> template<typename CharacterType>
>@@ -975,18 +1041,21 @@ void URLParser::parse(const CharacterTyp
>     m_url = { };
>     ASSERT(m_asciiBuffer.isEmpty());
>     ASSERT(m_unicodeFragmentBuffer.isEmpty());
>-    m_asciiBuffer.reserveInitialCapacity(length);
>     
>     bool isUTF8Encoding = encoding == UTF8Encoding();
>     Vector<UChar> queryBuffer;
> 
>     unsigned endIndex = length;
>-    while (endIndex && isC0ControlOrSpace(input[endIndex - 1]))
>+    while (UNLIKELY(endIndex && isC0ControlOrSpace(input[endIndex - 1]))) {
>+        syntaxViolation(CodePointIterator<CharacterType>(input, input));
>         endIndex--;
>+    }
>     CodePointIterator<CharacterType> c(input, input + endIndex);
>     CodePointIterator<CharacterType> authorityOrHostBegin;
>-    while (!c.atEnd() && isC0ControlOrSpace(*c))
>+    while (UNLIKELY(!c.atEnd() && isC0ControlOrSpace(*c))) {
>+        syntaxViolation(c);
>         ++c;
>+    }
>     auto beginAfterControlAndSpace = c;
> 
>     enum class State : uint8_t {
>@@ -1011,13 +1080,13 @@ void URLParser::parse(const CharacterTyp
>         Fragment,
>     };
> 
>-#define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, asciiBuffer size %zu", x, *c, currentPosition(c))
>+#define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, parsed data <%s> size %zu", x, *c, parsedDataView(0, currentPosition(c)).utf8().data(), currentPosition(c))
> #define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x)
> 
>     State state = State::SchemeStart;
>     while (!c.atEnd()) {
>-        if (isTabOrNewline(*c)) {
>-            syntaxError(c);
>+        if (UNLIKELY(isTabOrNewline(*c))) {
>+            syntaxViolation(c);
>             ++c;
>             continue;
>         }
>@@ -1026,8 +1095,10 @@ void URLParser::parse(const CharacterTyp
>         case State::SchemeStart:
>             LOG_STATE("SchemeStart");
>             if (isASCIIAlpha(*c)) {
>+                if (UNLIKELY(isASCIIUpper(*c)))
>+                    syntaxViolation(c);
>                 appendToASCIIBuffer(toASCIILower(*c));
>-                incrementIteratorSkippingTabAndNewLine(c);
>+                incrementIteratorSkippingTabsAndNewlines(c);
>                 if (c.atEnd()) {
>                     m_asciiBuffer.clear();
>                     state = State::NoScheme;
>@@ -1039,36 +1110,40 @@ void URLParser::parse(const CharacterTyp
>             break;
>         case State::Scheme:
>             LOG_STATE("Scheme");
>-            if (isValidSchemeCharacter(*c))
>+            if (isValidSchemeCharacter(*c)) {
>+                if (UNLIKELY(isASCIIUpper(*c)))
>+                    syntaxViolation(c);
>                 appendToASCIIBuffer(toASCIILower(*c));
>-            else if (*c == ':') {
>+            } else if (*c == ':') {
>                 m_url.m_schemeEnd = currentPosition(c);
>-                StringView urlScheme = StringView(m_asciiBuffer.data(), m_url.m_schemeEnd);
>+                StringView urlScheme = parsedDataView(0, m_url.m_schemeEnd);
>                 m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
>+                appendToASCIIBuffer(':');
>                 if (urlScheme == "file") {
>                     m_urlIsSpecial = true;
>                     state = State::File;
>-                    appendToASCIIBuffer(':');
>                     ++c;
>                     break;
>                 }
>-                appendToASCIIBuffer(':');
>                 if (isSpecialScheme(urlScheme)) {
>                     m_urlIsSpecial = true;
>-                    if (base.protocolIs(m_asciiBuffer.data(), currentPosition(c) - 1))
>+                    if (base.protocolIs(urlScheme))
>                         state = State::SpecialRelativeOrAuthority;
>                     else
>                         state = State::SpecialAuthoritySlashes;
>+                    ++c;
>                 } else {
>                     auto maybeSlash = c;
>-                    incrementIteratorSkippingTabAndNewLine(maybeSlash);
>+                    incrementIteratorSkippingTabsAndNewlines(maybeSlash);
>                     if (!maybeSlash.atEnd() && *maybeSlash == '/') {
>                         appendToASCIIBuffer('/');
>-                        m_url.m_userStart = currentPosition(c);
>-                        state = State::PathOrAuthority;
>                         c = maybeSlash;
>+                        state = State::PathOrAuthority;
>                         ASSERT(*c == '/');
>+                        ++c;
>+                        m_url.m_userStart = currentPosition(c);
>                     } else {
>+                        ++c;
>                         m_url.m_userStart = currentPosition(c);
>                         m_url.m_userEnd = m_url.m_userStart;
>                         m_url.m_passwordEnd = m_url.m_userStart;
>@@ -1079,7 +1154,6 @@ void URLParser::parse(const CharacterTyp
>                         state = State::CannotBeABaseURLPath;
>                     }
>                 }
>-                ++c;
>                 break;
>             } else {
>                 m_asciiBuffer.clear();
>@@ -1087,7 +1161,7 @@ void URLParser::parse(const CharacterTyp
>                 c = beginAfterControlAndSpace;
>                 break;
>             }
>-            incrementIteratorSkippingTabAndNewLine(c);
>+            incrementIteratorSkippingTabsAndNewlines(c);
>             if (c.atEnd()) {
>                 m_asciiBuffer.clear();
>                 state = State::NoScheme;
>@@ -1101,7 +1175,7 @@ void URLParser::parse(const CharacterTyp
>                 return;
>             }
>             if (base.m_cannotBeABaseURL && *c == '#') {
>-                copyURLPartsUntil(base, URLPart::QueryEnd);
>+                copyURLPartsUntil(base, URLPart::QueryEnd, c);
>                 state = State::Fragment;
>                 appendToASCIIBuffer('#');
>                 ++c;
>@@ -1111,7 +1185,7 @@ void URLParser::parse(const CharacterTyp
>                 state = State::Relative;
>                 break;
>             }
>-            copyURLPartsUntil(base, URLPart::SchemeEnd);
>+            copyURLPartsUntil(base, URLPart::SchemeEnd, c);
>             appendToASCIIBuffer(':');
>             state = State::File;
>             break;
>@@ -1119,7 +1193,7 @@ void URLParser::parse(const CharacterTyp
>             LOG_STATE("SpecialRelativeOrAuthority");
>             if (*c == '/') {
>                 appendToASCIIBuffer('/');
>-                incrementIteratorSkippingTabAndNewLine(c);
>+                incrementIteratorSkippingTabsAndNewlines(c);
>                 if (c.atEnd()) {
>                     failure();
>                     return;
>@@ -1137,12 +1211,12 @@ void URLParser::parse(const CharacterTyp
>             LOG_STATE("PathOrAuthority");
>             if (*c == '/') {
>                 appendToASCIIBuffer('/');
>-                m_url.m_userStart = currentPosition(c);
>                 state = State::AuthorityOrHost;
>                 ++c;
>+                m_url.m_userStart = currentPosition(c);
>                 authorityOrHostBegin = c;
>             } else {
>-                ASSERT(m_asciiBuffer.last() == '/');
>+                ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
>                 m_url.m_userStart = currentPosition(c) - 1;
>                 m_url.m_userEnd = m_url.m_userStart;
>                 m_url.m_passwordEnd = m_url.m_userStart;
>@@ -1161,19 +1235,19 @@ void URLParser::parse(const CharacterTyp
>                 ++c;
>                 break;
>             case '?':
>-                copyURLPartsUntil(base, URLPart::PathEnd);
>+                copyURLPartsUntil(base, URLPart::PathEnd, c);
>                 appendToASCIIBuffer('?');
>                 state = State::Query;
>                 ++c;
>                 break;
>             case '#':
>-                copyURLPartsUntil(base, URLPart::QueryEnd);
>+                copyURLPartsUntil(base, URLPart::QueryEnd, c);
>                 appendToASCIIBuffer('#');
>                 state = State::Fragment;
>                 ++c;
>                 break;
>             default:
>-                copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
>+                copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
>                 state = State::Path;
>                 break;
>             }
>@@ -1182,11 +1256,11 @@ void URLParser::parse(const CharacterTyp
>             LOG_STATE("RelativeSlash");
>             if (*c == '/' || *c == '\\') {
>                 ++c;
>-                copyURLPartsUntil(base, URLPart::SchemeEnd);
>+                copyURLPartsUntil(base, URLPart::SchemeEnd, c);
>                 appendToASCIIBuffer("://", 3);
>                 state = State::SpecialAuthorityIgnoreSlashes;
>             } else {
>-                copyURLPartsUntil(base, URLPart::PortEnd);
>+                copyURLPartsUntil(base, URLPart::PortEnd, c);
>                 appendToASCIIBuffer('/');
>                 m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
>                 state = State::Path;
>@@ -1194,11 +1268,23 @@ void URLParser::parse(const CharacterTyp
>             break;
>         case State::SpecialAuthoritySlashes:
>             LOG_STATE("SpecialAuthoritySlashes");
>-            appendToASCIIBuffer("//", 2);
>-            if (*c == '/' || *c == '\\') {
>-                incrementIteratorSkippingTabAndNewLine(c);
>-                if (!c.atEnd() && (*c == '/' || *c == '\\'))
>+            if (LIKELY(*c == '/' || *c == '\\')) {
>+                if (UNLIKELY(*c == '\\'))
>+                    syntaxViolation(c);
>+                appendToASCIIBuffer('/');
>+                incrementIteratorSkippingTabsAndNewlines(c);
>+                if (LIKELY(!c.atEnd() && (*c == '/' || *c == '\\'))) {
>+                    if (UNLIKELY(*c == '\\'))
>+                        syntaxViolation(c);
>                     ++c;
>+                    appendToASCIIBuffer('/');
>+                } else {
>+                    syntaxViolation(c);
>+                    appendToASCIIBuffer('/');
>+                }
>+            } else {
>+                syntaxViolation(c);
>+                appendToASCIIBuffer("//", 2);
>             }
>             state = State::SpecialAuthorityIgnoreSlashes;
>             break;
>@@ -1213,8 +1299,9 @@ void URLParser::parse(const CharacterTyp
>             authorityOrHostBegin = c;
>             break;
>         case State::AuthorityOrHost:
>-            LOG_STATE("AuthorityOrHost");
>+            CaseAuthorityOrHost:
>             {
>+                LOG_STATE("AuthorityOrHost");
>                 if (*c == '@') {
>                     auto lastAt = c;
>                     auto findLastAt = c;
>@@ -1225,7 +1312,7 @@ void URLParser::parse(const CharacterTyp
>                     }
>                     parseAuthority(CodePointIterator<CharacterType>(authorityOrHostBegin, lastAt));
>                     c = lastAt;
>-                    incrementIteratorSkippingTabAndNewLine(c);
>+                    incrementIteratorSkippingTabsAndNewlines(c);
>                     authorityOrHostBegin = c;
>                     state = State::Host;
>                     m_hostHasPercentOrNonASCII = false;
>@@ -1233,13 +1320,14 @@ void URLParser::parse(const CharacterTyp
>                 }
>                 bool isSlash = *c == '/' || (m_urlIsSpecial && *c == '\\');
>                 if (isSlash || *c == '?' || *c == '#') {
>-                    m_url.m_userEnd = currentPosition(c);
>+                    m_url.m_userEnd = currentPosition(authorityOrHostBegin);
>                     m_url.m_passwordEnd = m_url.m_userEnd;
>                     if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {
>                         failure();
>                         return;
>                     }
>-                    if (!isSlash) {
>+                    if (UNLIKELY(!isSlash)) {
>+                        syntaxViolation(c);
>                         appendToASCIIBuffer('/');
>                         m_url.m_pathAfterLastSlash = currentPosition(c);
>                     }
>@@ -1249,8 +1337,12 @@ void URLParser::parse(const CharacterTyp
>                 if (isPercentOrNonASCII(*c))
>                     m_hostHasPercentOrNonASCII = true;
>                 ++c;
>+                if (c.atEnd())
>+                    break;
>             }
>-            break;
>+            // Skip the check for tabs which might cause a syntaxViolation.
>+            // We want to handle the syntaxViolations while actually parsing the authority or host.
>+            goto CaseAuthorityOrHost;
>         case State::Host:
>             LOG_STATE("Host");
>             if (*c == '/' || *c == '?' || *c == '#') {
>@@ -1268,15 +1360,18 @@ void URLParser::parse(const CharacterTyp
>         case State::File:
>             LOG_STATE("File");
>             switch (*c) {
>-            case '/':
>             case '\\':
>+                syntaxViolation(c);
>+                FALLTHROUGH;
>+            case '/':
>                 appendToASCIIBuffer('/');
>                 state = State::FileSlash;
>                 ++c;
>                 break;
>             case '?':
>+                syntaxViolation(c);
>                 if (base.isValid() && base.protocolIs("file"))
>-                    copyURLPartsUntil(base, URLPart::PathEnd);
>+                    copyURLPartsUntil(base, URLPart::PathEnd, c);
>                 appendToASCIIBuffer("///?", 4);
>                 m_url.m_userStart = currentPosition(c) - 2;
>                 m_url.m_userEnd = m_url.m_userStart;
>@@ -1289,8 +1384,9 @@ void URLParser::parse(const CharacterTyp
>                 ++c;
>                 break;
>             case '#':
>+                syntaxViolation(c);
>                 if (base.isValid() && base.protocolIs("file"))
>-                    copyURLPartsUntil(base, URLPart::QueryEnd);
>+                    copyURLPartsUntil(base, URLPart::QueryEnd, c);
>                 appendToASCIIBuffer("///#", 4);
>                 m_url.m_userStart = currentPosition(c) - 2;
>                 m_url.m_userEnd = m_url.m_userStart;
>@@ -1304,8 +1400,9 @@ void URLParser::parse(const CharacterTyp
>                 ++c;
>                 break;
>             default:
>+                syntaxViolation(c);
>                 if (base.isValid() && base.protocolIs("file") && shouldCopyFileURL(c))
>-                    copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
>+                    copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
>                 else {
>                     appendToASCIIBuffer("///", 3);
>                     m_url.m_userStart = currentPosition(c) - 1;
>@@ -1322,7 +1419,9 @@ void URLParser::parse(const CharacterTyp
>             break;
>         case State::FileSlash:
>             LOG_STATE("FileSlash");
>-            if (*c == '/' || *c == '\\') {
>+            if (LIKELY(*c == '/' || *c == '\\')) {
>+                if (UNLIKELY(*c == '\\'))
>+                    syntaxViolation(c);
>                 ++c;
>                 appendToASCIIBuffer('/');
>                 m_url.m_userStart = currentPosition(c);
>@@ -1347,6 +1446,7 @@ void URLParser::parse(const CharacterTyp
>                     }
>                 }
>             }
>+            syntaxViolation(c);
>             appendToASCIIBuffer("//", 2);
>             m_url.m_userStart = currentPosition(c) - 1;
>             m_url.m_userEnd = m_url.m_userStart;
>@@ -1365,22 +1465,24 @@ void URLParser::parse(const CharacterTyp
>                     break;
>                 }
>                 if (authorityOrHostBegin == c) {
>-                    ASSERT(m_asciiBuffer[currentPosition(c) - 1] == '/');
>-                    if (*c == '?') {
>+                    ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
>+                    if (UNLIKELY(*c == '?')) {
>+                        syntaxViolation(c);
>                         appendToASCIIBuffer("/?", 2);
>+                        ++c;
>                         m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
>                         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
>                         state = State::Query;
>-                        ++c;
>                         break;
>                     }
>-                    if (*c == '#') {
>+                    if (UNLIKELY(*c == '#')) {
>+                        syntaxViolation(c);
>                         appendToASCIIBuffer("/#", 2);
>+                        ++c;
>                         m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
>                         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
>                         m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
>                         state = State::Fragment;
>-                        ++c;
>                         break;
>                     }
>                     state = State::Path;
>@@ -1390,8 +1492,8 @@ void URLParser::parse(const CharacterTyp
>                     failure();
>                     return;
>                 }
>-                
>-                if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd) == "localhost")  {
>+                if (UNLIKELY(equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost"))) {
>+                    syntaxViolation(c);
>                     m_asciiBuffer.shrink(m_url.m_passwordEnd);
>                     m_url.m_hostEnd = currentPosition(c);
>                     m_url.m_portEnd = m_url.m_hostEnd;
>@@ -1413,18 +1515,22 @@ void URLParser::parse(const CharacterTyp
>         case State::Path:
>             LOG_STATE("Path");
>             if (*c == '/' || (m_urlIsSpecial && *c == '\\')) {
>+                if (UNLIKELY(m_urlIsSpecial && *c == '\\'))
>+                    syntaxViolation(c);
>                 appendToASCIIBuffer('/');
>-                m_url.m_pathAfterLastSlash = currentPosition(c);
>                 ++c;
>+                m_url.m_pathAfterLastSlash = currentPosition(c);
>                 break;
>             }
>-            if (currentPosition(c) && m_asciiBuffer[currentPosition(c) - 1] == '/') {
>-                if (isDoubleDotPathSegment(c)) {
>+            if (UNLIKELY(currentPosition(c) && parsedDataView(currentPosition(c) - 1, 1) == "/")) {
>+                if (UNLIKELY(isDoubleDotPathSegment(c))) {
>+                    syntaxViolation(c);
>                     consumeDoubleDotPathSegment(c);
>                     popPath();
>                     break;
>                 }
>-                if (m_asciiBuffer[currentPosition(c) - 1] == '/' && isSingleDotPathSegment(c)) {
>+                if (UNLIKELY(isSingleDotPathSegment(c))) {
>+                    syntaxViolation(c);
>                     consumeSingleDotPathSegment(c);
>                     break;
>                 }
>@@ -1441,6 +1547,8 @@ void URLParser::parse(const CharacterTyp
>                 break;
>             }
>             if (isPercentEncodedDot(c)) {
>+                if (UNLIKELY(*c != '.'))
>+                    syntaxViolation(c);
>                 appendToASCIIBuffer('.');
>                 ASSERT(*c == '%');
>                 ++c;
>@@ -1450,7 +1558,7 @@ void URLParser::parse(const CharacterTyp
>                 ++c;
>                 break;
>             }
>-            utf8PercentEncode<isInDefaultEncodeSet>(*c);
>+            utf8PercentEncode<isInDefaultEncodeSet>(c);
>             ++c;
>             break;
>         case State::CannotBeABaseURLPath:
>@@ -1464,10 +1572,10 @@ void URLParser::parse(const CharacterTyp
>                 state = State::Fragment;
>             } else if (*c == '/') {
>                 appendToASCIIBuffer('/');
>-                m_url.m_pathAfterLastSlash = currentPosition(c);
>                 ++c;
>+                m_url.m_pathAfterLastSlash = currentPosition(c);
>             } else {
>-                utf8PercentEncode<isInSimpleEncodeSet>(*c);
>+                utf8PercentEncode<isInSimpleEncodeSet>(c);
>                 ++c;
>             }
>             break;
>@@ -1481,26 +1589,43 @@ void URLParser::parse(const CharacterTyp
>                 break;
>             }
>             if (isUTF8Encoding)
>-                utf8QueryEncode(*c);
>+                utf8QueryEncode(c);
>             else
>                 appendCodePoint(queryBuffer, *c);
>             ++c;
>             break;
>         case State::Fragment:
>-            LOG_STATE("Fragment");
>-            if (m_unicodeFragmentBuffer.isEmpty() && isASCII(*c))
>+            CaseFragment:
>+            LOG(URLParser, "State Fragment");
>+            if (!m_seenUnicodeFragmentCodePoint && isASCII(*c))
>                 appendToASCIIBuffer(*c);
>-            else
>-                appendCodePoint(m_unicodeFragmentBuffer, *c);
>+            else {
>+                m_seenUnicodeFragmentCodePoint = true;
>+                if (UNLIKELY(m_seenSyntaxViolation))
>+                    appendCodePoint(m_unicodeFragmentBuffer, *c);
>+                else {
>+                    ASSERT(m_asciiBuffer.isEmpty());
>+                    ASSERT(m_unicodeFragmentBuffer.isEmpty());
>+                }
>+            }
>             ++c;
>-            break;
>+            while (UNLIKELY(!c.atEnd() && isTabOrNewline(*c))) {
>+                fragmentSyntaxViolation(c);
>+                ++c;
>+            }
>+            if (c.atEnd())
>+                break;
>+
>+            // Skip the check for tabs and newlines, which might cause a syntaxViolation.
>+            // In a fragment we need to handle them differently, with fragmentSyntaxViolation.
>+            goto CaseFragment;
>         }
>     }
> 
>     switch (state) {
>     case State::SchemeStart:
>         LOG_FINAL_STATE("SchemeStart");
>-        if (!currentPosition(c) && base.isValid()) {
>+        if (!currentPosition(c) && base.isValid() && !base.m_cannotBeABaseURL) {
>             m_url = base;
>             return;
>         }
>@@ -1515,14 +1640,14 @@ void URLParser::parse(const CharacterTyp
>         RELEASE_ASSERT_NOT_REACHED();
>     case State::SpecialRelativeOrAuthority:
>         LOG_FINAL_STATE("SpecialRelativeOrAuthority");
>-        copyURLPartsUntil(base, URLPart::QueryEnd);
>+        copyURLPartsUntil(base, URLPart::QueryEnd, c);
>         m_url.m_fragmentEnd = m_url.m_queryEnd;
>         break;
>     case State::PathOrAuthority:
>         LOG_FINAL_STATE("PathOrAuthority");
>         ASSERT(m_url.m_userStart);
>         ASSERT(m_url.m_userStart == currentPosition(c));
>-        ASSERT(m_asciiBuffer.last() == '/');
>+        ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
>         m_url.m_userStart--;
>         m_url.m_userEnd = m_url.m_userStart;
>         m_url.m_passwordEnd = m_url.m_userStart;
>@@ -1535,11 +1660,11 @@ void URLParser::parse(const CharacterTyp
>         break;
>     case State::Relative:
>         LOG_FINAL_STATE("Relative");
>-        copyURLPartsUntil(base, URLPart::FragmentEnd);
>+        copyURLPartsUntil(base, URLPart::FragmentEnd, c);
>         break;
>     case State::RelativeSlash:
>         LOG_FINAL_STATE("RelativeSlash");
>-        copyURLPartsUntil(base, URLPart::PortEnd);
>+        copyURLPartsUntil(base, URLPart::PortEnd, c);
>         appendToASCIIBuffer('/');
>         m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
>         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
>@@ -1565,7 +1690,7 @@ void URLParser::parse(const CharacterTyp
>         break;
>     case State::AuthorityOrHost:
>         LOG_FINAL_STATE("AuthorityOrHost");
>-        m_url.m_userEnd = currentPosition(c);
>+        m_url.m_userEnd = currentPosition(authorityOrHostBegin);
>         m_url.m_passwordEnd = m_url.m_userEnd;
>         if (authorityOrHostBegin.atEnd()) {
>             m_url.m_hostEnd = m_url.m_userEnd;
>@@ -1574,6 +1699,7 @@ void URLParser::parse(const CharacterTyp
>             failure();
>             return;
>         }
>+        syntaxViolation(c);
>         appendToASCIIBuffer('/');
>         m_url.m_pathEnd = m_url.m_portEnd + 1;
>         m_url.m_pathAfterLastSlash = m_url.m_pathEnd;
>@@ -1586,6 +1712,7 @@ void URLParser::parse(const CharacterTyp
>             failure();
>             return;
>         }
>+        syntaxViolation(c);
>         appendToASCIIBuffer('/');
>         m_url.m_pathEnd = m_url.m_portEnd + 1;
>         m_url.m_pathAfterLastSlash = m_url.m_pathEnd;
>@@ -1595,9 +1722,10 @@ void URLParser::parse(const CharacterTyp
>     case State::File:
>         LOG_FINAL_STATE("File");
>         if (base.isValid() && base.protocolIs("file")) {
>-            copyURLPartsUntil(base, URLPart::QueryEnd);
>+            copyURLPartsUntil(base, URLPart::QueryEnd, c);
>             appendToASCIIBuffer(':');
>         }
>+        syntaxViolation(c);
>         appendToASCIIBuffer("///", 3);
>         m_url.m_userStart = currentPosition(c) - 1;
>         m_url.m_userEnd = m_url.m_userStart;
>@@ -1611,8 +1739,9 @@ void URLParser::parse(const CharacterTyp
>         break;
>     case State::FileSlash:
>         LOG_FINAL_STATE("FileSlash");
>+        syntaxViolation(c);
>+        m_url.m_userStart = currentPosition(c) + 1;
>         appendToASCIIBuffer("//", 2);
>-        m_url.m_userStart = currentPosition(c) - 1;
>         m_url.m_userEnd = m_url.m_userStart;
>         m_url.m_passwordEnd = m_url.m_userStart;
>         m_url.m_hostEnd = m_url.m_userStart;
>@@ -1625,6 +1754,7 @@ void URLParser::parse(const CharacterTyp
>     case State::FileHost:
>         LOG_FINAL_STATE("FileHost");
>         if (authorityOrHostBegin == c) {
>+            syntaxViolation(c);
>             appendToASCIIBuffer('/');
>             m_url.m_userStart = currentPosition(c) - 1;
>             m_url.m_userEnd = m_url.m_userStart;
>@@ -1643,7 +1773,8 @@ void URLParser::parse(const CharacterTyp
>             return;
>         }
> 
>-        if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd) == "localhost")  {
>+        syntaxViolation(c);
>+        if (equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost")) {
>             m_asciiBuffer.shrink(m_url.m_passwordEnd);
>             m_url.m_hostEnd = currentPosition(c);
>             m_url.m_portEnd = m_url.m_hostEnd;
>@@ -1677,20 +1808,24 @@ void URLParser::parse(const CharacterTyp
>         m_url.m_fragmentEnd = m_url.m_queryEnd;
>         break;
>     case State::Fragment:
>-        LOG_FINAL_STATE("Fragment");
>-        m_url.m_fragmentEnd = currentPosition(c) + m_unicodeFragmentBuffer.size();
>-        break;
>+        {
>+            LOG_FINAL_STATE("Fragment");
>+            size_t length = m_seenSyntaxViolation ? m_asciiBuffer.size() + m_unicodeFragmentBuffer.size() : c.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
>+            m_url.m_fragmentEnd = length;
>+            break;
>+        }
>     }
> 
>-    if (!m_seenSyntaxError) {
>+    if (LIKELY(!m_seenSyntaxViolation)) {
>         m_url.m_string = m_inputString;
>         ASSERT(m_asciiBuffer.isEmpty());
>         ASSERT(m_unicodeFragmentBuffer.isEmpty());
>-    } else if (m_unicodeFragmentBuffer.isEmpty())
>+    } else if (!m_seenUnicodeFragmentCodePoint) {
>+        ASSERT(m_unicodeFragmentBuffer.isEmpty());
>         m_url.m_string = String::adopt(WTFMove(m_asciiBuffer));
>-    else {
>+    } else {
>         Vector<UChar> buffer;
>-        buffer.reserveInitialCapacity(currentPosition(c) + m_unicodeFragmentBuffer.size());
>+        buffer.reserveInitialCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size());
>         buffer.appendVector(m_asciiBuffer);
>         buffer.appendVector(m_unicodeFragmentBuffer);
>         m_url.m_string = String::adopt(WTFMove(buffer));
>@@ -1703,16 +1838,20 @@ void URLParser::parse(const CharacterTyp
> template<typename CharacterType>
> void URLParser::parseAuthority(CodePointIterator<CharacterType> iterator)
> {
>-    if (iterator.atEnd()) {
>+    if (UNLIKELY(iterator.atEnd())) {
>+        syntaxViolation(iterator);
>         m_url.m_userEnd = currentPosition(iterator);
>         m_url.m_passwordEnd = m_url.m_userEnd;
>         return;
>     }
>-    for (; !iterator.atEnd(); ++iterator) {
>+    auto authorityOrHostBegin = iterator;
>+    for (; !iterator.atEnd(); incrementIteratorSkippingTabsAndNewlines(iterator)) {
>         if (*iterator == ':') {
>-            ++iterator;
>             m_url.m_userEnd = currentPosition(iterator);
>-            if (iterator.atEnd()) {
>+            auto iteratorAtColon = iterator;
>+            incrementIteratorSkippingTabsAndNewlines(iterator, authorityOrHostBegin);
>+            if (UNLIKELY(iterator.atEnd())) {
>+                syntaxViolation(iteratorAtColon);
>                 m_url.m_passwordEnd = m_url.m_userEnd;
>                 if (m_url.m_userEnd > m_url.m_userStart)
>                     appendToASCIIBuffer('@');
>@@ -1721,10 +1860,10 @@ void URLParser::parseAuthority(CodePoint
>             appendToASCIIBuffer(':');
>             break;
>         }
>-        utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);
>+        utf8PercentEncode<isInUserInfoEncodeSet>(iterator);
>     }
>-    for (; !iterator.atEnd(); ++iterator)
>-        utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);
>+    for (; !iterator.atEnd(); incrementIteratorSkippingTabsAndNewlines(iterator))
>+        utf8PercentEncode<isInUserInfoEncodeSet>(iterator);
>     m_url.m_passwordEnd = currentPosition(iterator);
>     if (!m_url.m_userEnd)
>         m_url.m_userEnd = m_url.m_passwordEnd;
>@@ -1824,7 +1963,7 @@ void URLParser::serializeIPv6(URLParser:
> }
> 
> template<typename CharacterType>
>-inline static Optional<uint32_t> parseIPv4Number(CodePointIterator<CharacterType>& iterator)
>+Optional<uint32_t> URLParser::parseIPv4Number(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)
> {
>     // FIXME: Check for overflow.
>     enum class State : uint8_t {
>@@ -1843,7 +1982,7 @@ inline static Optional<uint32_t> parseIP
>         }
>         switch (state) {
>         case State::UnknownBase:
>-            if (*iterator == '0') {
>+            if (UNLIKELY(*iterator == '0')) {
>                 ++iterator;
>                 state = State::OctalOrHex;
>                 break;
>@@ -1851,6 +1990,7 @@ inline static Optional<uint32_t> parseIP
>             state = State::Decimal;
>             break;
>         case State::OctalOrHex:
>+            syntaxViolation(iteratorForSyntaxViolationPosition);
>             if (*iterator == 'x' || *iterator == 'X') {
>                 ++iterator;
>                 state = State::Hex;
>@@ -1866,6 +2006,7 @@ inline static Optional<uint32_t> parseIP
>             ++iterator;
>             break;
>         case State::Octal:
>+            ASSERT(m_seenSyntaxViolation);
>             if (*iterator < '0' || *iterator > '7')
>                 return Nullopt;
>             value *= 8;
>@@ -1873,6 +2014,7 @@ inline static Optional<uint32_t> parseIP
>             ++iterator;
>             break;
>         case State::Hex:
>+            ASSERT(m_seenSyntaxViolation);
>             if (!isASCIIHexDigit(*iterator))
>                 return Nullopt;
>             value *= 16;
>@@ -1894,12 +2036,14 @@ inline static uint64_t pow256(size_t exp
> template<typename CharacterType>
> Optional<URLParser::IPv4Address> URLParser::parseIPv4Host(CodePointIterator<CharacterType> iterator)
> {
>+    auto hostBegin = iterator;
>+
>     Vector<uint32_t, 4> items;
>     items.reserveInitialCapacity(4);
>     while (!iterator.atEnd()) {
>         if (items.size() >= 4)
>             return Nullopt;
>-        if (auto item = parseIPv4Number(iterator))
>+        if (auto item = parseIPv4Number(iterator, hostBegin))
>             items.append(item.value());
>         else
>             return Nullopt;
>@@ -1918,6 +2062,10 @@ Optional<URLParser::IPv4Address> URLPars
>         if (item > 255)
>             return Nullopt;
>     }
>+
>+    if (UNLIKELY(items.size() != 4))
>+        syntaxViolation(hostBegin);
>+
>     IPv4Address ipv4 = items.takeLast();
>     for (size_t counter = 0; counter < items.size(); ++counter)
>         ipv4 += items[counter] * pow256(3 - counter);
>@@ -1927,6 +2075,9 @@ Optional<URLParser::IPv4Address> URLPars
> template<typename CharacterType>
> Optional<URLParser::IPv6Address> URLParser::parseIPv6Host(CodePointIterator<CharacterType> c)
> {
>+    ASSERT(*c == '[');
>+    auto hostBegin = c;
>+    incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>     if (c.atEnd())
>         return Nullopt;
> 
>@@ -1935,12 +2086,12 @@ Optional<URLParser::IPv6Address> URLPars
>     Optional<size_t> compressPointer;
> 
>     if (*c == ':') {
>-        ++c;
>+        incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>         if (c.atEnd())
>             return Nullopt;
>         if (*c != ':')
>             return Nullopt;
>-        ++c;
>+        incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>         ++piecePointer;
>         compressPointer = piecePointer;
>     }
>@@ -1951,26 +2102,30 @@ Optional<URLParser::IPv6Address> URLPars
>         if (*c == ':') {
>             if (compressPointer)
>                 return Nullopt;
>-            ++c;
>+            incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>             ++piecePointer;
>             compressPointer = piecePointer;
>             continue;
>         }
>         uint16_t value = 0;
>-        for (size_t length = 0; length < 4; length++) {
>+        size_t length = 0;
>+        for (; length < 4; length++) {
>             if (c.atEnd())
>                 break;
>             if (!isASCIIHexDigit(*c))
>                 break;
>             value = value * 0x10 + toASCIIHexValue(*c);
>-            ++c;
>+            incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>         }
>+        if (UNLIKELY(length > 1 && !value))
>+            syntaxViolation(hostBegin);
>+
>         address[piecePointer++] = value;
>         if (c.atEnd())
>             break;
>         if (*c != ':')
>             return Nullopt;
>-        ++c;
>+        incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>     }
>     
>     if (!c.atEnd()) {
>@@ -1989,7 +2144,7 @@ Optional<URLParser::IPv6Address> URLPars
>                     return Nullopt;
>                 else
>                     value = value.value() * 10 + number;
>-                ++c;
>+                incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>                 if (c.atEnd())
>                     return Nullopt;
>                 if (value.value() > 255)
>@@ -2001,7 +2156,7 @@ Optional<URLParser::IPv6Address> URLPars
>             if (dotsSeen == 1 || dotsSeen == 3)
>                 piecePointer++;
>             if (!c.atEnd())
>-                ++c;
>+                incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
>             if (dotsSeen == 3 && !c.atEnd())
>                 return Nullopt;
>             dotsSeen++;
>@@ -2014,6 +2169,13 @@ Optional<URLParser::IPv6Address> URLPars
>             std::swap(address[piecePointer--], address[compressPointer.value() + swaps-- - 1]);
>     } else if (piecePointer != 8)
>         return Nullopt;
>+
>+    Optional<size_t> possibleCompressPointer = findLongestZeroSequence(address);
>+    if (possibleCompressPointer)
>+        possibleCompressPointer.value()++;
>+    if (UNLIKELY(compressPointer != possibleCompressPointer))
>+        syntaxViolation(hostBegin);
>+    
>     return address;
> }
> 
>@@ -2105,15 +2267,20 @@ inline static bool hasInvalidDomainChara
> template<typename CharacterType>
> bool URLParser::parsePort(CodePointIterator<CharacterType>& iterator)
> {
>+    ASSERT(*iterator == ':');
>+    auto colonIterator = iterator;
>+    incrementIteratorSkippingTabsAndNewlines(iterator, colonIterator);
>     uint32_t port = 0;
>-    if (iterator.atEnd()) {
>-        m_url.m_portEnd = currentPosition(iterator);
>+    if (UNLIKELY(iterator.atEnd())) {
>+        m_url.m_portEnd = currentPosition(colonIterator);
>+        syntaxViolation(colonIterator);
>         return true;
>     }
>-    appendToASCIIBuffer(':');
>     for (; !iterator.atEnd(); ++iterator) {
>-        if (isTabOrNewline(*iterator))
>+        if (UNLIKELY(isTabOrNewline(*iterator))) {
>+            syntaxViolation(colonIterator);
>             continue;
>+        }
>         if (isASCIIDigit(*iterator)) {
>             port = port * 10 + *iterator - '0';
>             if (port > std::numeric_limits<uint16_t>::max())
>@@ -2122,10 +2289,10 @@ bool URLParser::parsePort(CodePointItera
>             return false;
>     }
> 
>-    if (isDefaultPort(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd), port)) {
>-        ASSERT(m_asciiBuffer.last() == ':');
>-        m_asciiBuffer.shrink(currentPosition(iterator) - 1);
>-    } else {
>+    if (UNLIKELY(isDefaultPort(parsedDataView(0, m_url.m_schemeEnd), port)))
>+        syntaxViolation(colonIterator);
>+    else {
>+        appendToASCIIBuffer(':');
>         ASSERT(port <= std::numeric_limits<uint16_t>::max());
>         appendNumberToASCIIBuffer<uint16_t>(static_cast<uint16_t>(port));
>     }
>@@ -2140,27 +2307,27 @@ bool URLParser::parseHostAndPort(CodePoi
>     if (iterator.atEnd())
>         return false;
>     if (*iterator == '[') {
>-        ++iterator;
>         auto ipv6End = iterator;
>         while (!ipv6End.atEnd() && *ipv6End != ']')
>             ++ipv6End;
>         if (auto address = parseIPv6Host(CodePointIterator<CharacterType>(iterator, ipv6End))) {
>             serializeIPv6(address.value());
>-            m_url.m_hostEnd = currentPosition(iterator);
>             if (!ipv6End.atEnd()) {
>-                ++ipv6End;
>+                incrementIteratorSkippingTabsAndNewlines(ipv6End);
>                 if (!ipv6End.atEnd() && *ipv6End == ':') {
>-                    ++ipv6End;
>+                    m_url.m_hostEnd = currentPosition(ipv6End);
>                     return parsePort(ipv6End);
>                 }
>-                m_url.m_portEnd = currentPosition(iterator);
>+                m_url.m_hostEnd = currentPosition(ipv6End);
>+                m_url.m_portEnd = m_url.m_hostEnd;
>                 return true;
>             }
>+            m_url.m_hostEnd = currentPosition(ipv6End);
>             return true;
>         }
>     }
>-    
>-    if (!m_hostHasPercentOrNonASCII) {
>+
>+    if (LIKELY(!m_hostHasPercentOrNonASCII)) {
>         auto hostIterator = iterator;
>         for (; !iterator.atEnd(); ++iterator) {
>             if (isTabOrNewline(*iterator))
>@@ -2177,23 +2344,25 @@ bool URLParser::parseHostAndPort(CodePoi
>                 m_url.m_portEnd = currentPosition(iterator);
>                 return true;
>             }
>-            ++iterator;
>             return parsePort(iterator);
>         }
>         for (; hostIterator != iterator; ++hostIterator) {
>-            if (!isTabOrNewline(*hostIterator))
>+            if (LIKELY(!isTabOrNewline(*hostIterator))) {
>+                if (UNLIKELY(isASCIIUpper(*hostIterator)))
>+                    syntaxViolation(hostIterator);
>                 appendToASCIIBuffer(toASCIILower(*hostIterator));
>+            } else
>+                syntaxViolation(hostIterator);
>         }
>         m_url.m_hostEnd = currentPosition(iterator);
>-        if (!hostIterator.atEnd()) {
>-            ASSERT(*hostIterator == ':');
>-            incrementIteratorSkippingTabAndNewLine(hostIterator);
>+        if (!hostIterator.atEnd())
>             return parsePort(hostIterator);
>-        }
>         m_url.m_portEnd = currentPosition(iterator);
>         return true;
>     }
>     
>+    syntaxViolation(iterator);
>+    
>     Vector<LChar, defaultInlineBufferSize> utf8Encoded;
>     for (; !iterator.atEnd(); ++iterator) {
>         if (isTabOrNewline(*iterator))
>@@ -2223,17 +2392,13 @@ bool URLParser::parseHostAndPort(CodePoi
>             m_url.m_portEnd = currentPosition(iterator);
>             return true;
>         }
>-        ++iterator;
>         return parsePort(iterator);
>     }
> 
>     appendToASCIIBuffer(asciiDomainCharacters, asciiDomainValue.size());
>     m_url.m_hostEnd = currentPosition(iterator);
>-    if (!iterator.atEnd()) {
>-        ASSERT(*iterator == ':');
>-        incrementIteratorSkippingTabAndNewLine(iterator);
>+    if (!iterator.atEnd())
>         return parsePort(iterator);
>-    }
>     m_url.m_portEnd = currentPosition(iterator);
>     return true;
> }
>@@ -2281,7 +2446,7 @@ inline static void serializeURLEncodedFo
>             || (byte >= 0x30 && byte <= 0x39)
>             || (byte >= 0x41 && byte <= 0x5A)
>             || byte == 0x5F
>-            || (byte >= 0x61 && byte <= 0x7A))
>+            || (byte >= 0x61 && byte <= 0x7A)) // FIXME: Put these in the characterClassTable to avoid branches.
>             output.append(byte);
>         else
>             percentEncodeByte(byte, output);
>@@ -2349,7 +2514,7 @@ bool URLParser::allValuesEqual(const URL
> }
> 
> bool URLParser::internalValuesConsistent(const URL& url)
>-{    
>+{
>     return url.m_schemeEnd <= url.m_userStart
>         && url.m_userStart <= url.m_userEnd
>         && url.m_userEnd <= url.m_passwordEnd
>Index: Source/WebCore/platform/URLParser.h
>===================================================================
>--- Source/WebCore/platform/URLParser.h	(revision 206412)
>+++ Source/WebCore/platform/URLParser.h	(working copy)
>@@ -28,7 +28,6 @@
> #include "TextEncoding.h"
> #include "URL.h"
> #include <wtf/Forward.h>
>-#include <wtf/text/StringBuilder.h>
> 
> namespace WebCore {
> 
>@@ -53,13 +52,13 @@ private:
>     URL m_url;
>     Vector<LChar> m_asciiBuffer;
>     Vector<UChar> m_unicodeFragmentBuffer;
>+    bool m_seenUnicodeFragmentCodePoint { false };
>     bool m_urlIsSpecial { false };
>     bool m_hostHasPercentOrNonASCII { false };
>     String m_inputString;
>     const void* m_inputBegin { nullptr };
> 
>-    // FIXME: This should start out as false and only change to true when we see a syntax error once syntax error handling is implemented.
>-    bool m_seenSyntaxError { true };
>+    bool m_seenSyntaxViolation { false };
> 
>     template<typename CharacterType> void parse(const CharacterType*, const unsigned length, const URL&, const TextEncoding&);
>     template<typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>);
>@@ -67,32 +66,36 @@ private:
>     template<typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&);
> 
>     void failure();
>-    template<typename CharacterType> void incrementIteratorSkippingTabAndNewLine(CodePointIterator<CharacterType>&);
>-    template<typename CharacterType> void syntaxError(const CodePointIterator<CharacterType>&);
>+    template<typename CharacterType> void incrementIteratorSkippingTabsAndNewlines(CodePointIterator<CharacterType>& iterator) { incrementIteratorSkippingTabsAndNewlines(iterator, iterator); }
>+    template<typename CharacterType> void incrementIteratorSkippingTabsAndNewlines(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
>+    template<typename CharacterType> void syntaxViolation(const CodePointIterator<CharacterType>&);
>+    template<typename CharacterType> void fragmentSyntaxViolation(const CodePointIterator<CharacterType>&);
>     template<typename CharacterType> bool isWindowsDriveLetter(CodePointIterator<CharacterType>);
>     template<typename CharacterType> bool shouldCopyFileURL(CodePointIterator<CharacterType>);
>     template<typename CharacterType> void checkWindowsDriveLetter(CodePointIterator<CharacterType>&);
>     template<typename CharacterType> size_t currentPosition(const CodePointIterator<CharacterType>&);
>     template<typename UnsignedIntegerType> void appendNumberToASCIIBuffer(UnsignedIntegerType);
>-    template<bool(*isInCodeSet)(UChar32)> void utf8PercentEncode(UChar32);
>-    void utf8QueryEncode(UChar32);
>+    template<bool(*isInCodeSet)(UChar32), typename CharacterType> void utf8PercentEncode(const CodePointIterator<CharacterType>&);
>+    template<typename CharacterType> void utf8QueryEncode(const CodePointIterator<CharacterType>&);
>     void percentEncodeByte(uint8_t);
>     void appendToASCIIBuffer(UChar32);
>     void appendToASCIIBuffer(const char*, size_t);
>     void appendToASCIIBuffer(const LChar* characters, size_t size) { appendToASCIIBuffer(reinterpret_cast<const char*>(characters), size); }
>     void encodeQuery(const Vector<UChar>& source, const TextEncoding&);
>     void copyASCIIStringUntil(const String&, size_t lengthIf8Bit, size_t lengthIf16Bit);
>+    StringView parsedDataView(size_t start, size_t length);
> 
>     using IPv4Address = uint32_t;
>     void serializeIPv4(IPv4Address);
>     template<typename CharacterType> Optional<IPv4Address> parseIPv4Host(CodePointIterator<CharacterType>);
>+    template<typename CharacterType> Optional<uint32_t> parseIPv4Number(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
>     using IPv6Address = std::array<uint16_t, 8>;
>     template<typename CharacterType> Optional<IPv6Address> parseIPv6Host(CodePointIterator<CharacterType>);
>     void serializeIPv6Piece(uint16_t piece);
>     void serializeIPv6(URLParser::IPv6Address);
> 
>     enum class URLPart;
>-    void copyURLPartsUntil(const URL& base, URLPart);
>+    template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&);
>     static size_t urlLengthUntilPart(const URL&, URLPart);
>     void popPath();
> };
>Index: Tools/ChangeLog
>===================================================================
>--- Tools/ChangeLog	(revision 206412)
>+++ Tools/ChangeLog	(working copy)
>@@ -1,3 +1,13 @@
>+2016-09-26  Alex Christensen  <achristensen@webkit.org>
>+
>+        Implement URLParser::syntaxViolation
>+        https://bugs.webkit.org/show_bug.cgi?id=162593
>+
>+        Reviewed by NOBODY (OOPS!).
>+
>+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
>+        (TestWebKitAPI::TEST_F):
>+
> 2016-09-26  Ryan Haddad  <ryanhaddad@apple.com>
> 
>         Add Sierra to the flakiness dashboard.
>Index: Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp
>===================================================================
>--- Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	(revision 206412)
>+++ Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	(working copy)
>@@ -119,6 +119,15 @@ TEST_F(URLParserTest, Basic)
>     checkURL("http://[0:f::f:f:0:0]", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});
>     checkURL("http://[0:f:0:0:f::]", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
>     checkURL("http://[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
>+    checkURL("http://[0:f:0:0:f::]:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
>+    checkURL("http://[0:f:0:0:f::]:\t", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
>+    checkURL("http://[0:f:0:0:f::]\t:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
>+    checkURL("http://\t[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
>+    checkURL("http://[\t::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
>+    checkURL("http://[:\t:f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
>+    checkURL("http://[::\tf:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
>+    checkURL("http://[::f\t:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
>+    checkURL("http://[::f:\t0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
>     checkURL("http://example.com/path1/path2/.", {"http", "", "", "example.com", 0, "/path1/path2/", "", "", "http://example.com/path1/path2/"});
>     checkURL("http://example.com/path1/path2/..", {"http", "", "", "example.com", 0, "/path1/", "", "", "http://example.com/path1/"});
>     checkURL("http://example.com/path1/path2/./path3", {"http", "", "", "example.com", 0, "/path1/path2/path3", "", "", "http://example.com/path1/path2/path3"});
>@@ -191,12 +200,23 @@ TEST_F(URLParserTest, Basic)
>     checkURL("http://host/a%20B", {"http", "", "", "host", 0, "/a%20B", "", "", "http://host/a%20B"});
>     checkURL("http://host?q=@ <>!#fragment", {"http", "", "", "host", 0, "/", "q=@%20%3C%3E!", "fragment", "http://host/?q=@%20%3C%3E!#fragment"});
>     checkURL("http://user:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
>+    checkURL("http://user:@\thost", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
>+    checkURL("http://user\t:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
>+    checkURL("http://use\tr:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
>     checkURL("http://127.0.0.1:10100/path", {"http", "", "", "127.0.0.1", 10100, "/path", "", "", "http://127.0.0.1:10100/path"});
>     checkURL("http://127.0.0.1:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
>+    checkURL("http://127.0.0.1\t:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
>+    checkURL("http://127.0.0.1:\t/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
>+    checkURL("http://127.0.0.1:/\tpath", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
>     checkURL("http://127.0.0.1:123", {"http", "", "", "127.0.0.1", 123, "/", "", "", "http://127.0.0.1:123/"});
>     checkURL("http://127.0.0.1:", {"http", "", "", "127.0.0.1", 0, "/", "", "", "http://127.0.0.1/"});
>     checkURL("http://[0:f::f:f:0:0]:123/path", {"http", "", "", "[0:f::f:f:0:0]", 123, "/path", "", "", "http://[0:f::f:f:0:0]:123/path"});
>     checkURL("http://[0:f::f:f:0:0]:123", {"http", "", "", "[0:f::f:f:0:0]", 123, "/", "", "", "http://[0:f::f:f:0:0]:123/"});
>+    checkURL("http://[0:f:0:0:f:\t:]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
>+    checkURL("http://[0:f:0:0:f::\t]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
>+    checkURL("http://[0:f:0:0:f::]\t:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
>+    checkURL("http://[0:f:0:0:f::]:\t123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
>+    checkURL("http://[0:f:0:0:f::]:1\t23", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
>     checkURL("http://[0:f::f:f:0:0]:/path", {"http", "", "", "[0:f::f:f:0:0]", 0, "/path", "", "", "http://[0:f::f:f:0:0]/path"});
>     checkURL("http://[0:f::f:f:0:0]:", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});
>     checkURL("http://host:10100/path", {"http", "", "", "host", 10100, "/path", "", "", "http://host:10100/path"});
>@@ -210,6 +230,9 @@ TEST_F(URLParserTest, Basic)
>     checkURL("sc:/pa/", {"sc", "", "", "", 0, "/pa/", "", "", "sc:/pa/"});
>     checkURL("notspecial:/notuser:notpassword@nothost", {"notspecial", "", "", "", 0, "/notuser:notpassword@nothost", "", "", "notspecial:/notuser:notpassword@nothost"});
>     checkURL("sc://pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
>+    checkURL("sc://\tpa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
>+    checkURL("sc:/\t/pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
>+    checkURL("sc:\t//pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
>     checkURL("http://host   \a   ", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
>     checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
>     checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
>@@ -217,11 +240,14 @@ TEST_F(URLParserTest, Basic)
>     checkURL("http://256/", {"http", "", "", "256", 0, "/", "", "", "http://256/"});
>     checkURL("http://256./", {"http", "", "", "256.", 0, "/", "", "", "http://256./"});
>     checkURL("http://123.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
>+    checkURL("http://123\t.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
>+    checkURL("http://123.\t256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
>     checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
>     checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
>     checkURL("notspecial:/", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
>     checkURL("data:image/png;base64,encoded-data-follows-here", {"data", "", "", "", 0, "image/png;base64,encoded-data-follows-here", "", "", "data:image/png;base64,encoded-data-follows-here"});
>     checkURL("data:image/png;base64,encoded/data-with-slash", {"data", "", "", "", 0, "image/png;base64,encoded/data-with-slash", "", "", "data:image/png;base64,encoded/data-with-slash"});
>+    checkURL("about:~", {"about", "", "", "", 0, "~", "", "", "about:~"});
> 
>     // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
>     // and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.
>@@ -287,6 +313,8 @@ TEST_F(URLParserTest, ParseRelative)
>     checkRelativeURL("\\@", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/@", "", "", "http://example.org/@"});
>     checkRelativeURL("/path3", "http://user@example.org/path1/path2", {"http", "user", "", "example.org", 0, "/path3", "", "", "http://user@example.org/path3"});
>     checkRelativeURL("", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
>+    checkRelativeURL("\t", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
>+    checkRelativeURL(" ", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
>     checkRelativeURL("  \a  \t\n", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
>     checkRelativeURL(":foo.com\\", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/:foo.com/", "", "", "http://example.org/foo/:foo.com/"});
>     checkRelativeURL("http:/example.com/", "about:blank", {"http", "", "", "example.com", 0, "/", "", "", "http://example.com/"});
>@@ -314,6 +342,7 @@ TEST_F(URLParserTest, ParseRelative)
>     checkRelativeURL("notspecial:/", "http://host", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
>     checkRelativeURL("foo:/", "http://example.org/foo/bar", {"foo", "", "", "", 0, "/", "", "", "foo:/"});
>     checkRelativeURL("://:0/", "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/://:0/", "", "", "http://webkit.org/://:0/"});
>+    checkRelativeURL(String(), "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/", "", "", "http://webkit.org/"});
> 
>     // The checking of slashes in SpecialAuthoritySlashes needed to get this to pass contradicts what is in the spec,
>     // but it is included in the web platform tests.
>@@ -590,12 +619,42 @@ TEST_F(URLParserTest, ParserDifferences)
>     checkURLDifferences(utf16String(u"http://host/path#💩\t💩"),
>         {"http", "", "", "host", 0, "/path", "", utf16String(u"💩💩"), utf16String(u"http://host/path#💩💩")},
>         {"http", "", "", "host", 0, "/path", "", "%F0%9F%92%A9%F0%9F%92%A9", "http://host/path#%F0%9F%92%A9%F0%9F%92%A9"});
>+    checkURLDifferences("http://%48OsT",
>+        {"http", "", "", "host", 0, "/", "", "", "http://host/"},
>+        {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
>+    checkURLDifferences("http://h%4FsT",
>+        {"http", "", "", "host", 0, "/", "", "", "http://host/"},
>+        {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});
>+    checkURLDifferences("http://h%4fsT",
>+        {"http", "", "", "host", 0, "/", "", "", "http://host/"},
>+        {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});
>+    checkURLDifferences("http://h%6fsT",
>+        {"http", "", "", "host", 0, "/", "", "", "http://host/"},
>+        {"http", "", "", "h%6fst", 0, "/", "", "", "http://h%6fst/"});
>+    checkURLDifferences("http://host/`",
>+        {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
>+        {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
>+    checkURLDifferences("aA://",
>+        {"aa", "", "", "", 0, "/", "", "", "aa:///"},
>+        {"aa", "", "", "", 0, "//", "", "", "aa://"});
>+    checkURLDifferences("A://",
>+        {"a", "", "", "", 0, "/", "", "", "a:///"},
>+        {"a", "", "", "", 0, "//", "", "", "a://"});
> }
> 
> TEST_F(URLParserTest, DefaultPort)
> {
>     checkURL("FtP://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>     checkURL("ftp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("f\ttp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("f\ttp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("f\ttp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("f\ttp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("f\ttp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("ftp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("ftp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("ftp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>+    checkURL("ftp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
>     checkURL("ftp://host:22/", {"ftp", "", "", "host", 22, "/", "", "", "ftp://host:22/"});
>     checkURLDifferences("ftp://host:21",
>         {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"},
>@@ -670,12 +729,6 @@ TEST_F(URLParserTest, DefaultPort)
>     checkURLDifferences("unknown://host:81",
>         {"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"},
>         {"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"});
>-    checkURLDifferences("http://%48OsT",
>-        {"http", "", "", "host", 0, "/", "", "", "http://host/"},
>-        {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
>-    checkURLDifferences("http://host/`",
>-        {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
>-        {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
> }
>     
> static void shouldFail(const String& urlString)
>@@ -694,6 +747,8 @@ TEST_F(URLParserTest, ParserFailures)
>     shouldFail("  \a  ");
>     shouldFail("");
>     shouldFail(String());
>+    shouldFail("", "about:blank");
>+    shouldFail(String(), "about:blank");
>     shouldFail("http://127.0.0.1:abc");
>     shouldFail("http://host:abc");
>     shouldFail("http://a:@", "about:blank");
>@@ -723,6 +778,7 @@ TEST_F(URLParserTest, ParserFailures)
>     shouldFail("://:0/");
>     shouldFail("://:0/", "");
>     shouldFail("://:0/", "about:blank");
>+    shouldFail("about~");
> }
> 
> // These are in the spec but not in the web platform tests.

Flags:

ggaren: review+

Actions: View | Formatted Diff | Diff

Attachments on bug 162593: 289917 | 289920 | 289952