Source/WebCore/ChangeLog

 12016-09-26 Alex Christensen <achristensen@webkit.org>
 2
 3 Implement URLParser::syntaxViolation
 4 https://bugs.webkit.org/show_bug.cgi?id=162593
 5
 6 Reviewed by NOBODY (OOPS!).
 7
 8 Most of the time when parsing URLs, we just look at the URL, find offsets of the host, path, query, etc.,
 9 and the String can be used untouched. When this happens, we do not want to allocate and copy the String.
 10 We want to just add a reference to an existing String.
 11
 12 Sometimes we need to canonicalize the String because there has been a syntaxViolation,
 13 defined as any way in which the input String differs from its canonicalized URL String. In such cases we need to
 14 allocate a new String and fill it with the canonicalized URL String. When a syntaxViolation happens for the
 15 first time, we assume that everything in the input String up to that point is equal to what it would have been
 16 if we had canonicalized the beginning of the URL, copy it into a buffer, and continue parsing in a mode where
 17 instead of just looking at the input URL String, we canonicalize each code point into the buffer.
 18
 19 Changes to behavior involve additional spec compliance with tabs and newlines in different places in URLs,
 20 as well as additional spec compliance when parsing empty and null URLs relative to other URLs.
 21 Both are covered by new API tests. Existing behavior covered by existing API tests.
 22
 23 This is about a 15% speed improvement on my URL parsing benchmark.
 24
 25 * platform/URL.cpp:
 26 (WebCore::assertProtocolIsGood):
 27 (WebCore::URL::protocolIs):
 28 (WebCore::protocolIs):
 29 * platform/URL.h:
 30 * platform/URLParser.cpp:
 31 (WebCore::isTabOrNewline):
 32 (WebCore::URLParser::incrementIteratorSkippingTabsAndNewlines):
 33 (WebCore::URLParser::isWindowsDriveLetter):
 34 (WebCore::URLParser::appendToASCIIBuffer):
 35 (WebCore::URLParser::checkWindowsDriveLetter):
 36 (WebCore::URLParser::shouldCopyFileURL):
 37 (WebCore::URLParser::utf8PercentEncode):
 38 (WebCore::URLParser::utf8QueryEncode):
 39 (WebCore::URLParser::copyURLPartsUntil):
 40 (WebCore::URLParser::syntaxViolation):
 41 (WebCore::URLParser::fragmentSyntaxViolation):
 42 (WebCore::URLParser::parsedDataView):
 43 (WebCore::URLParser::currentPosition):
 44 (WebCore::URLParser::URLParser):
 45 (WebCore::URLParser::parse):
 46 (WebCore::URLParser::parseAuthority):
 47 (WebCore::URLParser::parseIPv4Number):
 48 (WebCore::URLParser::parseIPv4Host):
 49 (WebCore::URLParser::parseIPv6Host):
 50 (WebCore::URLParser::parsePort):
 51 (WebCore::URLParser::parseHostAndPort):
 52 (WebCore::serializeURLEncodedForm):
 53 (WebCore::URLParser::allValuesEqual):
 54 (WebCore::URLParser::internalValuesConsistent):
 55 (WebCore::URLParser::incrementIteratorSkippingTabAndNewLine): Deleted.
 56 (WebCore::URLParser::syntaxError): Deleted.
 57 (WebCore::parseIPv4Number): Deleted.
 58 * platform/URLParser.h:
 59 (WebCore::URLParser::incrementIteratorSkippingTabsAndNewlines):
 60
1612016-09-26 Wenson Hsieh <wenson_hsieh@apple.com>
262
363 If you play a youtube video from now playing after it finished in Safari, controls disappear
206417

Source/WebCore/platform/URL.cpp

@@String URL::fileSystemPath() const
788788
789789#ifdef NDEBUG
790790
791 static inline void assertProtocolIsGood(const char*, size_t)
 791static inline void assertProtocolIsGood(StringView)
792792{
793793}
794794
795795#else
796796
797 static void assertProtocolIsGood(const char* protocol, size_t length)
 797static void assertProtocolIsGood(StringView protocol)
798798{
799  for (size_t i = 0; i < length; ++i) {
 799 for (size_t i = 0; i < protocol.length(); ++i) {
800800 const char c = protocol[i];
801801 ASSERT(c > ' ' && c < 0x7F && !(c >= 'A' && c <= 'Z'));
802802 }

@@static void assertProtocolIsGood(const c
806806
807807bool URL::protocolIs(const char* protocol) const
808808{
809  assertProtocolIsGood(protocol, strlen(protocol));
 809 assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
810810
811811 // JavaScript URLs are "valid" and should be executed even if URL decides they are invalid.
812812 // The free function protocolIsJavaScript() should be used instead.

@@bool URL::protocolIs(const char* protoco
823823 return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument.
824824}
825825
826 bool URL::protocolIs(const LChar* protocol, size_t length) const
 826bool URL::protocolIs(StringView protocol) const
827827{
828  assertProtocolIsGood(reinterpret_cast<const char*>(protocol), length);
 828 assertProtocolIsGood(protocol);
829829
830830 if (!m_isValid)
831831 return false;
832832
833  if (m_schemeEnd != length)
 833 if (m_schemeEnd != protocol.length())
834834 return false;
835835
836836 // Do the comparison without making a new string object.

@@String encodeWithURLEscapeSequences(cons
19141914
19151915static bool protocolIs(StringView stringURL, const char* protocol)
19161916{
1917  assertProtocolIsGood(protocol, strlen(protocol));
 1917 assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
19181918 unsigned length = stringURL.length();
19191919 for (unsigned i = 0; i < length; ++i) {
19201920 if (!protocol[i])

@@void URL::copyToBuffer(Vector<char, 512>
21412141bool protocolIs(const String& url, const char* protocol)
21422142{
21432143 // Do the comparison without making a new string object.
2144  assertProtocolIsGood(protocol, strlen(protocol));
 2144 assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
21452145 bool isLeading = true;
21462146 for (unsigned i = 0, j = 0; url[i]; ++i) {
21472147 // skip leading whitespace and control characters.
206412

Source/WebCore/platform/URL.h

@@public:
129129 // Returns true if the current URL's protocol is the same as the null-
130130 // terminated ASCII argument. The argument must be lower-case.
131131 WEBCORE_EXPORT bool protocolIs(const char*) const;
132  bool protocolIs(const LChar*, size_t) const;
 132 bool protocolIs(StringView) const;
133133 bool protocolIsBlob() const { return protocolIs("blob"); }
134134 bool protocolIsData() const { return protocolIs("data"); }
135135 bool protocolIsInHTTPFamily() const;
206412

Source/WebCore/platform/URLParser.cpp

@@template<typename CharacterType> inline
410410static bool shouldPercentEncodeQueryByte(uint8_t byte) { return characterClassTable[byte] & QueryPercent; }
411411
412412template<typename CharacterType>
413 void URLParser::incrementIteratorSkippingTabAndNewLine(CodePointIterator<CharacterType>& iterator)
 413void URLParser::incrementIteratorSkippingTabsAndNewlines(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)
414414{
415415 ++iterator;
416  while (!iterator.atEnd() && isTabOrNewline(*iterator)) {
417  syntaxError(iterator);
 416 while (UNLIKELY(!iterator.atEnd() && isTabOrNewline(*iterator))) {
 417 syntaxViolation(iteratorForSyntaxViolationPosition);
418418 ++iterator;
419419 }
420420}

@@bool URLParser::isWindowsDriveLetter(Cod
424424{
425425 if (iterator.atEnd() || !isASCIIAlpha(*iterator))
426426 return false;
427  incrementIteratorSkippingTabAndNewLine(iterator);
 427 incrementIteratorSkippingTabsAndNewlines(iterator);
428428 if (iterator.atEnd())
429429 return false;
430430 if (*iterator == ':')
431431 return true;
432  if (*iterator == '|') {
433  syntaxError(iterator);
 432 if (UNLIKELY(*iterator == '|')) {
 433 syntaxViolation(iterator);
434434 return true;
435435 }
436436 return false;

@@void URLParser::appendToASCIIBuffer(UCha
447447{
448448 ASSERT(m_unicodeFragmentBuffer.isEmpty());
449449 ASSERT(isASCII(codePoint));
450  if (m_seenSyntaxError)
 450 if (UNLIKELY(m_seenSyntaxViolation))
451451 m_asciiBuffer.append(codePoint);
452452}
453453
454454void URLParser::appendToASCIIBuffer(const char* characters, size_t length)
455455{
456456 ASSERT(m_unicodeFragmentBuffer.isEmpty());
457  if (m_seenSyntaxError)
 457 if (UNLIKELY(m_seenSyntaxViolation))
458458 m_asciiBuffer.append(characters, length);
459459}
460460

@@void URLParser::checkWindowsDriveLetter(
463463{
464464 if (isWindowsDriveLetter(iterator)) {
465465 appendToASCIIBuffer(*iterator);
466  incrementIteratorSkippingTabAndNewLine(iterator);
 466 incrementIteratorSkippingTabsAndNewlines(iterator);
467467 ASSERT(!iterator.atEnd());
468468 ASSERT(*iterator == ':' || *iterator == '|');
469469 appendToASCIIBuffer(':');
470  incrementIteratorSkippingTabAndNewLine(iterator);
 470 incrementIteratorSkippingTabsAndNewlines(iterator);
471471 }
472472}
473473

@@bool URLParser::shouldCopyFileURL(CodePo
478478 return true;
479479 if (iterator.atEnd())
480480 return false;
481  incrementIteratorSkippingTabAndNewLine(iterator);
 481 incrementIteratorSkippingTabsAndNewlines(iterator);
482482 if (iterator.atEnd())
483483 return true;
484  incrementIteratorSkippingTabAndNewLine(iterator);
 484 incrementIteratorSkippingTabsAndNewlines(iterator);
485485 if (iterator.atEnd())
486486 return true;
487487 return !isSlashQuestionOrHash(*iterator);

@@void URLParser::percentEncodeByte(uint8_
504504const char replacementCharacterUTF8PercentEncoded[10] = "%EF%BF%BD";
505505const size_t replacementCharacterUTF8PercentEncodedLength = sizeof(replacementCharacterUTF8PercentEncoded) - 1;
506506
507 template<bool(*isInCodeSet)(UChar32)>
508 void URLParser::utf8PercentEncode(UChar32 codePoint)
 507template<bool(*isInCodeSet)(UChar32), typename CharacterType>
 508void URLParser::utf8PercentEncode(const CodePointIterator<CharacterType>& iterator)
509509{
510  if (isASCII(codePoint)) {
511  if (isInCodeSet(codePoint))
 510 ASSERT(!iterator.atEnd());
 511 UChar32 codePoint = *iterator;
 512 if (LIKELY(isASCII(codePoint))) {
 513 if (UNLIKELY(isInCodeSet(codePoint))) {
 514 syntaxViolation(iterator);
512515 percentEncodeByte(codePoint);
513  else
 516 } else
514517 appendToASCIIBuffer(codePoint);
515518 return;
516519 }
517520 ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");
 521 syntaxViolation(iterator);
518522
519523 if (!U_IS_UNICODE_CHAR(codePoint)) {
520524 appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);

@@void URLParser::utf8PercentEncode(UChar3
528532 percentEncodeByte(buffer[i]);
529533}
530534
531 
532 void URLParser::utf8QueryEncode(UChar32 codePoint)
 535template<typename CharacterType>
 536void URLParser::utf8QueryEncode(const CodePointIterator<CharacterType>& iterator)
533537{
534  if (isASCII(codePoint)) {
535  if (shouldPercentEncodeQueryByte(codePoint))
 538 ASSERT(!iterator.atEnd());
 539 UChar32 codePoint = *iterator;
 540 if (LIKELY(isASCII(codePoint))) {
 541 if (UNLIKELY(shouldPercentEncodeQueryByte(codePoint))) {
 542 syntaxViolation(iterator);
536543 percentEncodeByte(codePoint);
537  else
 544 } else
538545 appendToASCIIBuffer(codePoint);
539546 return;
540547 }
541548
 549 syntaxViolation(iterator);
 550
542551 if (!U_IS_UNICODE_CHAR(codePoint)) {
543552 appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
544553 return;

@@void URLParser::copyASCIIStringUntil(con
748757 }
749758}
750759
751 void URLParser::copyURLPartsUntil(const URL& base, URLPart part)
 760template<typename CharacterType>
 761void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator)
752762{
 763 syntaxViolation(iterator);
 764
753765 m_asciiBuffer.clear();
754766 m_unicodeFragmentBuffer.clear();
755767 if (part == URLPart::FragmentEnd) {

@@void URLParser::popPath()
933945}
934946
935947template<typename CharacterType>
936 void URLParser::syntaxError(const CodePointIterator<CharacterType>&)
 948void URLParser::syntaxViolation(const CodePointIterator<CharacterType>& iterator)
 949{
 950 if (m_seenSyntaxViolation)
 951 return;
 952 m_seenSyntaxViolation = true;
 953
 954 ASSERT(m_asciiBuffer.isEmpty());
 955 ASSERT(m_unicodeFragmentBuffer.isEmpty());
 956 ASSERT_WITH_MESSAGE(!m_url.m_queryEnd, "syntaxViolation should not be used in the fragment, which might contain non-ASCII code points when serialized");
 957 size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
 958 RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
 959 m_asciiBuffer.reserveCapacity(m_inputString.length());
 960 for (size_t i = 0; i < codeUnitsToCopy; ++i) {
 961 ASSERT(isASCII(m_inputString[i]));
 962 m_asciiBuffer.uncheckedAppend(m_inputString[i]);
 963 }
 964}
 965
 966template<typename CharacterType>
 967void URLParser::fragmentSyntaxViolation(const CodePointIterator<CharacterType>& iterator)
937968{
938  // FIXME: Implement.
 969 if (m_seenSyntaxViolation)
 970 return;
 971 m_seenSyntaxViolation = true;
 972
 973 ASSERT(m_asciiBuffer.isEmpty());
 974 ASSERT(m_unicodeFragmentBuffer.isEmpty());
 975 size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
 976 size_t asciiCodeUnitsToCopy = m_url.m_queryEnd;
 977 size_t unicodeCodeUnitsToCopy = codeUnitsToCopy - asciiCodeUnitsToCopy;
 978 RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
 979 m_asciiBuffer.reserveCapacity(asciiCodeUnitsToCopy);
 980 for (size_t i = 0; i < asciiCodeUnitsToCopy; ++i) {
 981 ASSERT(isASCII(m_inputString[i]));
 982 m_asciiBuffer.uncheckedAppend(m_inputString[i]);
 983 }
 984 m_unicodeFragmentBuffer.reserveCapacity(m_inputString.length() - asciiCodeUnitsToCopy);
 985 for (size_t i = 0; i < unicodeCodeUnitsToCopy; ++i)
 986 m_unicodeFragmentBuffer.uncheckedAppend(m_inputString[i + asciiCodeUnitsToCopy]);
939987}
940988
941989void URLParser::failure()

@@void URLParser::failure()
944992 m_url.m_string = m_inputString;
945993}
946994
 995StringView URLParser::parsedDataView(size_t start, size_t length)
 996{
 997 if (UNLIKELY(m_seenSyntaxViolation)) {
 998 ASSERT(start + length <= m_asciiBuffer.size());
 999 return StringView(m_asciiBuffer.data() + start, length);
 1000 }
 1001 ASSERT(start + length <= m_inputString.length());
 1002 return StringView(m_inputString).substring(start, length);
 1003}
 1004
9471005template<typename CharacterType>
9481006size_t URLParser::currentPosition(const CodePointIterator<CharacterType>& iterator)
9491007{
950  if (m_seenSyntaxError)
 1008 if (UNLIKELY(m_seenSyntaxViolation)) {
 1009 ASSERT(m_unicodeFragmentBuffer.isEmpty());
9511010 return m_asciiBuffer.size();
 1011 }
9521012
9531013 return iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
9541014}

@@size_t URLParser::currentPosition(const
9561016URLParser::URLParser(const String& input, const URL& base, const TextEncoding& encoding)
9571017 : m_inputString(input)
9581018{
959  if (input.isNull())
 1019 if (input.isNull()) {
 1020 if (base.isValid() && !base.m_cannotBeABaseURL)
 1021 m_url = base;
9601022 return;
 1023 }
9611024
9621025 if (input.is8Bit()) {
9631026 m_inputBegin = input.characters8();

@@URLParser::URLParser(const String& input
9661029 m_inputBegin = input.characters16();
9671030 parse(input.characters16(), input.length(), base, encoding);
9681031 }
 1032 ASSERT(!m_url.m_isValid
 1033 || m_seenSyntaxViolation == (m_url.string() != input)
 1034 || (input.isEmpty() && m_url.m_string == base.m_string));
9691035}
9701036
9711037template<typename CharacterType>

@@void URLParser::parse(const CharacterTyp
9751041 m_url = { };
9761042 ASSERT(m_asciiBuffer.isEmpty());
9771043 ASSERT(m_unicodeFragmentBuffer.isEmpty());
978  m_asciiBuffer.reserveInitialCapacity(length);
9791044
9801045 bool isUTF8Encoding = encoding == UTF8Encoding();
9811046 Vector<UChar> queryBuffer;
9821047
9831048 unsigned endIndex = length;
984  while (endIndex && isC0ControlOrSpace(input[endIndex - 1]))
 1049 while (UNLIKELY(endIndex && isC0ControlOrSpace(input[endIndex - 1]))) {
 1050 syntaxViolation(CodePointIterator<CharacterType>(input, input));
9851051 endIndex--;
 1052 }
9861053 CodePointIterator<CharacterType> c(input, input + endIndex);
9871054 CodePointIterator<CharacterType> authorityOrHostBegin;
988  while (!c.atEnd() && isC0ControlOrSpace(*c))
 1055 while (UNLIKELY(!c.atEnd() && isC0ControlOrSpace(*c))) {
 1056 syntaxViolation(c);
9891057 ++c;
 1058 }
9901059 auto beginAfterControlAndSpace = c;
9911060
9921061 enum class State : uint8_t {

@@void URLParser::parse(const CharacterTyp
10111080 Fragment,
10121081 };
10131082
1014 #define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, asciiBuffer size %zu", x, *c, currentPosition(c))
 1083#define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, parsed data <%s> size %zu", x, *c, parsedDataView(0, currentPosition(c)).utf8().data(), currentPosition(c))
10151084#define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x)
10161085
10171086 State state = State::SchemeStart;
10181087 while (!c.atEnd()) {
1019  if (isTabOrNewline(*c)) {
1020  syntaxError(c);
 1088 if (UNLIKELY(isTabOrNewline(*c))) {
 1089 syntaxViolation(c);
10211090 ++c;
10221091 continue;
10231092 }

@@void URLParser::parse(const CharacterTyp
10261095 case State::SchemeStart:
10271096 LOG_STATE("SchemeStart");
10281097 if (isASCIIAlpha(*c)) {
 1098 if (UNLIKELY(isASCIIUpper(*c)))
 1099 syntaxViolation(c);
10291100 appendToASCIIBuffer(toASCIILower(*c));
1030  incrementIteratorSkippingTabAndNewLine(c);
 1101 incrementIteratorSkippingTabsAndNewlines(c);
10311102 if (c.atEnd()) {
10321103 m_asciiBuffer.clear();
10331104 state = State::NoScheme;

@@void URLParser::parse(const CharacterTyp
10391110 break;
10401111 case State::Scheme:
10411112 LOG_STATE("Scheme");
1042  if (isValidSchemeCharacter(*c))
 1113 if (isValidSchemeCharacter(*c)) {
 1114 if (UNLIKELY(isASCIIUpper(*c)))
 1115 syntaxViolation(c);
10431116 appendToASCIIBuffer(toASCIILower(*c));
1044  else if (*c == ':') {
 1117 } else if (*c == ':') {
10451118 m_url.m_schemeEnd = currentPosition(c);
1046  StringView urlScheme = StringView(m_asciiBuffer.data(), m_url.m_schemeEnd);
 1119 StringView urlScheme = parsedDataView(0, m_url.m_schemeEnd);
10471120 m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
 1121 appendToASCIIBuffer(':');
10481122 if (urlScheme == "file") {
10491123 m_urlIsSpecial = true;
10501124 state = State::File;
1051  appendToASCIIBuffer(':');
10521125 ++c;
10531126 break;
10541127 }
1055  appendToASCIIBuffer(':');
10561128 if (isSpecialScheme(urlScheme)) {
10571129 m_urlIsSpecial = true;
1058  if (base.protocolIs(m_asciiBuffer.data(), currentPosition(c) - 1))
 1130 if (base.protocolIs(urlScheme))
10591131 state = State::SpecialRelativeOrAuthority;
10601132 else
10611133 state = State::SpecialAuthoritySlashes;
 1134 ++c;
10621135 } else {
10631136 auto maybeSlash = c;
1064  incrementIteratorSkippingTabAndNewLine(maybeSlash);
 1137 incrementIteratorSkippingTabsAndNewlines(maybeSlash);
10651138 if (!maybeSlash.atEnd() && *maybeSlash == '/') {
10661139 appendToASCIIBuffer('/');
1067  m_url.m_userStart = currentPosition(c);
1068  state = State::PathOrAuthority;
10691140 c = maybeSlash;
 1141 state = State::PathOrAuthority;
10701142 ASSERT(*c == '/');
 1143 ++c;
 1144 m_url.m_userStart = currentPosition(c);
10711145 } else {
 1146 ++c;
10721147 m_url.m_userStart = currentPosition(c);
10731148 m_url.m_userEnd = m_url.m_userStart;
10741149 m_url.m_passwordEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
10791154 state = State::CannotBeABaseURLPath;
10801155 }
10811156 }
1082  ++c;
10831157 break;
10841158 } else {
10851159 m_asciiBuffer.clear();

@@void URLParser::parse(const CharacterTyp
10871161 c = beginAfterControlAndSpace;
10881162 break;
10891163 }
1090  incrementIteratorSkippingTabAndNewLine(c);
 1164 incrementIteratorSkippingTabsAndNewlines(c);
10911165 if (c.atEnd()) {
10921166 m_asciiBuffer.clear();
10931167 state = State::NoScheme;

@@void URLParser::parse(const CharacterTyp
11011175 return;
11021176 }
11031177 if (base.m_cannotBeABaseURL && *c == '#') {
1104  copyURLPartsUntil(base, URLPart::QueryEnd);
 1178 copyURLPartsUntil(base, URLPart::QueryEnd, c);
11051179 state = State::Fragment;
11061180 appendToASCIIBuffer('#');
11071181 ++c;

@@void URLParser::parse(const CharacterTyp
11111185 state = State::Relative;
11121186 break;
11131187 }
1114  copyURLPartsUntil(base, URLPart::SchemeEnd);
 1188 copyURLPartsUntil(base, URLPart::SchemeEnd, c);
11151189 appendToASCIIBuffer(':');
11161190 state = State::File;
11171191 break;

@@void URLParser::parse(const CharacterTyp
11191193 LOG_STATE("SpecialRelativeOrAuthority");
11201194 if (*c == '/') {
11211195 appendToASCIIBuffer('/');
1122  incrementIteratorSkippingTabAndNewLine(c);
 1196 incrementIteratorSkippingTabsAndNewlines(c);
11231197 if (c.atEnd()) {
11241198 failure();
11251199 return;

@@void URLParser::parse(const CharacterTyp
11371211 LOG_STATE("PathOrAuthority");
11381212 if (*c == '/') {
11391213 appendToASCIIBuffer('/');
1140  m_url.m_userStart = currentPosition(c);
11411214 state = State::AuthorityOrHost;
11421215 ++c;
 1216 m_url.m_userStart = currentPosition(c);
11431217 authorityOrHostBegin = c;
11441218 } else {
1145  ASSERT(m_asciiBuffer.last() == '/');
 1219 ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
11461220 m_url.m_userStart = currentPosition(c) - 1;
11471221 m_url.m_userEnd = m_url.m_userStart;
11481222 m_url.m_passwordEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
11611235 ++c;
11621236 break;
11631237 case '?':
1164  copyURLPartsUntil(base, URLPart::PathEnd);
 1238 copyURLPartsUntil(base, URLPart::PathEnd, c);
11651239 appendToASCIIBuffer('?');
11661240 state = State::Query;
11671241 ++c;
11681242 break;
11691243 case '#':
1170  copyURLPartsUntil(base, URLPart::QueryEnd);
 1244 copyURLPartsUntil(base, URLPart::QueryEnd, c);
11711245 appendToASCIIBuffer('#');
11721246 state = State::Fragment;
11731247 ++c;
11741248 break;
11751249 default:
1176  copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
 1250 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
11771251 state = State::Path;
11781252 break;
11791253 }

@@void URLParser::parse(const CharacterTyp
11821256 LOG_STATE("RelativeSlash");
11831257 if (*c == '/' || *c == '\\') {
11841258 ++c;
1185  copyURLPartsUntil(base, URLPart::SchemeEnd);
 1259 copyURLPartsUntil(base, URLPart::SchemeEnd, c);
11861260 appendToASCIIBuffer("://", 3);
11871261 state = State::SpecialAuthorityIgnoreSlashes;
11881262 } else {
1189  copyURLPartsUntil(base, URLPart::PortEnd);
 1263 copyURLPartsUntil(base, URLPart::PortEnd, c);
11901264 appendToASCIIBuffer('/');
11911265 m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
11921266 state = State::Path;

@@void URLParser::parse(const CharacterTyp
11941268 break;
11951269 case State::SpecialAuthoritySlashes:
11961270 LOG_STATE("SpecialAuthoritySlashes");
1197  appendToASCIIBuffer("//", 2);
1198  if (*c == '/' || *c == '\\') {
1199  incrementIteratorSkippingTabAndNewLine(c);
1200  if (!c.atEnd() && (*c == '/' || *c == '\\'))
 1271 if (LIKELY(*c == '/' || *c == '\\')) {
 1272 if (UNLIKELY(*c == '\\'))
 1273 syntaxViolation(c);
 1274 appendToASCIIBuffer('/');
 1275 incrementIteratorSkippingTabsAndNewlines(c);
 1276 if (LIKELY(!c.atEnd() && (*c == '/' || *c == '\\'))) {
 1277 if (UNLIKELY(*c == '\\'))
 1278 syntaxViolation(c);
12011279 ++c;
 1280 appendToASCIIBuffer('/');
 1281 } else {
 1282 syntaxViolation(c);
 1283 appendToASCIIBuffer('/');
 1284 }
 1285 } else {
 1286 syntaxViolation(c);
 1287 appendToASCIIBuffer("//", 2);
12021288 }
12031289 state = State::SpecialAuthorityIgnoreSlashes;
12041290 break;

@@void URLParser::parse(const CharacterTyp
12131299 authorityOrHostBegin = c;
12141300 break;
12151301 case State::AuthorityOrHost:
1216  LOG_STATE("AuthorityOrHost");
 1302 CaseAuthorityOrHost:
12171303 {
 1304 LOG_STATE("AuthorityOrHost");
12181305 if (*c == '@') {
12191306 auto lastAt = c;
12201307 auto findLastAt = c;

@@void URLParser::parse(const CharacterTyp
12251312 }
12261313 parseAuthority(CodePointIterator<CharacterType>(authorityOrHostBegin, lastAt));
12271314 c = lastAt;
1228  incrementIteratorSkippingTabAndNewLine(c);
 1315 incrementIteratorSkippingTabsAndNewlines(c);
12291316 authorityOrHostBegin = c;
12301317 state = State::Host;
12311318 m_hostHasPercentOrNonASCII = false;

@@void URLParser::parse(const CharacterTyp
12331320 }
12341321 bool isSlash = *c == '/' || (m_urlIsSpecial && *c == '\\');
12351322 if (isSlash || *c == '?' || *c == '#') {
1236  m_url.m_userEnd = currentPosition(c);
 1323 m_url.m_userEnd = currentPosition(authorityOrHostBegin);
12371324 m_url.m_passwordEnd = m_url.m_userEnd;
12381325 if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {
12391326 failure();
12401327 return;
12411328 }
1242  if (!isSlash) {
 1329 if (UNLIKELY(!isSlash)) {
 1330 syntaxViolation(c);
12431331 appendToASCIIBuffer('/');
12441332 m_url.m_pathAfterLastSlash = currentPosition(c);
12451333 }

@@void URLParser::parse(const CharacterTyp
12491337 if (isPercentOrNonASCII(*c))
12501338 m_hostHasPercentOrNonASCII = true;
12511339 ++c;
 1340 if (c.atEnd())
 1341 break;
12521342 }
1253  break;
 1343 // Skip the check for tabs which might cause a syntaxViolation.
 1344 // We want to handle the syntaxViolations while actually parsing the authority or host.
 1345 goto CaseAuthorityOrHost;
12541346 case State::Host:
12551347 LOG_STATE("Host");
12561348 if (*c == '/' || *c == '?' || *c == '#') {

@@void URLParser::parse(const CharacterTyp
12681360 case State::File:
12691361 LOG_STATE("File");
12701362 switch (*c) {
1271  case '/':
12721363 case '\\':
 1364 syntaxViolation(c);
 1365 FALLTHROUGH;
 1366 case '/':
12731367 appendToASCIIBuffer('/');
12741368 state = State::FileSlash;
12751369 ++c;
12761370 break;
12771371 case '?':
 1372 syntaxViolation(c);
12781373 if (base.isValid() && base.protocolIs("file"))
1279  copyURLPartsUntil(base, URLPart::PathEnd);
 1374 copyURLPartsUntil(base, URLPart::PathEnd, c);
12801375 appendToASCIIBuffer("///?", 4);
12811376 m_url.m_userStart = currentPosition(c) - 2;
12821377 m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
12891384 ++c;
12901385 break;
12911386 case '#':
 1387 syntaxViolation(c);
12921388 if (base.isValid() && base.protocolIs("file"))
1293  copyURLPartsUntil(base, URLPart::QueryEnd);
 1389 copyURLPartsUntil(base, URLPart::QueryEnd, c);
12941390 appendToASCIIBuffer("///#", 4);
12951391 m_url.m_userStart = currentPosition(c) - 2;
12961392 m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
13041400 ++c;
13051401 break;
13061402 default:
 1403 syntaxViolation(c);
13071404 if (base.isValid() && base.protocolIs("file") && shouldCopyFileURL(c))
1308  copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
 1405 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
13091406 else {
13101407 appendToASCIIBuffer("///", 3);
13111408 m_url.m_userStart = currentPosition(c) - 1;

@@void URLParser::parse(const CharacterTyp
13221419 break;
13231420 case State::FileSlash:
13241421 LOG_STATE("FileSlash");
1325  if (*c == '/' || *c == '\\') {
 1422 if (LIKELY(*c == '/' || *c == '\\')) {
 1423 if (UNLIKELY(*c == '\\'))
 1424 syntaxViolation(c);
13261425 ++c;
13271426 appendToASCIIBuffer('/');
13281427 m_url.m_userStart = currentPosition(c);

@@void URLParser::parse(const CharacterTyp
13471446 }
13481447 }
13491448 }
 1449 syntaxViolation(c);
13501450 appendToASCIIBuffer("//", 2);
13511451 m_url.m_userStart = currentPosition(c) - 1;
13521452 m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
13651465 break;
13661466 }
13671467 if (authorityOrHostBegin == c) {
1368  ASSERT(m_asciiBuffer[currentPosition(c) - 1] == '/');
1369  if (*c == '?') {
 1468 ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
 1469 if (UNLIKELY(*c == '?')) {
 1470 syntaxViolation(c);
13701471 appendToASCIIBuffer("/?", 2);
 1472 ++c;
13711473 m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
13721474 m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
13731475 state = State::Query;
1374  ++c;
13751476 break;
13761477 }
1377  if (*c == '#') {
 1478 if (UNLIKELY(*c == '#')) {
 1479 syntaxViolation(c);
13781480 appendToASCIIBuffer("/#", 2);
 1481 ++c;
13791482 m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
13801483 m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
13811484 m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
13821485 state = State::Fragment;
1383  ++c;
13841486 break;
13851487 }
13861488 state = State::Path;

@@void URLParser::parse(const CharacterTyp
13901492 failure();
13911493 return;
13921494 }
1393 
1394  if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd) == "localhost") {
 1495 if (UNLIKELY(equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost"))) {
 1496 syntaxViolation(c);
13951497 m_asciiBuffer.shrink(m_url.m_passwordEnd);
13961498 m_url.m_hostEnd = currentPosition(c);
13971499 m_url.m_portEnd = m_url.m_hostEnd;

@@void URLParser::parse(const CharacterTyp
14131515 case State::Path:
14141516 LOG_STATE("Path");
14151517 if (*c == '/' || (m_urlIsSpecial && *c == '\\')) {
 1518 if (UNLIKELY(m_urlIsSpecial && *c == '\\'))
 1519 syntaxViolation(c);
14161520 appendToASCIIBuffer('/');
1417  m_url.m_pathAfterLastSlash = currentPosition(c);
14181521 ++c;
 1522 m_url.m_pathAfterLastSlash = currentPosition(c);
14191523 break;
14201524 }
1421  if (currentPosition(c) && m_asciiBuffer[currentPosition(c) - 1] == '/') {
1422  if (isDoubleDotPathSegment(c)) {
 1525 if (UNLIKELY(currentPosition(c) && parsedDataView(currentPosition(c) - 1, 1) == "/")) {
 1526 if (UNLIKELY(isDoubleDotPathSegment(c))) {
 1527 syntaxViolation(c);
14231528 consumeDoubleDotPathSegment(c);
14241529 popPath();
14251530 break;
14261531 }
1427  if (m_asciiBuffer[currentPosition(c) - 1] == '/' && isSingleDotPathSegment(c)) {
 1532 if (UNLIKELY(isSingleDotPathSegment(c))) {
 1533 syntaxViolation(c);
14281534 consumeSingleDotPathSegment(c);
14291535 break;
14301536 }

@@void URLParser::parse(const CharacterTyp
14411547 break;
14421548 }
14431549 if (isPercentEncodedDot(c)) {
 1550 if (UNLIKELY(*c != '.'))
 1551 syntaxViolation(c);
14441552 appendToASCIIBuffer('.');
14451553 ASSERT(*c == '%');
14461554 ++c;

@@void URLParser::parse(const CharacterTyp
14501558 ++c;
14511559 break;
14521560 }
1453  utf8PercentEncode<isInDefaultEncodeSet>(*c);
 1561 utf8PercentEncode<isInDefaultEncodeSet>(c);
14541562 ++c;
14551563 break;
14561564 case State::CannotBeABaseURLPath:

@@void URLParser::parse(const CharacterTyp
14641572 state = State::Fragment;
14651573 } else if (*c == '/') {
14661574 appendToASCIIBuffer('/');
1467  m_url.m_pathAfterLastSlash = currentPosition(c);
14681575 ++c;
 1576 m_url.m_pathAfterLastSlash = currentPosition(c);
14691577 } else {
1470  utf8PercentEncode<isInSimpleEncodeSet>(*c);
 1578 utf8PercentEncode<isInSimpleEncodeSet>(c);
14711579 ++c;
14721580 }
14731581 break;

@@void URLParser::parse(const CharacterTyp
14811589 break;
14821590 }
14831591 if (isUTF8Encoding)
1484  utf8QueryEncode(*c);
 1592 utf8QueryEncode(c);
14851593 else
14861594 appendCodePoint(queryBuffer, *c);
14871595 ++c;
14881596 break;
14891597 case State::Fragment:
1490  LOG_STATE("Fragment");
1491  if (m_unicodeFragmentBuffer.isEmpty() && isASCII(*c))
 1598 CaseFragment:
 1599 LOG(URLParser, "State Fragment");
 1600 if (!m_seenUnicodeFragmentCodePoint && isASCII(*c))
14921601 appendToASCIIBuffer(*c);
1493  else
1494  appendCodePoint(m_unicodeFragmentBuffer, *c);
 1602 else {
 1603 m_seenUnicodeFragmentCodePoint = true;
 1604 if (UNLIKELY(m_seenSyntaxViolation))
 1605 appendCodePoint(m_unicodeFragmentBuffer, *c);
 1606 else {
 1607 ASSERT(m_asciiBuffer.isEmpty());
 1608 ASSERT(m_unicodeFragmentBuffer.isEmpty());
 1609 }
 1610 }
14951611 ++c;
1496  break;
 1612 while (UNLIKELY(!c.atEnd() && isTabOrNewline(*c))) {
 1613 fragmentSyntaxViolation(c);
 1614 ++c;
 1615 }
 1616 if (c.atEnd())
 1617 break;
 1618
 1619 // Skip the check for tabs which might cause a syntaxViolation.
 1620 // We need to handle them differently with fragmentSyntaxViolation.
 1621 goto CaseFragment;
14971622 }
14981623 }
14991624
15001625 switch (state) {
15011626 case State::SchemeStart:
15021627 LOG_FINAL_STATE("SchemeStart");
1503  if (!currentPosition(c) && base.isValid()) {
 1628 if (!currentPosition(c) && base.isValid() && !base.m_cannotBeABaseURL) {
15041629 m_url = base;
15051630 return;
15061631 }

@@void URLParser::parse(const CharacterTyp
15151640 RELEASE_ASSERT_NOT_REACHED();
15161641 case State::SpecialRelativeOrAuthority:
15171642 LOG_FINAL_STATE("SpecialRelativeOrAuthority");
1518  copyURLPartsUntil(base, URLPart::QueryEnd);
 1643 copyURLPartsUntil(base, URLPart::QueryEnd, c);
15191644 m_url.m_fragmentEnd = m_url.m_queryEnd;
15201645 break;
15211646 case State::PathOrAuthority:
15221647 LOG_FINAL_STATE("PathOrAuthority");
15231648 ASSERT(m_url.m_userStart);
15241649 ASSERT(m_url.m_userStart == currentPosition(c));
1525  ASSERT(m_asciiBuffer.last() == '/');
 1650 ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
15261651 m_url.m_userStart--;
15271652 m_url.m_userEnd = m_url.m_userStart;
15281653 m_url.m_passwordEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
15351660 break;
15361661 case State::Relative:
15371662 LOG_FINAL_STATE("Relative");
1538  copyURLPartsUntil(base, URLPart::FragmentEnd);
 1663 copyURLPartsUntil(base, URLPart::FragmentEnd, c);
15391664 break;
15401665 case State::RelativeSlash:
15411666 LOG_FINAL_STATE("RelativeSlash");
1542  copyURLPartsUntil(base, URLPart::PortEnd);
 1667 copyURLPartsUntil(base, URLPart::PortEnd, c);
15431668 appendToASCIIBuffer('/');
15441669 m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
15451670 m_url.m_pathEnd = m_url.m_pathAfterLastSlash;

@@void URLParser::parse(const CharacterTyp
15651690 break;
15661691 case State::AuthorityOrHost:
15671692 LOG_FINAL_STATE("AuthorityOrHost");
1568  m_url.m_userEnd = currentPosition(c);
 1693 m_url.m_userEnd = currentPosition(authorityOrHostBegin);
15691694 m_url.m_passwordEnd = m_url.m_userEnd;
15701695 if (authorityOrHostBegin.atEnd()) {
15711696 m_url.m_hostEnd = m_url.m_userEnd;

@@void URLParser::parse(const CharacterTyp
15741699 failure();
15751700 return;
15761701 }
 1702 syntaxViolation(c);
15771703 appendToASCIIBuffer('/');
15781704 m_url.m_pathEnd = m_url.m_portEnd + 1;
15791705 m_url.m_pathAfterLastSlash = m_url.m_pathEnd;

@@void URLParser::parse(const CharacterTyp
15861712 failure();
15871713 return;
15881714 }
 1715 syntaxViolation(c);
15891716 appendToASCIIBuffer('/');
15901717 m_url.m_pathEnd = m_url.m_portEnd + 1;
15911718 m_url.m_pathAfterLastSlash = m_url.m_pathEnd;

@@void URLParser::parse(const CharacterTyp
15951722 case State::File:
15961723 LOG_FINAL_STATE("File");
15971724 if (base.isValid() && base.protocolIs("file")) {
1598  copyURLPartsUntil(base, URLPart::QueryEnd);
 1725 copyURLPartsUntil(base, URLPart::QueryEnd, c);
15991726 appendToASCIIBuffer(':');
16001727 }
 1728 syntaxViolation(c);
16011729 appendToASCIIBuffer("///", 3);
16021730 m_url.m_userStart = currentPosition(c) - 1;
16031731 m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
16111739 break;
16121740 case State::FileSlash:
16131741 LOG_FINAL_STATE("FileSlash");
 1742 syntaxViolation(c);
 1743 m_url.m_userStart = currentPosition(c) + 1;
16141744 appendToASCIIBuffer("//", 2);
1615  m_url.m_userStart = currentPosition(c) - 1;
16161745 m_url.m_userEnd = m_url.m_userStart;
16171746 m_url.m_passwordEnd = m_url.m_userStart;
16181747 m_url.m_hostEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
16251754 case State::FileHost:
16261755 LOG_FINAL_STATE("FileHost");
16271756 if (authorityOrHostBegin == c) {
 1757 syntaxViolation(c);
16281758 appendToASCIIBuffer('/');
16291759 m_url.m_userStart = currentPosition(c) - 1;
16301760 m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp
16431773 return;
16441774 }
16451775
1646  if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd) == "localhost") {
 1776 syntaxViolation(c);
 1777 if (equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost")) {
16471778 m_asciiBuffer.shrink(m_url.m_passwordEnd);
16481779 m_url.m_hostEnd = currentPosition(c);
16491780 m_url.m_portEnd = m_url.m_hostEnd;

@@void URLParser::parse(const CharacterTyp
16771808 m_url.m_fragmentEnd = m_url.m_queryEnd;
16781809 break;
16791810 case State::Fragment:
1680  LOG_FINAL_STATE("Fragment");
1681  m_url.m_fragmentEnd = currentPosition(c) + m_unicodeFragmentBuffer.size();
1682  break;
 1811 {
 1812 LOG_FINAL_STATE("Fragment");
 1813 size_t length = m_seenSyntaxViolation ? m_asciiBuffer.size() + m_unicodeFragmentBuffer.size() : c.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
 1814 m_url.m_fragmentEnd = length;
 1815 break;
 1816 }
16831817 }
16841818
1685  if (!m_seenSyntaxError) {
 1819 if (LIKELY(!m_seenSyntaxViolation)) {
16861820 m_url.m_string = m_inputString;
16871821 ASSERT(m_asciiBuffer.isEmpty());
16881822 ASSERT(m_unicodeFragmentBuffer.isEmpty());
1689  } else if (m_unicodeFragmentBuffer.isEmpty())
 1823 } else if (!m_seenUnicodeFragmentCodePoint) {
 1824 ASSERT(m_unicodeFragmentBuffer.isEmpty());
16901825 m_url.m_string = String::adopt(WTFMove(m_asciiBuffer));
1691  else {
 1826 } else {
16921827 Vector<UChar> buffer;
1693  buffer.reserveInitialCapacity(currentPosition(c) + m_unicodeFragmentBuffer.size());
 1828 buffer.reserveInitialCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size());
16941829 buffer.appendVector(m_asciiBuffer);
16951830 buffer.appendVector(m_unicodeFragmentBuffer);
16961831 m_url.m_string = String::adopt(WTFMove(buffer));

@@void URLParser::parse(const CharacterTyp
17031838template<typename CharacterType>
17041839void URLParser::parseAuthority(CodePointIterator<CharacterType> iterator)
17051840{
1706  if (iterator.atEnd()) {
 1841 if (UNLIKELY(iterator.atEnd())) {
 1842 syntaxViolation(iterator);
17071843 m_url.m_userEnd = currentPosition(iterator);
17081844 m_url.m_passwordEnd = m_url.m_userEnd;
17091845 return;
17101846 }
1711  for (; !iterator.atEnd(); ++iterator) {
 1847 auto authorityOrHostBegin = iterator;
 1848 for (; !iterator.atEnd(); incrementIteratorSkippingTabsAndNewlines(iterator)) {
17121849 if (*iterator == ':') {
1713  ++iterator;
17141850 m_url.m_userEnd = currentPosition(iterator);
1715  if (iterator.atEnd()) {
 1851 auto iteratorAtColon = iterator;
 1852 incrementIteratorSkippingTabsAndNewlines(iterator, authorityOrHostBegin);
 1853 if (UNLIKELY(iterator.atEnd())) {
 1854 syntaxViolation(iteratorAtColon);
17161855 m_url.m_passwordEnd = m_url.m_userEnd;
17171856 if (m_url.m_userEnd > m_url.m_userStart)
17181857 appendToASCIIBuffer('@');

@@void URLParser::parseAuthority(CodePoint
17211860 appendToASCIIBuffer(':');
17221861 break;
17231862 }
1724  utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);
 1863 utf8PercentEncode<isInUserInfoEncodeSet>(iterator);
17251864 }
1726  for (; !iterator.atEnd(); ++iterator)
1727  utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);
 1865 for (; !iterator.atEnd(); incrementIteratorSkippingTabsAndNewlines(iterator))
 1866 utf8PercentEncode<isInUserInfoEncodeSet>(iterator);
17281867 m_url.m_passwordEnd = currentPosition(iterator);
17291868 if (!m_url.m_userEnd)
17301869 m_url.m_userEnd = m_url.m_passwordEnd;

@@void URLParser::serializeIPv6(URLParser:
18241963}
18251964
18261965template<typename CharacterType>
1827 inline static Optional<uint32_t> parseIPv4Number(CodePointIterator<CharacterType>& iterator)
 1966Optional<uint32_t> URLParser::parseIPv4Number(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)
18281967{
18291968 // FIXME: Check for overflow.
18301969 enum class State : uint8_t {

@@inline static Optional<uint32_t> parseIP
18431982 }
18441983 switch (state) {
18451984 case State::UnknownBase:
1846  if (*iterator == '0') {
 1985 if (UNLIKELY(*iterator == '0')) {
18471986 ++iterator;
18481987 state = State::OctalOrHex;
18491988 break;

@@inline static Optional<uint32_t> parseIP
18511990 state = State::Decimal;
18521991 break;
18531992 case State::OctalOrHex:
 1993 syntaxViolation(iteratorForSyntaxViolationPosition);
18541994 if (*iterator == 'x' || *iterator == 'X') {
18551995 ++iterator;
18561996 state = State::Hex;

@@inline static Optional<uint32_t> parseIP
18662006 ++iterator;
18672007 break;
18682008 case State::Octal:
 2009 ASSERT(m_seenSyntaxViolation);
18692010 if (*iterator < '0' || *iterator > '7')
18702011 return Nullopt;
18712012 value *= 8;

@@inline static Optional<uint32_t> parseIP
18732014 ++iterator;
18742015 break;
18752016 case State::Hex:
 2017 ASSERT(m_seenSyntaxViolation);
18762018 if (!isASCIIHexDigit(*iterator))
18772019 return Nullopt;
18782020 value *= 16;

@@inline static uint64_t pow256(size_t exp
18942036template<typename CharacterType>
18952037Optional<URLParser::IPv4Address> URLParser::parseIPv4Host(CodePointIterator<CharacterType> iterator)
18962038{
 2039 auto hostBegin = iterator;
 2040
18972041 Vector<uint32_t, 4> items;
18982042 items.reserveInitialCapacity(4);
18992043 while (!iterator.atEnd()) {
19002044 if (items.size() >= 4)
19012045 return Nullopt;
1902  if (auto item = parseIPv4Number(iterator))
 2046 if (auto item = parseIPv4Number(iterator, hostBegin))
19032047 items.append(item.value());
19042048 else
19052049 return Nullopt;

@@Optional<URLParser::IPv4Address> URLPars
19182062 if (item > 255)
19192063 return Nullopt;
19202064 }
 2065
 2066 if (UNLIKELY(items.size() != 4))
 2067 syntaxViolation(hostBegin);
 2068
19212069 IPv4Address ipv4 = items.takeLast();
19222070 for (size_t counter = 0; counter < items.size(); ++counter)
19232071 ipv4 += items[counter] * pow256(3 - counter);

@@Optional<URLParser::IPv4Address> URLPars
19272075template<typename CharacterType>
19282076Optional<URLParser::IPv6Address> URLParser::parseIPv6Host(CodePointIterator<CharacterType> c)
19292077{
 2078 ASSERT(*c == '[');
 2079 auto hostBegin = c;
 2080 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
19302081 if (c.atEnd())
19312082 return Nullopt;
19322083

@@Optional<URLParser::IPv6Address> URLPars
19352086 Optional<size_t> compressPointer;
19362087
19372088 if (*c == ':') {
1938  ++c;
 2089 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
19392090 if (c.atEnd())
19402091 return Nullopt;
19412092 if (*c != ':')
19422093 return Nullopt;
1943  ++c;
 2094 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
19442095 ++piecePointer;
19452096 compressPointer = piecePointer;
19462097 }

@@Optional<URLParser::IPv6Address> URLPars
19512102 if (*c == ':') {
19522103 if (compressPointer)
19532104 return Nullopt;
1954  ++c;
 2105 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
19552106 ++piecePointer;
19562107 compressPointer = piecePointer;
19572108 continue;
19582109 }
19592110 uint16_t value = 0;
1960  for (size_t length = 0; length < 4; length++) {
 2111 size_t length = 0;
 2112 for (; length < 4; length++) {
19612113 if (c.atEnd())
19622114 break;
19632115 if (!isASCIIHexDigit(*c))
19642116 break;
19652117 value = value * 0x10 + toASCIIHexValue(*c);
1966  ++c;
 2118 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
19672119 }
 2120 if (UNLIKELY(length > 1 && !value))
 2121 syntaxViolation(hostBegin);
 2122
19682123 address[piecePointer++] = value;
19692124 if (c.atEnd())
19702125 break;
19712126 if (*c != ':')
19722127 return Nullopt;
1973  ++c;
 2128 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
19742129 }
19752130
19762131 if (!c.atEnd()) {

@@Optional<URLParser::IPv6Address> URLPars
19892144 return Nullopt;
19902145 else
19912146 value = value.value() * 10 + number;
1992  ++c;
 2147 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
19932148 if (c.atEnd())
19942149 return Nullopt;
19952150 if (value.value() > 255)

@@Optional<URLParser::IPv6Address> URLPars
20012156 if (dotsSeen == 1 || dotsSeen == 3)
20022157 piecePointer++;
20032158 if (!c.atEnd())
2004  ++c;
 2159 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);
20052160 if (dotsSeen == 3 && !c.atEnd())
20062161 return Nullopt;
20072162 dotsSeen++;

@@Optional<URLParser::IPv6Address> URLPars
20142169 std::swap(address[piecePointer--], address[compressPointer.value() + swaps-- - 1]);
20152170 } else if (piecePointer != 8)
20162171 return Nullopt;
 2172
 2173 Optional<size_t> possibleCompressPointer = findLongestZeroSequence(address);
 2174 if (possibleCompressPointer)
 2175 possibleCompressPointer.value()++;
 2176 if (UNLIKELY(compressPointer != possibleCompressPointer))
 2177 syntaxViolation(hostBegin);
 2178
20172179 return address;
20182180}
20192181

@@inline static bool hasInvalidDomainChara
21052267template<typename CharacterType>
21062268bool URLParser::parsePort(CodePointIterator<CharacterType>& iterator)
21072269{
 2270 ASSERT(*iterator == ':');
 2271 auto colonIterator = iterator;
 2272 incrementIteratorSkippingTabsAndNewlines(iterator, colonIterator);
21082273 uint32_t port = 0;
2109  if (iterator.atEnd()) {
2110  m_url.m_portEnd = currentPosition(iterator);
 2274 if (UNLIKELY(iterator.atEnd())) {
 2275 m_url.m_portEnd = currentPosition(colonIterator);
 2276 syntaxViolation(colonIterator);
21112277 return true;
21122278 }
2113  appendToASCIIBuffer(':');
21142279 for (; !iterator.atEnd(); ++iterator) {
2115  if (isTabOrNewline(*iterator))
 2280 if (UNLIKELY(isTabOrNewline(*iterator))) {
 2281 syntaxViolation(colonIterator);
21162282 continue;
 2283 }
21172284 if (isASCIIDigit(*iterator)) {
21182285 port = port * 10 + *iterator - '0';
21192286 if (port > std::numeric_limits<uint16_t>::max())

@@bool URLParser::parsePort(CodePointItera
21222289 return false;
21232290 }
21242291
2125  if (isDefaultPort(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd), port)) {
2126  ASSERT(m_asciiBuffer.last() == ':');
2127  m_asciiBuffer.shrink(currentPosition(iterator) - 1);
2128  } else {
 2292 if (UNLIKELY(isDefaultPort(parsedDataView(0, m_url.m_schemeEnd), port)))
 2293 syntaxViolation(colonIterator);
 2294 else {
 2295 appendToASCIIBuffer(':');
21292296 ASSERT(port <= std::numeric_limits<uint16_t>::max());
21302297 appendNumberToASCIIBuffer<uint16_t>(static_cast<uint16_t>(port));
21312298 }

@@bool URLParser::parseHostAndPort(CodePoi
21402307 if (iterator.atEnd())
21412308 return false;
21422309 if (*iterator == '[') {
2143  ++iterator;
21442310 auto ipv6End = iterator;
21452311 while (!ipv6End.atEnd() && *ipv6End != ']')
21462312 ++ipv6End;
21472313 if (auto address = parseIPv6Host(CodePointIterator<CharacterType>(iterator, ipv6End))) {
21482314 serializeIPv6(address.value());
2149  m_url.m_hostEnd = currentPosition(iterator);
21502315 if (!ipv6End.atEnd()) {
2151  ++ipv6End;
 2316 incrementIteratorSkippingTabsAndNewlines(ipv6End);
21522317 if (!ipv6End.atEnd() && *ipv6End == ':') {
2153  ++ipv6End;
 2318 m_url.m_hostEnd = currentPosition(ipv6End);
21542319 return parsePort(ipv6End);
21552320 }
2156  m_url.m_portEnd = currentPosition(iterator);
 2321 m_url.m_hostEnd = currentPosition(ipv6End);
 2322 m_url.m_portEnd = m_url.m_hostEnd;
21572323 return true;
21582324 }
 2325 m_url.m_hostEnd = currentPosition(ipv6End);
21592326 return true;
21602327 }
21612328 }
2162 
2163  if (!m_hostHasPercentOrNonASCII) {
 2329
 2330 if (LIKELY(!m_hostHasPercentOrNonASCII)) {
21642331 auto hostIterator = iterator;
21652332 for (; !iterator.atEnd(); ++iterator) {
21662333 if (isTabOrNewline(*iterator))

@@bool URLParser::parseHostAndPort(CodePoi
21772344 m_url.m_portEnd = currentPosition(iterator);
21782345 return true;
21792346 }
2180  ++iterator;
21812347 return parsePort(iterator);
21822348 }
21832349 for (; hostIterator != iterator; ++hostIterator) {
2184  if (!isTabOrNewline(*hostIterator))
 2350 if (LIKELY(!isTabOrNewline(*hostIterator))) {
 2351 if (UNLIKELY(isASCIIUpper(*hostIterator)))
 2352 syntaxViolation(hostIterator);
21852353 appendToASCIIBuffer(toASCIILower(*hostIterator));
 2354 } else
 2355 syntaxViolation(hostIterator);
21862356 }
21872357 m_url.m_hostEnd = currentPosition(iterator);
2188  if (!hostIterator.atEnd()) {
2189  ASSERT(*hostIterator == ':');
2190  incrementIteratorSkippingTabAndNewLine(hostIterator);
 2358 if (!hostIterator.atEnd())
21912359 return parsePort(hostIterator);
2192  }
21932360 m_url.m_portEnd = currentPosition(iterator);
21942361 return true;
21952362 }
21962363
 2364 syntaxViolation(iterator);
 2365
21972366 Vector<LChar, defaultInlineBufferSize> utf8Encoded;
21982367 for (; !iterator.atEnd(); ++iterator) {
21992368 if (isTabOrNewline(*iterator))

@@bool URLParser::parseHostAndPort(CodePoi
22232392 m_url.m_portEnd = currentPosition(iterator);
22242393 return true;
22252394 }
2226  ++iterator;
22272395 return parsePort(iterator);
22282396 }
22292397
22302398 appendToASCIIBuffer(asciiDomainCharacters, asciiDomainValue.size());
22312399 m_url.m_hostEnd = currentPosition(iterator);
2232  if (!iterator.atEnd()) {
2233  ASSERT(*iterator == ':');
2234  incrementIteratorSkippingTabAndNewLine(iterator);
 2400 if (!iterator.atEnd())
22352401 return parsePort(iterator);
2236  }
22372402 m_url.m_portEnd = currentPosition(iterator);
22382403 return true;
22392404}

@@inline static void serializeURLEncodedFo
22812446 || (byte >= 0x30 && byte <= 0x39)
22822447 || (byte >= 0x41 && byte <= 0x5A)
22832448 || byte == 0x5F
2284  || (byte >= 0x61 && byte <= 0x7A))
 2449 || (byte >= 0x61 && byte <= 0x7A)) // FIXME: Put these in the characterClassTable to avoid branches.
22852450 output.append(byte);
22862451 else
22872452 percentEncodeByte(byte, output);

@@bool URLParser::allValuesEqual(const URL
23492514}
23502515
23512516bool URLParser::internalValuesConsistent(const URL& url)
2352 {
 2517{
23532518 return url.m_schemeEnd <= url.m_userStart
23542519 && url.m_userStart <= url.m_userEnd
23552520 && url.m_userEnd <= url.m_passwordEnd
206412

Source/WebCore/platform/URLParser.h

2828#include "TextEncoding.h"
2929#include "URL.h"
3030#include <wtf/Forward.h>
31 #include <wtf/text/StringBuilder.h>
3231
3332namespace WebCore {
3433

@@private:
5352 URL m_url;
5453 Vector<LChar> m_asciiBuffer;
5554 Vector<UChar> m_unicodeFragmentBuffer;
 55 bool m_seenUnicodeFragmentCodePoint { false };
5656 bool m_urlIsSpecial { false };
5757 bool m_hostHasPercentOrNonASCII { false };
5858 String m_inputString;
5959 const void* m_inputBegin { nullptr };
6060
61  // FIXME: This should start out as false and only change to true when we see a syntax error once syntax error handling is implemented.
62  bool m_seenSyntaxError { true };
 61 bool m_seenSyntaxViolation { false };
6362
6463 template<typename CharacterType> void parse(const CharacterType*, const unsigned length, const URL&, const TextEncoding&);
6564 template<typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>);

@@private:
6766 template<typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&);
6867
6968 void failure();
70  template<typename CharacterType> void incrementIteratorSkippingTabAndNewLine(CodePointIterator<CharacterType>&);
71  template<typename CharacterType> void syntaxError(const CodePointIterator<CharacterType>&);
 69 template<typename CharacterType> void incrementIteratorSkippingTabsAndNewlines(CodePointIterator<CharacterType>& iterator) { incrementIteratorSkippingTabsAndNewlines(iterator, iterator); }
 70 template<typename CharacterType> void incrementIteratorSkippingTabsAndNewlines(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
 71 template<typename CharacterType> void syntaxViolation(const CodePointIterator<CharacterType>&);
 72 template<typename CharacterType> void fragmentSyntaxViolation(const CodePointIterator<CharacterType>&);
7273 template<typename CharacterType> bool isWindowsDriveLetter(CodePointIterator<CharacterType>);
7374 template<typename CharacterType> bool shouldCopyFileURL(CodePointIterator<CharacterType>);
7475 template<typename CharacterType> void checkWindowsDriveLetter(CodePointIterator<CharacterType>&);
7576 template<typename CharacterType> size_t currentPosition(const CodePointIterator<CharacterType>&);
7677 template<typename UnsignedIntegerType> void appendNumberToASCIIBuffer(UnsignedIntegerType);
77  template<bool(*isInCodeSet)(UChar32)> void utf8PercentEncode(UChar32);
78  void utf8QueryEncode(UChar32);
 78 template<bool(*isInCodeSet)(UChar32), typename CharacterType> void utf8PercentEncode(const CodePointIterator<CharacterType>&);
 79 template<typename CharacterType> void utf8QueryEncode(const CodePointIterator<CharacterType>&);
7980 void percentEncodeByte(uint8_t);
8081 void appendToASCIIBuffer(UChar32);
8182 void appendToASCIIBuffer(const char*, size_t);
8283 void appendToASCIIBuffer(const LChar* characters, size_t size) { appendToASCIIBuffer(reinterpret_cast<const char*>(characters), size); }
8384 void encodeQuery(const Vector<UChar>& source, const TextEncoding&);
8485 void copyASCIIStringUntil(const String&, size_t lengthIf8Bit, size_t lengthIf16Bit);
 86 StringView parsedDataView(size_t start, size_t length);
8587
8688 using IPv4Address = uint32_t;
8789 void serializeIPv4(IPv4Address);
8890 template<typename CharacterType> Optional<IPv4Address> parseIPv4Host(CodePointIterator<CharacterType>);
 91 template<typename CharacterType> Optional<uint32_t> parseIPv4Number(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
8992 using IPv6Address = std::array<uint16_t, 8>;
9093 template<typename CharacterType> Optional<IPv6Address> parseIPv6Host(CodePointIterator<CharacterType>);
9194 void serializeIPv6Piece(uint16_t piece);
9295 void serializeIPv6(URLParser::IPv6Address);
9396
9497 enum class URLPart;
95  void copyURLPartsUntil(const URL& base, URLPart);
 98 template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&);
9699 static size_t urlLengthUntilPart(const URL&, URLPart);
97100 void popPath();
98101};
206412

Tools/ChangeLog

 12016-09-26 Alex Christensen <achristensen@webkit.org>
 2
 3 Implement URLParser::syntaxViolation
 4 https://bugs.webkit.org/show_bug.cgi?id=162593
 5
 6 Reviewed by NOBODY (OOPS!).
 7
 8 * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
 9 (TestWebKitAPI::TEST_F):
 10
1112016-09-26 Ryan Haddad <ryanhaddad@apple.com>
212
313 Add Sierra to the flakiness dashboard.
206412

Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

@@TEST_F(URLParserTest, Basic)
119119 checkURL("http://[0:f::f:f:0:0]", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});
120120 checkURL("http://[0:f:0:0:f::]", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
121121 checkURL("http://[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
 122 checkURL("http://[0:f:0:0:f::]:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
 123 checkURL("http://[0:f:0:0:f::]:\t", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
 124 checkURL("http://[0:f:0:0:f::]\t:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
 125 checkURL("http://\t[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
 126 checkURL("http://[\t::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
 127 checkURL("http://[:\t:f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
 128 checkURL("http://[::\tf:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
 129 checkURL("http://[::f\t:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
 130 checkURL("http://[::f:\t0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
122131 checkURL("http://example.com/path1/path2/.", {"http", "", "", "example.com", 0, "/path1/path2/", "", "", "http://example.com/path1/path2/"});
123132 checkURL("http://example.com/path1/path2/..", {"http", "", "", "example.com", 0, "/path1/", "", "", "http://example.com/path1/"});
124133 checkURL("http://example.com/path1/path2/./path3", {"http", "", "", "example.com", 0, "/path1/path2/path3", "", "", "http://example.com/path1/path2/path3"});

@@TEST_F(URLParserTest, Basic)
191200 checkURL("http://host/a%20B", {"http", "", "", "host", 0, "/a%20B", "", "", "http://host/a%20B"});
192201 checkURL("http://host?q=@ <>!#fragment", {"http", "", "", "host", 0, "/", "q=@%20%3C%3E!", "fragment", "http://host/?q=@%20%3C%3E!#fragment"});
193202 checkURL("http://user:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
 203 checkURL("http://user:@\thost", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
 204 checkURL("http://user\t:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
 205 checkURL("http://use\tr:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
194206 checkURL("http://127.0.0.1:10100/path", {"http", "", "", "127.0.0.1", 10100, "/path", "", "", "http://127.0.0.1:10100/path"});
195207 checkURL("http://127.0.0.1:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
 208 checkURL("http://127.0.0.1\t:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
 209 checkURL("http://127.0.0.1:\t/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
 210 checkURL("http://127.0.0.1:/\tpath", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
196211 checkURL("http://127.0.0.1:123", {"http", "", "", "127.0.0.1", 123, "/", "", "", "http://127.0.0.1:123/"});
197212 checkURL("http://127.0.0.1:", {"http", "", "", "127.0.0.1", 0, "/", "", "", "http://127.0.0.1/"});
198213 checkURL("http://[0:f::f:f:0:0]:123/path", {"http", "", "", "[0:f::f:f:0:0]", 123, "/path", "", "", "http://[0:f::f:f:0:0]:123/path"});
199214 checkURL("http://[0:f::f:f:0:0]:123", {"http", "", "", "[0:f::f:f:0:0]", 123, "/", "", "", "http://[0:f::f:f:0:0]:123/"});
 215 checkURL("http://[0:f:0:0:f:\t:]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
 216 checkURL("http://[0:f:0:0:f::\t]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
 217 checkURL("http://[0:f:0:0:f::]\t:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
 218 checkURL("http://[0:f:0:0:f::]:\t123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
 219 checkURL("http://[0:f:0:0:f::]:1\t23", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
200220 checkURL("http://[0:f::f:f:0:0]:/path", {"http", "", "", "[0:f::f:f:0:0]", 0, "/path", "", "", "http://[0:f::f:f:0:0]/path"});
201221 checkURL("http://[0:f::f:f:0:0]:", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});
202222 checkURL("http://host:10100/path", {"http", "", "", "host", 10100, "/path", "", "", "http://host:10100/path"});

@@TEST_F(URLParserTest, Basic)
210230 checkURL("sc:/pa/", {"sc", "", "", "", 0, "/pa/", "", "", "sc:/pa/"});
211231 checkURL("notspecial:/notuser:notpassword@nothost", {"notspecial", "", "", "", 0, "/notuser:notpassword@nothost", "", "", "notspecial:/notuser:notpassword@nothost"});
212232 checkURL("sc://pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
 233 checkURL("sc://\tpa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
 234 checkURL("sc:/\t/pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
 235 checkURL("sc:\t//pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
213236 checkURL("http://host \a ", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
214237 checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
215238 checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});

@@TEST_F(URLParserTest, Basic)
217240 checkURL("http://256/", {"http", "", "", "256", 0, "/", "", "", "http://256/"});
218241 checkURL("http://256./", {"http", "", "", "256.", 0, "/", "", "", "http://256./"});
219242 checkURL("http://123.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
 243 checkURL("http://123\t.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
 244 checkURL("http://123.\t256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
220245 checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
221246 checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
222247 checkURL("notspecial:/", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
223248 checkURL("data:image/png;base64,encoded-data-follows-here", {"data", "", "", "", 0, "image/png;base64,encoded-data-follows-here", "", "", "data:image/png;base64,encoded-data-follows-here"});
224249 checkURL("data:image/png;base64,encoded/data-with-slash", {"data", "", "", "", 0, "image/png;base64,encoded/data-with-slash", "", "", "data:image/png;base64,encoded/data-with-slash"});
 250 checkURL("about:~", {"about", "", "", "", 0, "~", "", "", "about:~"});
225251
226252 // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
227253 // and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.

@@TEST_F(URLParserTest, ParseRelative)
287313 checkRelativeURL("\\@", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/@", "", "", "http://example.org/@"});
288314 checkRelativeURL("/path3", "http://user@example.org/path1/path2", {"http", "user", "", "example.org", 0, "/path3", "", "", "http://user@example.org/path3"});
289315 checkRelativeURL("", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
 316 checkRelativeURL("\t", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
 317 checkRelativeURL(" ", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
290318 checkRelativeURL(" \a \t\n", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
291319 checkRelativeURL(":foo.com\\", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/:foo.com/", "", "", "http://example.org/foo/:foo.com/"});
292320 checkRelativeURL("http:/example.com/", "about:blank", {"http", "", "", "example.com", 0, "/", "", "", "http://example.com/"});

@@TEST_F(URLParserTest, ParseRelative)
314342 checkRelativeURL("notspecial:/", "http://host", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
315343 checkRelativeURL("foo:/", "http://example.org/foo/bar", {"foo", "", "", "", 0, "/", "", "", "foo:/"});
316344 checkRelativeURL("://:0/", "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/://:0/", "", "", "http://webkit.org/://:0/"});
 345 checkRelativeURL(String(), "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/", "", "", "http://webkit.org/"});
317346
318347 // The checking of slashes in SpecialAuthoritySlashes needed to get this to pass contradicts what is in the spec,
319348 // but it is included in the web platform tests.

@@TEST_F(URLParserTest, ParserDifferences)
590619 checkURLDifferences(utf16String(u"http://host/path#💩\t💩"),
591620 {"http", "", "", "host", 0, "/path", "", utf16String(u"💩💩"), utf16String(u"http://host/path#💩💩")},
592621 {"http", "", "", "host", 0, "/path", "", "%F0%9F%92%A9%F0%9F%92%A9", "http://host/path#%F0%9F%92%A9%F0%9F%92%A9"});
 622 checkURLDifferences("http://%48OsT",
 623 {"http", "", "", "host", 0, "/", "", "", "http://host/"},
 624 {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
 625 checkURLDifferences("http://h%4FsT",
 626 {"http", "", "", "host", 0, "/", "", "", "http://host/"},
 627 {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});
 628 checkURLDifferences("http://h%4fsT",
 629 {"http", "", "", "host", 0, "/", "", "", "http://host/"},
 630 {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});
 631 checkURLDifferences("http://h%6fsT",
 632 {"http", "", "", "host", 0, "/", "", "", "http://host/"},
 633 {"http", "", "", "h%6fst", 0, "/", "", "", "http://h%6fst/"});
 634 checkURLDifferences("http://host/`",
 635 {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
 636 {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
 637 checkURLDifferences("aA://",
 638 {"aa", "", "", "", 0, "/", "", "", "aa:///"},
 639 {"aa", "", "", "", 0, "//", "", "", "aa://"});
 640 checkURLDifferences("A://",
 641 {"a", "", "", "", 0, "/", "", "", "a:///"},
 642 {"a", "", "", "", 0, "//", "", "", "a://"});
593643}
594644
595645TEST_F(URLParserTest, DefaultPort)
596646{
597647 checkURL("FtP://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
598648 checkURL("ftp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 649 checkURL("f\ttp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 650 checkURL("f\ttp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 651 checkURL("f\ttp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 652 checkURL("f\ttp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 653 checkURL("f\ttp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 654 checkURL("ftp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 655 checkURL("ftp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 656 checkURL("ftp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
 657 checkURL("ftp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
599658 checkURL("ftp://host:22/", {"ftp", "", "", "host", 22, "/", "", "", "ftp://host:22/"});
600659 checkURLDifferences("ftp://host:21",
601660 {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"},

@@TEST_F(URLParserTest, DefaultPort)
670729 checkURLDifferences("unknown://host:81",
671730 {"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"},
672731 {"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"});
673  checkURLDifferences("http://%48OsT",
674  {"http", "", "", "host", 0, "/", "", "", "http://host/"},
675  {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
676  checkURLDifferences("http://host/`",
677  {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
678  {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
679732}
680733
681734static void shouldFail(const String& urlString)

@@TEST_F(URLParserTest, ParserFailures)
694747 shouldFail(" \a ");
695748 shouldFail("");
696749 shouldFail(String());
 750 shouldFail("", "about:blank");
 751 shouldFail(String(), "about:blank");
697752 shouldFail("http://127.0.0.1:abc");
698753 shouldFail("http://host:abc");
699754 shouldFail("http://a:@", "about:blank");

@@TEST_F(URLParserTest, ParserFailures)
723778 shouldFail("://:0/");
724779 shouldFail("://:0/", "");
725780 shouldFail("://:0/", "about:blank");
 781 shouldFail("about~");
726782}
727783
728784// These are in the spec but not in the web platform tests.
206412