URLParser.cpp

@@template<typename CharacterType> inline

static bool shouldPercentEncodeQueryByte(uint8_t byte) { return characterClassTable[byte] & QueryPercent; }

// Advances |iterator| by one code point and then keeps advancing for as long as
// it is not at the end and the current code point satisfies isTabOrNewline,
// reporting a syntax violation for every code point skipped that way.
// NOTE(review): this span interleaves two revisions of the same function; the
// lines that start with a bare number are residue of a rendered diff. The later
// revision reports the violation at iteratorForSyntaxViolationPosition instead
// of at |iterator| and marks the skip loop UNLIKELY. Only one revision belongs
// in real source.
template<typename CharacterType>

413 void URLParser::incrementIteratorSkippingTabAndNewLine(CodePointIterator<CharacterType>& iterator)

413void URLParser::incrementIteratorSkippingTabsAndNewlines(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)

414414{

415415 ++iterator;

416 while (!iterator.atEnd() && isTabOrNewline(*iterator)) {

417 syntax~~Err~~or(iterator);

416 while (UNLIKELY(!iterator.atEnd() && isTabOrNewline(*iterator))) {

417 syntaxViolation(iteratorForSyntaxViolationPosition);

        ++iterator;

    }

}

@@bool URLParser::isWindowsDriveLetter(Cod

{

    if (iterator.atEnd() || !isASCIIAlpha(*iterator))

        return false;

427 incrementIteratorSkippingTabAndNewLine(iterator);

427 incrementIteratorSkippingTabsAndNewlines(iterator);

    if (iterator.atEnd())

        return false;

    if (*iterator == ':')

        return true;

432 if (*iterator == '|') {

433 syntax~~Err~~or(iterator);

432 if (UNLIKELY(*iterator == '|')) {

433 syntaxViolation(iterator);

        return true;

    }

    return false;

@@void URLParser::appendToASCIIBuffer(UCha

{

    ASSERT(m_unicodeFragmentBuffer.isEmpty());

    ASSERT(isASCII(codePoint));

450 if (m_seenSyntax~~Err~~or)

450 if (UNLIKELY(m_seenSyntaxViolation))

        m_asciiBuffer.append(codePoint);

}

// Appends |length| characters starting at |characters| to m_asciiBuffer, but
// only once a syntax violation has been recorded; before that nothing is
// appended. Asserts that m_unicodeFragmentBuffer is still empty.
// NOTE(review): the two `if` lines below are the old and new revisions of the
// same statement (rendered-diff residue); only one belongs in real source.
void URLParser::appendToASCIIBuffer(const char* characters, size_t length)

{

    ASSERT(m_unicodeFragmentBuffer.isEmpty());

457 if (m_seenSyntax~~Err~~or)

457 if (UNLIKELY(m_seenSyntaxViolation))

        m_asciiBuffer.append(characters, length);

}

@@void URLParser::checkWindowsDriveLetter(

{

    if (isWindowsDriveLetter(iterator)) {

        appendToASCIIBuffer(*iterator);

466 incrementIteratorSkippingTabAndNewLine(iterator);

466 incrementIteratorSkippingTabsAndNewlines(iterator);

        ASSERT(!iterator.atEnd());

        ASSERT(*iterator == ':' || *iterator == '|');

        appendToASCIIBuffer(':');

470 incrementIteratorSkippingTabAndNewLine(iterator);

470 incrementIteratorSkippingTabsAndNewlines(iterator);

    }

}

@@bool URLParser::shouldCopyFileURL(CodePo

        return true;

    if (iterator.atEnd())

        return false;

481 incrementIteratorSkippingTabAndNewLine(iterator);

481 incrementIteratorSkippingTabsAndNewlines(iterator);

482482 if (iterator.atEnd())

483483 return true;

484 incrementIteratorSkippingTabAndNewLine(iterator);

484 incrementIteratorSkippingTabsAndNewlines(iterator);

    if (iterator.atEnd())

        return true;

    return !isSlashQuestionOrHash(*iterator);

@@void URLParser::percentEncodeByte(uint8_

// Percent-encoded UTF-8 bytes (EF BF BD) of U+FFFD REPLACEMENT CHARACTER. The
// encoders append this text when a code point fails U_IS_UNICODE_CHAR.
const char replacementCharacterUTF8PercentEncoded[] = "%EF%BF%BD";

// Character count of the string above; the terminating NUL is not counted.
const size_t replacementCharacterUTF8PercentEncodedLength = sizeof replacementCharacterUTF8PercentEncoded - 1;

507 template<bool(*isInCodeSet)(UChar32)>

508 void URLParser::utf8PercentEncode(U~~Char32 c~~odePoint)

507template<bool(*isInCodeSet)(UChar32), typename CharacterType>

508void URLParser::utf8PercentEncode(const CodePointIterator<CharacterType>& iterator)

509509{

510 if (isASCII(codePoint)) {

511 if (isInCodeSet(codePoint))

    ASSERT(!iterator.atEnd());

    UChar32 codePoint = *iterator;

    if (LIKELY(isASCII(codePoint))) {

        if (UNLIKELY(isInCodeSet(codePoint))) {

            syntaxViolation(iterator);

512515 percentEncodeByte(codePoint);

513 else

516 } else

            appendToASCIIBuffer(codePoint);

        return;

    }

    ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");

521 syntaxViolation(iterator);

    
    if (!U_IS_UNICODE_CHAR(codePoint)) {

        appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);

@@void URLParser::utf8PercentEncode(UChar3

        percentEncodeByte(buffer[i]);

}

531

532 void URLParser::utf8QueryEncode(U~~Char32 c~~odePoint)

535template<typename CharacterType>

536void URLParser::utf8QueryEncode(const CodePointIterator<CharacterType>& iterator)

533537{

534 if (isASCII(codePoint)) {

535 if (shouldPercentEncodeQueryByte(codePoint))

    ASSERT(!iterator.atEnd());

    UChar32 codePoint = *iterator;

    if (LIKELY(isASCII(codePoint))) {

        if (UNLIKELY(shouldPercentEncodeQueryByte(codePoint))) {

            syntaxViolation(iterator);

536543 percentEncodeByte(codePoint);

537 else

544 } else

            appendToASCIIBuffer(codePoint);

        return;

    }

549 syntaxViolation(iterator);

550

    if (!U_IS_UNICODE_CHAR(codePoint)) {

        appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);

        return;

@@void URLParser::copyASCIIStringUntil(con

    }

}

751 void URLParser::copyURLPartsUntil(const URL& base, URLPart part)

760template<typename CharacterType>

761void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator)

752762{

763 syntaxViolation(iterator);

764

    m_asciiBuffer.clear();

    m_unicodeFragmentBuffer.clear();

    if (part == URLPart::FragmentEnd) {

@@void URLParser::popPath()

}

// Records the first syntax violation found while parsing. On that first call it
// sets m_seenSyntaxViolation and copies every input code unit that precedes
// |iterator| into m_asciiBuffer; later calls return immediately.
// Must not be used once m_url.m_queryEnd is set (see the assertion message).
// NOTE(review): the line that starts with "936" is the previous revision's
// signature (syntaxError), left over from a rendered diff.
template<typename CharacterType>

936 void URLParser::syntaxError(const CodePointIterator<CharacterType>&)

void URLParser::syntaxViolation(const CodePointIterator<CharacterType>& iterator)

{

    // Only the first violation primes the buffer.
    if (m_seenSyntaxViolation)

        return;

    m_seenSyntaxViolation = true;

    ASSERT(m_asciiBuffer.isEmpty());

    ASSERT(m_unicodeFragmentBuffer.isEmpty());

    ASSERT_WITH_MESSAGE(!m_url.m_queryEnd, "syntaxViolation should not be used in the fragment, which might contain non-ASCII code points when serialized");

    // Number of input code units between m_inputBegin and |iterator|.
    size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));

    // Bounds the copy loop below by the input length.
    RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());

    // Room for the whole input is reserved up front.
    // NOTE(review): presumably uncheckedAppend relies on this reservation - confirm.
    m_asciiBuffer.reserveCapacity(m_inputString.length());

    for (size_t i = 0; i < codeUnitsToCopy; ++i) {

        ASSERT(isASCII(m_inputString[i]));

        m_asciiBuffer.uncheckedAppend(m_inputString[i]);

    }

}

template<typename CharacterType>

void URLParser::fragmentSyntaxViolation(const CodePointIterator<CharacterType>& iterator)

937968{

938 // FIXME: Implement.

    if (m_seenSyntaxViolation)

        return;

    m_seenSyntaxViolation = true;

    ASSERT(m_asciiBuffer.isEmpty());

    ASSERT(m_unicodeFragmentBuffer.isEmpty());

    size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));

    size_t asciiCodeUnitsToCopy = m_url.m_queryEnd;

    size_t unicodeCodeUnitsToCopy = codeUnitsToCopy - asciiCodeUnitsToCopy;

    RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());

    m_asciiBuffer.reserveCapacity(asciiCodeUnitsToCopy);

    for (size_t i = 0; i < asciiCodeUnitsToCopy; ++i) {

        ASSERT(isASCII(m_inputString[i]));

        m_asciiBuffer.uncheckedAppend(m_inputString[i]);

    }

    m_unicodeFragmentBuffer.reserveCapacity(m_inputString.length() - asciiCodeUnitsToCopy);

    for (size_t i = 0; i < unicodeCodeUnitsToCopy; ++i)

        m_unicodeFragmentBuffer.uncheckedAppend(m_inputString[i + asciiCodeUnitsToCopy]);

}

void URLParser::failure()

@@void URLParser::failure()

    m_url.m_string = m_inputString;

}

// Returns a view of |length| code units of the parsed output, starting at
// offset |start|. Until a syntax violation has been seen the view refers to
// m_inputString; after that it refers to m_asciiBuffer.
// The requested range is only bounds-checked by ASSERT.
StringView URLParser::parsedDataView(size_t start, size_t length)
{
    if (LIKELY(!m_seenSyntaxViolation)) {
        ASSERT(start + length <= m_inputString.length());
        return StringView(m_inputString).substring(start, length);
    }

    ASSERT(start + length <= m_asciiBuffer.size());
    return StringView(m_asciiBuffer.data() + start, length);
}

// Returns the current offset in the parsed output: the size of m_asciiBuffer
// once a syntax violation has been seen, otherwise the number of code units
// between m_inputBegin and |iterator|.
// NOTE(review): the numbered lines below interleave the old and new revisions
// of the first branch (rendered-diff residue); the newer one also asserts that
// m_unicodeFragmentBuffer is empty. Only one revision belongs in real source.
template<typename CharacterType>

size_t URLParser::currentPosition(const CodePointIterator<CharacterType>& iterator)

{

950 if (m_seenSyntaxError)

1008 if (UNLIKELY(m_seenSyntaxViolation)) {

1009 ASSERT(m_unicodeFragmentBuffer.isEmpty());

9511010 return m_asciiBuffer.size();

1011 }

    
    return iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));

}

@@size_t URLParser::currentPosition(const

URLParser::URLParser(const String& input, const URL& base, const TextEncoding& encoding)

    : m_inputString(input)

{

959 if (input.isNull())

    if (input.isNull()) {

        if (base.isValid() && !base.m_cannotBeABaseURL)

            m_url = base;

9601022 return;

1023 }

    if (input.is8Bit()) {

        m_inputBegin = input.characters8();

@@URLParser::URLParser(const String& input

        m_inputBegin = input.characters16();

        parse(input.characters16(), input.length(), base, encoding);

    }

    ASSERT(!m_url.m_isValid

        || m_seenSyntaxViolation == (m_url.string() != input)

        || (input.isEmpty() && m_url.m_string == base.m_string));

}

template<typename CharacterType>

@@void URLParser::parse(const CharacterTyp

    m_url = { };

    ASSERT(m_asciiBuffer.isEmpty());

    ASSERT(m_unicodeFragmentBuffer.isEmpty());

978 m_asciiBuffer.reserveInitialCapacity(length);

    
    bool isUTF8Encoding = encoding == UTF8Encoding();

    Vector<UChar> queryBuffer;

    unsigned endIndex = length;

984 while (endIndex && isC0ControlOrSpace(input[endIndex - 1]))

1049 while (UNLIKELY(endIndex && isC0ControlOrSpace(input[endIndex - 1]))) {

1050 syntaxViolation(CodePointIterator<CharacterType>(input, input));

9851051 endIndex--;

1052 }

9861053 CodePointIterator<CharacterType> c(input, input + endIndex);

9871054 CodePointIterator<CharacterType> authorityOrHostBegin;

988 while (!c.atEnd() && isC0ControlOrSpace(*c))

1055 while (UNLIKELY(!c.atEnd() && isC0ControlOrSpace(*c))) {

1056 syntaxViolation(c);

9891057 ++c;

1058 }

    auto beginAfterControlAndSpace = c;

    enum class State : uint8_t {

@@void URLParser::parse(const CharacterTyp

        Fragment,

    };

1014 #define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, a~~sciiBuff~~er size %zu", x, *c, currentPosition(c))

1083#define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, parsed data <%s> size %zu", x, *c, parsedDataView(0, currentPosition(c)).utf8().data(), currentPosition(c))

#define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x)

    State state = State::SchemeStart;

    while (!c.atEnd()) {

1019 if (isTabOrNewline(*c)) {

1020 syntax~~Err~~or(c);

1088 if (UNLIKELY(isTabOrNewline(*c))) {

1089 syntaxViolation(c);

            ++c;

            continue;

        }

@@void URLParser::parse(const CharacterTyp

        case State::SchemeStart:

            LOG_STATE("SchemeStart");

            if (isASCIIAlpha(*c)) {

1098 if (UNLIKELY(isASCIIUpper(*c)))

1099 syntaxViolation(c);

10291100 appendToASCIIBuffer(toASCIILower(*c));

1030 incrementIteratorSkippingTabAndNewLine(c);

1101 incrementIteratorSkippingTabsAndNewlines(c);

                if (c.atEnd()) {

                    m_asciiBuffer.clear();

                    state = State::NoScheme;

@@void URLParser::parse(const CharacterTyp

            break;

        case State::Scheme:

            LOG_STATE("Scheme");

1042 if (isValidSchemeCharacter(*c))

            if (isValidSchemeCharacter(*c)) {

                if (UNLIKELY(isASCIIUpper(*c)))

                    syntaxViolation(c);

10431116 appendToASCIIBuffer(toASCIILower(*c));

1044 else if (*c == ':') {

1117 } else if (*c == ':') {

10451118 m_url.m_schemeEnd = currentPosition(c);

1046 StringView urlScheme = S~~tring~~View(~~m_asciiBuffer.data()~~, m_url.m_schemeEnd);

1119 StringView urlScheme = parsedDataView(0, m_url.m_schemeEnd);

10471120 m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";

1121 appendToASCIIBuffer(':');

                if (urlScheme == "file") {

                    m_urlIsSpecial = true;

                    state = State::File;

1051 appendToASCIIBuffer(':');

1055 appendToASCIIBuffer(':');

10561128 if (isSpecialScheme(urlScheme)) {

10571129 m_urlIsSpecial = true;

1058 if (base.protocolIs(~~m_asciiBuffer.data(), currentPosition(c) - 1~~))

1130 if (base.protocolIs(urlScheme))

                        state = State::SpecialRelativeOrAuthority;

                    else

                        state = State::SpecialAuthoritySlashes;

1134 ++c;

10621135 } else {

10631136 auto maybeSlash = c;

1064 incrementIteratorSkippingTabAndNewLine(maybeSlash);

1137 incrementIteratorSkippingTabsAndNewlines(maybeSlash);

10651138 if (!maybeSlash.atEnd() && *maybeSlash == '/') {

10661139 appendToASCIIBuffer('/');

1067 m_url.m_userStart = currentPosition(c);

1068 state = State::PathOrAuthority;

10691140 c = maybeSlash;

1141 state = State::PathOrAuthority;

10701142 ASSERT(*c == '/');

1143 ++c;

1144 m_url.m_userStart = currentPosition(c);

10711145 } else {

1146 ++c;

                        m_url.m_userStart = currentPosition(c);

                        m_url.m_userEnd = m_url.m_userStart;

                        m_url.m_passwordEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

                        state = State::CannotBeABaseURLPath;

                    }

                }

1082 ++c;

                break;

            } else {

                m_asciiBuffer.clear();

@@void URLParser::parse(const CharacterTyp

                c = beginAfterControlAndSpace;

                break;

            }

1090 incrementIteratorSkippingTabAndNewLine(c);

1164 incrementIteratorSkippingTabsAndNewlines(c);

            if (c.atEnd()) {

                m_asciiBuffer.clear();

                state = State::NoScheme;

@@void URLParser::parse(const CharacterTyp

                return;

            }

            if (base.m_cannotBeABaseURL && *c == '#') {

1104 copyURLPartsUntil(base, URLPart::QueryEnd);

1178 copyURLPartsUntil(base, URLPart::QueryEnd, c);

                state = State::Fragment;

                appendToASCIIBuffer('#');

                ++c;

@@void URLParser::parse(const CharacterTyp

                state = State::Relative;

                break;

            }

1114 copyURLPartsUntil(base, URLPart::SchemeEnd);

1188 copyURLPartsUntil(base, URLPart::SchemeEnd, c);

            appendToASCIIBuffer(':');

            state = State::File;

            break;

@@void URLParser::parse(const CharacterTyp

            LOG_STATE("SpecialRelativeOrAuthority");

            if (*c == '/') {

                appendToASCIIBuffer('/');

1122 incrementIteratorSkippingTabAndNewLine(c);

1196 incrementIteratorSkippingTabsAndNewlines(c);

                if (c.atEnd()) {

                    failure();

                    return;

@@void URLParser::parse(const CharacterTyp

            LOG_STATE("PathOrAuthority");

            if (*c == '/') {

                appendToASCIIBuffer('/');

1140 m_url.m_userStart = currentPosition(c);

11411214 state = State::AuthorityOrHost;

11421215 ++c;

1216 m_url.m_userStart = currentPosition(c);

11431217 authorityOrHostBegin = c;

11441218 } else {

1145 ASSERT(m_asci~~iBuffer.last~~() == '/');

1219 ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");

                m_url.m_userStart = currentPosition(c) - 1;

                m_url.m_userEnd = m_url.m_userStart;

                m_url.m_passwordEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

                ++c;

                break;

            case '?':

1164 copyURLPartsUntil(base, URLPart::PathEnd);

1238 copyURLPartsUntil(base, URLPart::PathEnd, c);

                appendToASCIIBuffer('?');

                state = State::Query;

                ++c;

                break;

            case '#':

1170 copyURLPartsUntil(base, URLPart::QueryEnd);

1244 copyURLPartsUntil(base, URLPart::QueryEnd, c);

                appendToASCIIBuffer('#');

                state = State::Fragment;

                ++c;

                break;

            default:

1176 copyURLPartsUntil(base, URLPart::PathAfterLastSlash);

1250 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);

                state = State::Path;

                break;

            }

@@void URLParser::parse(const CharacterTyp

            LOG_STATE("RelativeSlash");

            if (*c == '/' || *c == '\\') {

                ++c;

1185 copyURLPartsUntil(base, URLPart::SchemeEnd);

1259 copyURLPartsUntil(base, URLPart::SchemeEnd, c);

                appendToASCIIBuffer("://", 3);

                state = State::SpecialAuthorityIgnoreSlashes;

            } else {

1189 copyURLPartsUntil(base, URLPart::PortEnd);

1263 copyURLPartsUntil(base, URLPart::PortEnd, c);

                appendToASCIIBuffer('/');

                m_url.m_pathAfterLastSlash = base.m_portEnd + 1;

                state = State::Path;

@@void URLParser::parse(const CharacterTyp

            break;

        case State::SpecialAuthoritySlashes:

            LOG_STATE("SpecialAuthoritySlashes");

            appendToASCIIBuffer("//", 2);

            if (*c == '/' || *c == '\\') {

                incrementIteratorSkippingTabAndNewLine(c);

                if (!c.atEnd() && (*c == '/' || *c == '\\'))

            if (LIKELY(*c == '/' || *c == '\\')) {

                if (UNLIKELY(*c == '\\'))

                    syntaxViolation(c);

                appendToASCIIBuffer('/');

                incrementIteratorSkippingTabsAndNewlines(c);

                if (LIKELY(!c.atEnd() && (*c == '/' || *c == '\\'))) {

                    if (UNLIKELY(*c == '\\'))

                        syntaxViolation(c);

12011279 ++c;

                    appendToASCIIBuffer('/');

                } else {

                    syntaxViolation(c);

                    appendToASCIIBuffer('/');

                }

            } else {

                syntaxViolation(c);

                appendToASCIIBuffer("//", 2);

            }

            state = State::SpecialAuthorityIgnoreSlashes;

            break;

@@void URLParser::parse(const CharacterTyp

            authorityOrHostBegin = c;

            break;

        case State::AuthorityOrHost:

1216 ~~LOG_STATE("~~AuthorityOrHost~~");~~

1302 CaseAuthorityOrHost:

12171303 {

1304 LOG_STATE("AuthorityOrHost");

                if (*c == '@') {

                    auto lastAt = c;

                    auto findLastAt = c;

@@void URLParser::parse(const CharacterTyp

                    }

                    parseAuthority(CodePointIterator<CharacterType>(authorityOrHostBegin, lastAt));

                    c = lastAt;

1228 incrementIteratorSkippingTabAndNewLine(c);

1315 incrementIteratorSkippingTabsAndNewlines(c);

                    authorityOrHostBegin = c;

                    state = State::Host;

                    m_hostHasPercentOrNonASCII = false;

@@void URLParser::parse(const CharacterTyp

                }

                bool isSlash = *c == '/' || (m_urlIsSpecial && *c == '\\');

                if (isSlash || *c == '?' || *c == '#') {

1236 m_url.m_userEnd = currentPosition(c);

1323 m_url.m_userEnd = currentPosition(authorityOrHostBegin);

                    m_url.m_passwordEnd = m_url.m_userEnd;

                    if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {

                        failure();

                        return;

                    }

1242 if (!isSlash) {

1329 if (UNLIKELY(!isSlash)) {

1330 syntaxViolation(c);

                        appendToASCIIBuffer('/');

                        m_url.m_pathAfterLastSlash = currentPosition(c);

                    }

@@void URLParser::parse(const CharacterTyp

                if (isPercentOrNonASCII(*c))

                    m_hostHasPercentOrNonASCII = true;

                ++c;

1340 if (c.atEnd())

1341 break;

12521342 }

1253 break;

            // Skip the check for tabs which might cause a syntaxViolation.

            // We want to handle the syntaxViolations while actually parsing the authority or host.

            goto CaseAuthorityOrHost;

        case State::Host:

            LOG_STATE("Host");

            if (*c == '/' || *c == '?' || *c == '#') {

@@void URLParser::parse(const CharacterTyp

        case State::File:

            LOG_STATE("File");

            switch (*c) {

1271 case '/':

12721363 case '\\':

                syntaxViolation(c);

                FALLTHROUGH;

            case '/':

                appendToASCIIBuffer('/');

                state = State::FileSlash;

                ++c;

                break;

            case '?':

1372 syntaxViolation(c);

12781373 if (base.isValid() && base.protocolIs("file"))

1279 copyURLPartsUntil(base, URLPart::PathEnd);

1374 copyURLPartsUntil(base, URLPart::PathEnd, c);

                appendToASCIIBuffer("///?", 4);

                m_url.m_userStart = currentPosition(c) - 2;

                m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

                ++c;

                break;

            case '#':

1387 syntaxViolation(c);

12921388 if (base.isValid() && base.protocolIs("file"))

1293 copyURLPartsUntil(base, URLPart::QueryEnd);

1389 copyURLPartsUntil(base, URLPart::QueryEnd, c);

                appendToASCIIBuffer("///#", 4);

                m_url.m_userStart = currentPosition(c) - 2;

                m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

                ++c;

                break;

            default:

1403 syntaxViolation(c);

13071404 if (base.isValid() && base.protocolIs("file") && shouldCopyFileURL(c))

1308 copyURLPartsUntil(base, URLPart::PathAfterLastSlash);

1405 copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);

                else {

                    appendToASCIIBuffer("///", 3);

                    m_url.m_userStart = currentPosition(c) - 1;

@@void URLParser::parse(const CharacterTyp

            break;

        case State::FileSlash:

            LOG_STATE("FileSlash");

1325 if (*c == '/' || *c == '\\') {

            if (LIKELY(*c == '/' || *c == '\\')) {

                if (UNLIKELY(*c == '\\'))

                    syntaxViolation(c);

                ++c;

                appendToASCIIBuffer('/');

                m_url.m_userStart = currentPosition(c);

@@void URLParser::parse(const CharacterTyp

1449 syntaxViolation(c);

            appendToASCIIBuffer("//", 2);

            m_url.m_userStart = currentPosition(c) - 1;

            m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

                    break;

                }

                if (authorityOrHostBegin == c) {

1368 ASSERT(m_asciiBuffer[currentPosition(c) - 1] == '/');

1369 if (*c == '?') {

                    ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");

                    if (UNLIKELY(*c == '?')) {

                        syntaxViolation(c);

13701471 appendToASCIIBuffer("/?", 2);

1472 ++c;

                        m_url.m_pathAfterLastSlash = currentPosition(c) - 1;

                        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;

                        state = State::Query;

1374 ++c;

13751476 break;

13761477 }

1377 if (*c == '#') {

1478 if (UNLIKELY(*c == '#')) {

1479 syntaxViolation(c);

13781480 appendToASCIIBuffer("/#", 2);

1481 ++c;

                        m_url.m_pathAfterLastSlash = currentPosition(c) - 1;

                        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;

                        m_url.m_queryEnd = m_url.m_pathAfterLastSlash;

                        state = State::Fragment;

1383 ++c;

                        break;

                    }

                    state = State::Path;

@@void URLParser::parse(const CharacterTyp

                    failure();

                    return;

                }

1393

1394 if (S~~tringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosi~~tion(c) ~~- m_url.m_passwordEnd) == "localhost") {~~

1495 if (UNLIKELY(equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost"))) {

1496 syntaxViolation(c);

                    m_asciiBuffer.shrink(m_url.m_passwordEnd);

                    m_url.m_hostEnd = currentPosition(c);

                    m_url.m_portEnd = m_url.m_hostEnd;

@@void URLParser::parse(const CharacterTyp

        case State::Path:

            LOG_STATE("Path");

            if (*c == '/' || (m_urlIsSpecial && *c == '\\')) {

1518 if (UNLIKELY(m_urlIsSpecial && *c == '\\'))

1519 syntaxViolation(c);

14161520 appendToASCIIBuffer('/');

1417 m_url.m_pathAfterLastSlash = currentPosition(c);

14181521 ++c;

1522 m_url.m_pathAfterLastSlash = currentPosition(c);

14191523 break;

14201524 }

1421 if (currentPosition(c) && m_asciiBuffer[currentPosition(c) - 1] == '/') {

1422 if (isDoubleDotPathSegment(c)) {

            if (UNLIKELY(currentPosition(c) && parsedDataView(currentPosition(c) - 1, 1) == "/")) {

                if (UNLIKELY(isDoubleDotPathSegment(c))) {

                    syntaxViolation(c);

                    consumeDoubleDotPathSegment(c);

                    popPath();

                    break;

                }

1427 if (m_asciiBuffer[currentPosition(c) - 1] == '/' && isSingleDotPathSegment(c)) {

1532 if (UNLIKELY(isSingleDotPathSegment(c))) {

1533 syntaxViolation(c);

                    consumeSingleDotPathSegment(c);

                    break;

                }

@@void URLParser::parse(const CharacterTyp

                break;

            }

            if (isPercentEncodedDot(c)) {

1550 if (UNLIKELY(*c != '.'))

1551 syntaxViolation(c);

                appendToASCIIBuffer('.');

                ASSERT(*c == '%');

                ++c;

@@void URLParser::parse(const CharacterTyp

1453 utf8PercentEncode<isInDefaultEncodeSet>(*c);

1561 utf8PercentEncode<isInDefaultEncodeSet>(c);

            ++c;

            break;

        case State::CannotBeABaseURLPath:

@@void URLParser::parse(const CharacterTyp

                state = State::Fragment;

            } else if (*c == '/') {

                appendToASCIIBuffer('/');

1467 m_url.m_pathAfterLastSlash = currentPosition(c);

14681575 ++c;

1576 m_url.m_pathAfterLastSlash = currentPosition(c);

14691577 } else {

1470 utf8PercentEncode<isInSimpleEncodeSet>(*c);

1578 utf8PercentEncode<isInSimpleEncodeSet>(c);

@@void URLParser::parse(const CharacterTyp

                break;

            }

            if (isUTF8Encoding)

1484 utf8QueryEncode(*c);

1592 utf8QueryEncode(c);

            else

                appendCodePoint(queryBuffer, *c);

            ++c;

            break;

        case State::Fragment:

1490 LOG_STATE("Fragment");

1491 if (m_unicodeFragmentBuffer.isEmpty() && isASCII(*c))

            CaseFragment:

            LOG(URLParser, "State Fragment");

            if (!m_seenUnicodeFragmentCodePoint && isASCII(*c))

14921601 appendToASCIIBuffer(*c);

1493 else

1494 appendCodePoint(m_unicodeFragmentBuffer, *c);

            else {

                m_seenUnicodeFragmentCodePoint = true;

                if (UNLIKELY(m_seenSyntaxViolation))

                    appendCodePoint(m_unicodeFragmentBuffer, *c);

                else {

                    ASSERT(m_asciiBuffer.isEmpty());

                    ASSERT(m_unicodeFragmentBuffer.isEmpty());

                }

            }

14951611 ++c;

1496 break;

            while (UNLIKELY(!c.atEnd() && isTabOrNewline(*c))) {

                fragmentSyntaxViolation(c);

                ++c;

            }

            if (c.atEnd())

                break;

            // Skip the check for tabs which might cause a syntaxViolation.

            // We need to handle them differently with fragmentSyntaxViolation.

            goto CaseFragment;

        }

    }

    switch (state) {

    case State::SchemeStart:

        LOG_FINAL_STATE("SchemeStart");

1503 if (!currentPosition(c) && base.isValid()) {

1628 if (!currentPosition(c) && base.isValid() && !base.m_cannotBeABaseURL) {

            m_url = base;

            return;

        }

@@void URLParser::parse(const CharacterTyp

        RELEASE_ASSERT_NOT_REACHED();

    case State::SpecialRelativeOrAuthority:

        LOG_FINAL_STATE("SpecialRelativeOrAuthority");

1518 copyURLPartsUntil(base, URLPart::QueryEnd);

1643 copyURLPartsUntil(base, URLPart::QueryEnd, c);

        m_url.m_fragmentEnd = m_url.m_queryEnd;

        break;

    case State::PathOrAuthority:

        LOG_FINAL_STATE("PathOrAuthority");

        ASSERT(m_url.m_userStart);

        ASSERT(m_url.m_userStart == currentPosition(c));

1525 ASSERT(m_asci~~iBuffer.last~~() == '/');

1650 ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");

        m_url.m_userStart--;

        m_url.m_userEnd = m_url.m_userStart;

        m_url.m_passwordEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

        break;

    case State::Relative:

        LOG_FINAL_STATE("Relative");

1538 copyURLPartsUntil(base, URLPart::FragmentEnd);

1663 copyURLPartsUntil(base, URLPart::FragmentEnd, c);

        break;

    case State::RelativeSlash:

        LOG_FINAL_STATE("RelativeSlash");

1542 copyURLPartsUntil(base, URLPart::PortEnd);

1667 copyURLPartsUntil(base, URLPart::PortEnd, c);

        appendToASCIIBuffer('/');

        m_url.m_pathAfterLastSlash = base.m_portEnd + 1;

        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;

@@void URLParser::parse(const CharacterTyp

        break;

    case State::AuthorityOrHost:

        LOG_FINAL_STATE("AuthorityOrHost");

1568 m_url.m_userEnd = currentPosition(c);

1693 m_url.m_userEnd = currentPosition(authorityOrHostBegin);

        m_url.m_passwordEnd = m_url.m_userEnd;

        if (authorityOrHostBegin.atEnd()) {

            m_url.m_hostEnd = m_url.m_userEnd;

@@void URLParser::parse(const CharacterTyp

            failure();

            return;

        }

1702 syntaxViolation(c);

        appendToASCIIBuffer('/');

        m_url.m_pathEnd = m_url.m_portEnd + 1;

        m_url.m_pathAfterLastSlash = m_url.m_pathEnd;

@@void URLParser::parse(const CharacterTyp

            failure();

            return;

        }

1715 syntaxViolation(c);

        appendToASCIIBuffer('/');

        m_url.m_pathEnd = m_url.m_portEnd + 1;

        m_url.m_pathAfterLastSlash = m_url.m_pathEnd;

@@void URLParser::parse(const CharacterTyp

    case State::File:

        LOG_FINAL_STATE("File");

        if (base.isValid() && base.protocolIs("file")) {

1598 copyURLPartsUntil(base, URLPart::QueryEnd);

1725 copyURLPartsUntil(base, URLPart::QueryEnd, c);

15991726 appendToASCIIBuffer(':');

16001727 }

1728 syntaxViolation(c);

        appendToASCIIBuffer("///", 3);

        m_url.m_userStart = currentPosition(c) - 1;

        m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

        break;

    case State::FileSlash:

        LOG_FINAL_STATE("FileSlash");

1742 syntaxViolation(c);

1743 m_url.m_userStart = currentPosition(c) + 1;

16141744 appendToASCIIBuffer("//", 2);

1615 m_url.m_userStart = currentPosition(c) - 1;

        m_url.m_userEnd = m_url.m_userStart;

        m_url.m_passwordEnd = m_url.m_userStart;

        m_url.m_hostEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

    case State::FileHost:

        LOG_FINAL_STATE("FileHost");

        if (authorityOrHostBegin == c) {

1757 syntaxViolation(c);

            appendToASCIIBuffer('/');

            m_url.m_userStart = currentPosition(c) - 1;

            m_url.m_userEnd = m_url.m_userStart;

@@void URLParser::parse(const CharacterTyp

            return;

        }

1646 if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd) == "localhost") {

1776 syntaxViolation(c);

1777 if (equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost")) {

            m_asciiBuffer.shrink(m_url.m_passwordEnd);

            m_url.m_hostEnd = currentPosition(c);

            m_url.m_portEnd = m_url.m_hostEnd;

@@void URLParser::parse(const CharacterTyp

        m_url.m_fragmentEnd = m_url.m_queryEnd;

        break;

    case State::Fragment:

        LOG_FINAL_STATE("Fragment");

        m_url.m_fragmentEnd = currentPosition(c) + m_unicodeFragmentBuffer.size();

        break;

        {

            LOG_FINAL_STATE("Fragment");

            size_t length = m_seenSyntaxViolation ? m_asciiBuffer.size() + m_unicodeFragmentBuffer.size() : c.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));

            m_url.m_fragmentEnd = length;

            break;

        }

16831817 }

16841818

1685 if (!m_seenSyntax~~Err~~or) {

1819 if (LIKELY(!m_seenSyntaxViolation)) {

        m_url.m_string = m_inputString;

        ASSERT(m_asciiBuffer.isEmpty());

        ASSERT(m_unicodeFragmentBuffer.isEmpty());

1689 } else if (m_unicodeFragmentBuffer.isEmpty())

1823 } else if (!m_seenUnicodeFragmentCodePoint) {

1824 ASSERT(m_unicodeFragmentBuffer.isEmpty());

16901825 m_url.m_string = String::adopt(WTFMove(m_asciiBuffer));

1691 else {

1826 } else {

16921827 Vector<UChar> buffer;

1693 buffer.reserveInitialCapacity(cu~~rrentPosition~~(c) + m_unicodeFragmentBuffer.size());

1828 buffer.reserveInitialCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size());

        buffer.appendVector(m_asciiBuffer);

        buffer.appendVector(m_unicodeFragmentBuffer);

        m_url.m_string = String::adopt(WTFMove(buffer));

@@void URLParser::parse(const CharacterTyp

template<typename CharacterType>

void URLParser::parseAuthority(CodePointIterator<CharacterType> iterator)

{

1706 if (iterator.atEnd()) {

1841 if (UNLIKELY(iterator.atEnd())) {

1842 syntaxViolation(iterator);

        m_url.m_userEnd = currentPosition(iterator);

        m_url.m_passwordEnd = m_url.m_userEnd;

        return;

    }

1711 for (; !iterator.atEnd(); ++iterator) {

1847 auto authorityOrHostBegin = iterator;

1848 for (; !iterator.atEnd(); incrementIteratorSkippingTabsAndNewlines(iterator)) {

17121849 if (*iterator == ':') {

1713 ++iterator;

17141850 m_url.m_userEnd = currentPosition(iterator);

1715 if (iterator.atEnd()) {

            auto iteratorAtColon = iterator;

            incrementIteratorSkippingTabsAndNewlines(iterator, authorityOrHostBegin);

            if (UNLIKELY(iterator.atEnd())) {

                syntaxViolation(iteratorAtColon);

                m_url.m_passwordEnd = m_url.m_userEnd;

                if (m_url.m_userEnd > m_url.m_userStart)

                    appendToASCIIBuffer('@');

@@void URLParser::parseAuthority(CodePoint

            appendToASCIIBuffer(':');

            break;

        }

1724 utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);

1863 utf8PercentEncode<isInUserInfoEncodeSet>(iterator);

17251864 }

1726 for (; !iterator.atEnd(); ++iterator)

1727 utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);

1865 for (; !iterator.atEnd(); incrementIteratorSkippingTabsAndNewlines(iterator))

1866 utf8PercentEncode<isInUserInfoEncodeSet>(iterator);

    m_url.m_passwordEnd = currentPosition(iterator);

    if (!m_url.m_userEnd)

        m_url.m_userEnd = m_url.m_passwordEnd;

@@void URLParser::serializeIPv6(URLParser:

}

template<typename CharacterType>

1827 ~~inline static~~ Optional<uint32_t> parseIPv4Number(CodePointIterator<CharacterType>& iterator)

1966Optional<uint32_t> URLParser::parseIPv4Number(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)

{

    // FIXME: Check for overflow.

    enum class State : uint8_t {

@@inline static Optional<uint32_t> parseIP

        }

        switch (state) {

        case State::UnknownBase:

1846 if (*iterator == '0') {

1985 if (UNLIKELY(*iterator == '0')) {

                ++iterator;

                state = State::OctalOrHex;

                break;

@@inline static Optional<uint32_t> parseIP

            state = State::Decimal;

            break;

        case State::OctalOrHex:

1993 syntaxViolation(iteratorForSyntaxViolationPosition);

            if (*iterator == 'x' || *iterator == 'X') {

                ++iterator;

                state = State::Hex;

@@inline static Optional<uint32_t> parseIP

            ++iterator;

            break;

        case State::Octal:

2009 ASSERT(m_seenSyntaxViolation);

            if (*iterator < '0' || *iterator > '7')

                return Nullopt;

            value *= 8;

@@inline static Optional<uint32_t> parseIP

            ++iterator;

            break;

        case State::Hex:

2017 ASSERT(m_seenSyntaxViolation);

            if (!isASCIIHexDigit(*iterator))

                return Nullopt;

            value *= 16;

@@inline static uint64_t pow256(size_t exp

template<typename CharacterType>

Optional<URLParser::IPv4Address> URLParser::parseIPv4Host(CodePointIterator<CharacterType> iterator)

{

2039 auto hostBegin = iterator;

2040

    Vector<uint32_t, 4> items;

    items.reserveInitialCapacity(4);

    while (!iterator.atEnd()) {

        if (items.size() >= 4)

            return Nullopt;

1902 if (auto item = parseIPv4Number(iterator))

2046 if (auto item = parseIPv4Number(iterator, hostBegin))

            items.append(item.value());

        else

            return Nullopt;

@@Optional<URLParser::IPv4Address> URLPars

        if (item > 255)

            return Nullopt;

    }

    if (UNLIKELY(items.size() != 4))

        syntaxViolation(hostBegin);

    IPv4Address ipv4 = items.takeLast();

    for (size_t counter = 0; counter < items.size(); ++counter)

        ipv4 += items[counter] * pow256(3 - counter);

@@Optional<URLParser::IPv4Address> URLPars

template<typename CharacterType>

Optional<URLParser::IPv6Address> URLParser::parseIPv6Host(CodePointIterator<CharacterType> c)

{

    ASSERT(*c == '[');

    auto hostBegin = c;

    incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

    if (c.atEnd())

        return Nullopt;

@@Optional<URLParser::IPv6Address> URLPars

    Optional<size_t> compressPointer;

    if (*c == ':') {

1938 ++c;

2089 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

        if (c.atEnd())

            return Nullopt;

        if (*c != ':')

            return Nullopt;

1943 ++c;

2094 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

        ++piecePointer;

        compressPointer = piecePointer;

    }

@@Optional<URLParser::IPv6Address> URLPars

        if (*c == ':') {

            if (compressPointer)

                return Nullopt;

1954 ++c;

2105 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

            ++piecePointer;

            compressPointer = piecePointer;

            continue;

        }

        uint16_t value = 0;

1960 for (size_t length = 0; length < 4; length++) {

2111 size_t length = 0;

2112 for (; length < 4; length++) {

            if (c.atEnd())

                break;

            if (!isASCIIHexDigit(*c))

                break;

            value = value * 0x10 + toASCIIHexValue(*c);

1966 ++c;

2118 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

19672119 }

        if (UNLIKELY(length > 1 && !value))

            syntaxViolation(hostBegin);

        address[piecePointer++] = value;

        if (c.atEnd())

            break;

        if (*c != ':')

            return Nullopt;

1973 ++c;

2128 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

    }

    if (!c.atEnd()) {

@@Optional<URLParser::IPv6Address> URLPars

                    return Nullopt;

                else

                    value = value.value() * 10 + number;

1992 ++c;

2147 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

                if (c.atEnd())

                    return Nullopt;

                if (value.value() > 255)

@@Optional<URLParser::IPv6Address> URLPars

            if (dotsSeen == 1 || dotsSeen == 3)

                piecePointer++;

            if (!c.atEnd())

2004 ++c;

2159 incrementIteratorSkippingTabsAndNewlines(c, hostBegin);

            if (dotsSeen == 3 && !c.atEnd())

                return Nullopt;

            dotsSeen++;

@@Optional<URLParser::IPv6Address> URLPars

            std::swap(address[piecePointer--], address[compressPointer.value() + swaps-- - 1]);

    } else if (piecePointer != 8)

        return Nullopt;

    Optional<size_t> possibleCompressPointer = findLongestZeroSequence(address);

    if (possibleCompressPointer)

        possibleCompressPointer.value()++;

    if (UNLIKELY(compressPointer != possibleCompressPointer))

        syntaxViolation(hostBegin);

    return address;

}

@@inline static bool hasInvalidDomainChara

template<typename CharacterType>

bool URLParser::parsePort(CodePointIterator<CharacterType>& iterator)

{

    ASSERT(*iterator == ':');

    auto colonIterator = iterator;

    incrementIteratorSkippingTabsAndNewlines(iterator, colonIterator);

21082273 uint32_t port = 0;

2109 if (iterator.atEnd()) {

2110 m_url.m_portEnd = currentPosition(iterator);

    if (UNLIKELY(iterator.atEnd())) {

        m_url.m_portEnd = currentPosition(colonIterator);

        syntaxViolation(colonIterator);

21112277 return true;

21122278 }

2113 appendToASCIIBuffer(':');

21142279 for (; !iterator.atEnd(); ++iterator) {

2115 if (isTabOrNewline(*iterator))

2280 if (UNLIKELY(isTabOrNewline(*iterator))) {

2281 syntaxViolation(colonIterator);

21162282 continue;

2283 }

        if (isASCIIDigit(*iterator)) {

            port = port * 10 + *iterator - '0';

            if (port > std::numeric_limits<uint16_t>::max())

@@bool URLParser::parsePort(CodePointItera

            return false;

    }

    if (isDefaultPort(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd), port)) {

        ASSERT(m_asciiBuffer.last() == ':');

        m_asciiBuffer.shrink(currentPosition(iterator) - 1);

    } else {

    if (UNLIKELY(isDefaultPort(parsedDataView(0, m_url.m_schemeEnd), port)))

        syntaxViolation(colonIterator);

    else {

        appendToASCIIBuffer(':');

        ASSERT(port <= std::numeric_limits<uint16_t>::max());

        appendNumberToASCIIBuffer<uint16_t>(static_cast<uint16_t>(port));

    }

@@bool URLParser::parseHostAndPort(CodePoi

    if (iterator.atEnd())

        return false;

    if (*iterator == '[') {

2143 ++iterator;

        auto ipv6End = iterator;

        while (!ipv6End.atEnd() && *ipv6End != ']')

            ++ipv6End;

        if (auto address = parseIPv6Host(CodePointIterator<CharacterType>(iterator, ipv6End))) {

            serializeIPv6(address.value());

2149 m_url.m_hostEnd = currentPosition(iterator);

21502315 if (!ipv6End.atEnd()) {

2151 ++ipv6End;

2316 incrementIteratorSkippingTabsAndNewlines(ipv6End);

21522317 if (!ipv6End.atEnd() && *ipv6End == ':') {

2153 ++ipv6End;

2318 m_url.m_hostEnd = currentPosition(ipv6End);

21542319 return parsePort(ipv6End);

21552320 }

2156 m_url.m_portEnd = currentPosition(iterator);

2321 m_url.m_hostEnd = currentPosition(ipv6End);

2322 m_url.m_portEnd = m_url.m_hostEnd;

21572323 return true;

21582324 }

2325 m_url.m_hostEnd = currentPosition(ipv6End);

            return true;

        }

    }

2162

2163 if (!m_hostHasPercentOrNonASCII) {

2329

2330 if (LIKELY(!m_hostHasPercentOrNonASCII)) {

        auto hostIterator = iterator;

        for (; !iterator.atEnd(); ++iterator) {

            if (isTabOrNewline(*iterator))

@@bool URLParser::parseHostAndPort(CodePoi

                m_url.m_portEnd = currentPosition(iterator);

                return true;

            }

2180 ++iterator;

            return parsePort(iterator);

        }

        for (; hostIterator != iterator; ++hostIterator) {

2184 if (!isTabOrNewline(*hostIterator))

            if (LIKELY(!isTabOrNewline(*hostIterator))) {

                if (UNLIKELY(isASCIIUpper(*hostIterator)))

                    syntaxViolation(hostIterator);

21852353 appendToASCIIBuffer(toASCIILower(*hostIterator));

2354 } else

2355 syntaxViolation(hostIterator);

21862356 }

21872357 m_url.m_hostEnd = currentPosition(iterator);

        if (!hostIterator.atEnd()) {

            ASSERT(*hostIterator == ':');

            incrementIteratorSkippingTabAndNewLine(hostIterator);

2358 if (!hostIterator.atEnd())

21912359 return parsePort(hostIterator);

2192 }

        m_url.m_portEnd = currentPosition(iterator);

        return true;

    }

2364 syntaxViolation(iterator);

2365

    Vector<LChar, defaultInlineBufferSize> utf8Encoded;

    for (; !iterator.atEnd(); ++iterator) {

        if (isTabOrNewline(*iterator))

@@bool URLParser::parseHostAndPort(CodePoi

            m_url.m_portEnd = currentPosition(iterator);

            return true;

        }

2226 ++iterator;

        return parsePort(iterator);

    }

    appendToASCIIBuffer(asciiDomainCharacters, asciiDomainValue.size());

    m_url.m_hostEnd = currentPosition(iterator);

    if (!iterator.atEnd()) {

        ASSERT(*iterator == ':');

        incrementIteratorSkippingTabAndNewLine(iterator);

2400 if (!iterator.atEnd())

22352401 return parsePort(iterator);

2236 }

    m_url.m_portEnd = currentPosition(iterator);

    return true;

}

@@inline static void serializeURLEncodedFo

            || (byte >= 0x30 && byte <= 0x39)

            || (byte >= 0x41 && byte <= 0x5A)

            || byte == 0x5F

2284 || (byte >= 0x61 && byte <= 0x7A))

2449 || (byte >= 0x61 && byte <= 0x7A)) // FIXME: Put these in the characterClassTable to avoid branches.

            output.append(byte);

        else

            percentEncodeByte(byte, output);

@@bool URLParser::allValuesEqual(const URL

}

bool URLParser::internalValuesConsistent(const URL& url)

2352 {

2517{

    return url.m_schemeEnd <= url.m_userStart

        && url.m_userStart <= url.m_userEnd

        && url.m_userEnd <= url.m_passwordEnd

206412

Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp

@@TEST_F(URLParserTest, Basic)

    checkURL("http://[0:f::f:f:0:0]", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});

    checkURL("http://[0:f:0:0:f::]", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});

    checkURL("http://[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});

    checkURL("http://[0:f:0:0:f::]:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});

    checkURL("http://[0:f:0:0:f::]:\t", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});

    checkURL("http://[0:f:0:0:f::]\t:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});

    checkURL("http://\t[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});

    checkURL("http://[\t::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});

    checkURL("http://[:\t:f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});

    checkURL("http://[::\tf:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});

    checkURL("http://[::f\t:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});

    checkURL("http://[::f:\t0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});

    checkURL("http://example.com/path1/path2/.", {"http", "", "", "example.com", 0, "/path1/path2/", "", "", "http://example.com/path1/path2/"});

    checkURL("http://example.com/path1/path2/..", {"http", "", "", "example.com", 0, "/path1/", "", "", "http://example.com/path1/"});

    checkURL("http://example.com/path1/path2/./path3", {"http", "", "", "example.com", 0, "/path1/path2/path3", "", "", "http://example.com/path1/path2/path3"});

@@TEST_F(URLParserTest, Basic)

    checkURL("http://host/a%20B", {"http", "", "", "host", 0, "/a%20B", "", "", "http://host/a%20B"});

    checkURL("http://host?q=@ <>!#fragment", {"http", "", "", "host", 0, "/", "q=@%20%3C%3E!", "fragment", "http://host/?q=@%20%3C%3E!#fragment"});

    checkURL("http://user:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});

    checkURL("http://user:@\thost", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});

    checkURL("http://user\t:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});

    checkURL("http://use\tr:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});

194206 checkURL("http://127.0.0.1:10100/path", {"http", "", "", "127.0.0.1", 10100, "/path", "", "", "http://127.0.0.1:10100/path"});

195207 checkURL("http://127.0.0.1:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});

    checkURL("http://127.0.0.1\t:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});

    checkURL("http://127.0.0.1:\t/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});

    checkURL("http://127.0.0.1:/\tpath", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});

    checkURL("http://127.0.0.1:123", {"http", "", "", "127.0.0.1", 123, "/", "", "", "http://127.0.0.1:123/"});

    checkURL("http://127.0.0.1:", {"http", "", "", "127.0.0.1", 0, "/", "", "", "http://127.0.0.1/"});

    checkURL("http://[0:f::f:f:0:0]:123/path", {"http", "", "", "[0:f::f:f:0:0]", 123, "/path", "", "", "http://[0:f::f:f:0:0]:123/path"});

    checkURL("http://[0:f::f:f:0:0]:123", {"http", "", "", "[0:f::f:f:0:0]", 123, "/", "", "", "http://[0:f::f:f:0:0]:123/"});

    checkURL("http://[0:f:0:0:f:\t:]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});

    checkURL("http://[0:f:0:0:f::\t]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});

    checkURL("http://[0:f:0:0:f::]\t:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});

    checkURL("http://[0:f:0:0:f::]:\t123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});

    checkURL("http://[0:f:0:0:f::]:1\t23", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});

    checkURL("http://[0:f::f:f:0:0]:/path", {"http", "", "", "[0:f::f:f:0:0]", 0, "/path", "", "", "http://[0:f::f:f:0:0]/path"});

    checkURL("http://[0:f::f:f:0:0]:", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});

    checkURL("http://host:10100/path", {"http", "", "", "host", 10100, "/path", "", "", "http://host:10100/path"});

@@TEST_F(URLParserTest, Basic)

    checkURL("sc:/pa/", {"sc", "", "", "", 0, "/pa/", "", "", "sc:/pa/"});

    checkURL("notspecial:/notuser:notpassword@nothost", {"notspecial", "", "", "", 0, "/notuser:notpassword@nothost", "", "", "notspecial:/notuser:notpassword@nothost"});

    checkURL("sc://pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});

    checkURL("sc://\tpa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});

    checkURL("sc:/\t/pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});

    checkURL("sc:\t//pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});

    checkURL("http://host   \a   ", {"http", "", "", "host", 0, "/", "", "", "http://host/"});

    checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});

    checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});

@@TEST_F(URLParserTest, Basic)

    checkURL("http://256/", {"http", "", "", "256", 0, "/", "", "", "http://256/"});

    checkURL("http://256./", {"http", "", "", "256.", 0, "/", "", "", "http://256./"});

    checkURL("http://123.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});

243 checkURL("http://123\t.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});

244 checkURL("http://123.\t256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});

    checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});

    checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});

    checkURL("notspecial:/", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});

    checkURL("data:image/png;base64,encoded-data-follows-here", {"data", "", "", "", 0, "image/png;base64,encoded-data-follows-here", "", "", "data:image/png;base64,encoded-data-follows-here"});

    checkURL("data:image/png;base64,encoded/data-with-slash", {"data", "", "", "", 0, "image/png;base64,encoded/data-with-slash", "", "", "data:image/png;base64,encoded/data-with-slash"});

250 checkURL("about:~", {"about", "", "", "", 0, "~", "", "", "about:~"});

    // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,

    // and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.

@@TEST_F(URLParserTest, ParseRelative)

    checkRelativeURL("\\@", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/@", "", "", "http://example.org/@"});

    checkRelativeURL("/path3", "http://user@example.org/path1/path2", {"http", "user", "", "example.org", 0, "/path3", "", "", "http://user@example.org/path3"});

    checkRelativeURL("", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});

316 checkRelativeURL("\t", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});

317 checkRelativeURL(" ", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});

    checkRelativeURL("  \a  \t\n", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});

    checkRelativeURL(":foo.com\\", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/:foo.com/", "", "", "http://example.org/foo/:foo.com/"});

    checkRelativeURL("http:/example.com/", "about:blank", {"http", "", "", "example.com", 0, "/", "", "", "http://example.com/"});

@@TEST_F(URLParserTest, ParseRelative)

    checkRelativeURL("notspecial:/", "http://host", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});

    checkRelativeURL("foo:/", "http://example.org/foo/bar", {"foo", "", "", "", 0, "/", "", "", "foo:/"});

    checkRelativeURL("://:0/", "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/://:0/", "", "", "http://webkit.org/://:0/"});

345 checkRelativeURL(String(), "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/", "", "", "http://webkit.org/"});

    // The checking of slashes in SpecialAuthoritySlashes needed to get this to pass contradicts what is in the spec,

    // but it is included in the web platform tests.

@@TEST_F(URLParserTest, ParserDifferences)

    checkURLDifferences(utf16String(u"http://host/path#💩\t💩"),

        {"http", "", "", "host", 0, "/path", "", utf16String(u"💩💩"), utf16String(u"http://host/path#💩💩")},

        {"http", "", "", "host", 0, "/path", "", "%F0%9F%92%A9%F0%9F%92%A9", "http://host/path#%F0%9F%92%A9%F0%9F%92%A9"});

    checkURLDifferences("http://%48OsT",

        {"http", "", "", "host", 0, "/", "", "", "http://host/"},

        {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});

    checkURLDifferences("http://h%4FsT",

        {"http", "", "", "host", 0, "/", "", "", "http://host/"},

        {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});

    checkURLDifferences("http://h%4fsT",

        {"http", "", "", "host", 0, "/", "", "", "http://host/"},

        {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});

    checkURLDifferences("http://h%6fsT",

        {"http", "", "", "host", 0, "/", "", "", "http://host/"},

        {"http", "", "", "h%6fst", 0, "/", "", "", "http://h%6fst/"});

    checkURLDifferences("http://host/`",

        {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},

        {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});

    checkURLDifferences("aA://",

        {"aa", "", "", "", 0, "/", "", "", "aa:///"},

        {"aa", "", "", "", 0, "//", "", "", "aa://"});

    checkURLDifferences("A://",

        {"a", "", "", "", 0, "/", "", "", "a:///"},

        {"a", "", "", "", 0, "//", "", "", "a://"});

}

TEST_F(URLParserTest, DefaultPort)

{

    checkURL("FtP://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("ftp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("f\ttp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("f\ttp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("f\ttp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("f\ttp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("f\ttp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("ftp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("ftp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("ftp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("ftp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});

    checkURL("ftp://host:22/", {"ftp", "", "", "host", 22, "/", "", "", "ftp://host:22/"});

    checkURLDifferences("ftp://host:21",

        {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"},

@@TEST_F(URLParserTest, DefaultPort)

    checkURLDifferences("unknown://host:81",

        {"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"},

        {"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"});

    checkURLDifferences("http://%48OsT",

        {"http", "", "", "host", 0, "/", "", "", "http://host/"},

        {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});

    checkURLDifferences("http://host/`",

        {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},

        {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});

}

static void shouldFail(const String& urlString)

@@TEST_F(URLParserTest, ParserFailures)

    shouldFail("  \a  ");

    shouldFail("");

    shouldFail(String());

750 shouldFail("", "about:blank");

751 shouldFail(String(), "about:blank");

    shouldFail("http://127.0.0.1:abc");

    shouldFail("http://host:abc");

    shouldFail("http://a:@", "about:blank");

@@TEST_F(URLParserTest, ParserFailures)

    shouldFail("://:0/");

    shouldFail("://:0/", "");

    shouldFail("://:0/", "about:blank");

781 shouldFail("about~");

}

// These are in the spec but not in the web platform tests.

206412

Source/WebCore/ChangeLog

Source/WebCore/platform/URL.cpp

Source/WebCore/platform/URL.h

Source/WebCore/platform/URLParser.cpp

Source/WebCore/platform/URLParser.h

Tools/ChangeLog

Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp