WebKit Bugzilla
Attachment 342430 Details for Bug 186359: Add script to update web-platform-tests TestExpectations after import
Description: Patch
Filename: bug-186359-20180611110217.patch
MIME Type: text/plain
Creator: Brendan McLoughlin
Created: 2018-06-11 08:02:18 PDT
Size: 20.83 KB
Flags: patch, obsolete

>Subversion Revision: 232551
>diff --git a/Tools/ChangeLog b/Tools/ChangeLog
>index 357f65e9fb45266d3292a74b659553ae90fd1ca9..3a07762785fd345b47cb48017790baa28d33b1b8 100644
>--- a/Tools/ChangeLog
>+++ b/Tools/ChangeLog
>@@ -1,3 +1,42 @@
>+2018-06-06  Brendan McLoughlin  <brendan@bocoup.com>
>+
>+        WIP Add script to update web-platform-tests expectations after import
>+        https://bugs.webkit.org/show_bug.cgi?id=186359
>+
>+        Reviewed by NOBODY (OOPS!).
>+
>+        * Scripts/update-w3c-test-expectations: Added.
>+        * Scripts/webkitpy/layout_tests/controllers/manager.py:
>+        (Manager._upload_json_files):
>+        * Scripts/webkitpy/w3c/test_updater.py: Added.
>+        (configure_logging):
>+        (configure_logging.LogHandler):
>+        (configure_logging.LogHandler.format):
>+        (render_expectations):
>+        (remove_test_expectation):
>+        (flatten_path):
>+        (pre_process_tests):
>+        (main):
>+        (TestExpectationUpdater):
>+        (TestExpectationUpdater.__init__):
>+        (TestExpectationUpdater.do_update):
>+        (TestExpectationUpdater.update_expectation):
>+        (TestExpectationUpdater.missing_test):
>+        (TestExpectationUpdater.failing_ref_test):
>+        (TestExpectationUpdater.reset_testharness_test):
>+        (TestExpectationUpdater.failing_testharness_test):
>+        (TestExpectationUpdater.crash_test):
>+        (TestExpectationUpdater.timeout_test):
>+        (TestExpectationUpdater.resolve_flaky_test):
>+        (TestExpectationUpdater.unexpected_pass_test):
>+        (TestExpectationUpdater.run_webkit_tests):
>+        (TestExpectationUpdater.extract_test_result):
>+        (TestExpectationUpdater.update_test_expectation):
>+        (TestExpectationUpdater.remove_test_expectation):
>+        (TestExpectationUpdater.test_expectations_path):
>+        (TestExpectationUpdater.results_file):
>+        (results_match_expectation):
>+
> 2018-06-06  Brent Fulgham  <bfulgham@apple.com>
>
>         Adjust compile and runtime flags to match shippable state of features (Part 2)
>diff --git a/Tools/Scripts/update-w3c-test-expectations b/Tools/Scripts/update-w3c-test-expectations
>new file mode 100755
>index 0000000000000000000000000000000000000000..96fc9dd9dd24a92a97c685308e941704a147cd0a
>--- /dev/null
>+++ b/Tools/Scripts/update-w3c-test-expectations
>@@ -0,0 +1,35 @@
>+#!/usr/bin/env python
>+
>+# Copyright (C) 2018 Bocoup LLC. All rights reserved.
>+#
>+# Redistribution and use in source and binary forms, with or without
>+# modification, are permitted provided that the following conditions
>+# are met:
>+#
>+# 1. Redistributions of source code must retain the above
>+#    copyright notice, this list of conditions and the following
>+#    disclaimer.
>+# 2. Redistributions in binary form must reproduce the above
>+#    copyright notice, this list of conditions and the following
>+#    disclaimer in the documentation and/or other materials
>+#    provided with the distribution.
>+#
>+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
>+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
>+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
>+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
>+# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
>+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
>+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>+# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>+# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>+# SUCH DAMAGE.
>+
>+import sys
>+
>+from webkitpy.w3c import test_updater
>+
>+
>+sys.exit(test_updater.main(sys.argv[1:], sys.stdout, sys.stderr))
>diff --git a/Tools/Scripts/webkitpy/layout_tests/controllers/manager.py b/Tools/Scripts/webkitpy/layout_tests/controllers/manager.py
>index cb10dba4c259e7d3f29b8c40d4c3246b086bac17..6e8f092ec619604340eee729366b82c19adab282 100644
>--- a/Tools/Scripts/webkitpy/layout_tests/controllers/manager.py
>+++ b/Tools/Scripts/webkitpy/layout_tests/controllers/manager.py
>@@ -456,8 +456,6 @@ class Manager(object):
>         # The tools use the version we uploaded to the results server anyway.
>         self._filesystem.remove(times_json_path)
>         self._filesystem.remove(incremental_results_path)
>-        if results_including_passes:
>-            self._filesystem.remove(results_json_path)
>
>     def upload_results(self, results_json_path, start_time, end_time):
>         if not self._options.results_server_host:
>@@ -541,7 +539,7 @@ class Manager(object):
>     def _print_expectation_line_for_test(self, format_string, test):
>         line = self._expectations.model().get_expectation_line(test)
>         print(format_string.format(test, line.expected_behavior, self._expectations.readable_filename_and_line_number(line), line.original_string or ''))
>-
>+
>     def _print_expectations_for_subset(self, device_class, test_col_width, tests_to_run, tests_to_skip={}):
>         format_string = '{{:{width}}} {{}} {{}} {{}}'.format(width=test_col_width)
>         if tests_to_skip:
>diff --git a/Tools/Scripts/webkitpy/w3c/test_updater.py b/Tools/Scripts/webkitpy/w3c/test_updater.py
>new file mode 100644
>index 0000000000000000000000000000000000000000..f8036e7ab187ecfff402ae30b5770dbd371221e3
>--- /dev/null
>+++ b/Tools/Scripts/webkitpy/w3c/test_updater.py
>@@ -0,0 +1,383 @@
>+#!/usr/bin/env python
>+
>+# Copyright (C) 2018 Bocoup LLC. All rights reserved.
>+#
>+# Redistribution and use in source and binary forms, with or without
>+# modification, are permitted provided that the following conditions
>+# are met:
>+#
>+# 1. Redistributions of source code must retain the above
>+#    copyright notice, this list of conditions and the following
>+#    disclaimer.
>+# 2. Redistributions in binary form must reproduce the above
>+#    copyright notice, this list of conditions and the following
>+#    disclaimer in the documentation and/or other materials
>+#    provided with the distribution.
>+#
>+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
>+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
>+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
>+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
>+# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
>+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
>+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>+# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>+# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>+# SUCH DAMAGE.
>+
>+"""
>+
>+    This script runs run-webkit-tests and analyzes the results, then attempts to update
>+    the -expected.txt files or the root TestExpectations file for failing tests. It is
>+    intended to be used after running Tools/Scripts/import-w3c-tests to assist in
>+    creating a new test expectation baseline after importing new tests from
>+    web-platform-tests.
>+
>+    The script will update the expectations files according to the following rules:
>+
>+    Initially the script runs Tools/Scripts/run-webkit-tests on the specified tests
>+    or directories to generate a baseline.
>+
>+    - Missing tests will be re-run to ensure they are not flaky.
>+
>+    - Crashing or timing-out tests will be added to the root TestExpectations
>+      file with a [ Skip ] directive.
>+
>+    - Tests that pass but are expected to fail will have the failing
>+      directive removed from the TestExpectations file and will be run again.
>+
>+    - Failing ref tests will be added to the root TestExpectations file with
>+      an [ ImageOnlyFailure ] directive.
>+
>+    - Failing testharness tests will be run again with the --reset-results flag
>+      to reset the -expected.txt file. If testharness tests fail multiple times
>+      they will be added to the root TestExpectations file with a [ Failure ]
>+      directive.
>+
>+    - Flaky tests will be added to the root TestExpectations file with all of
>+      their failure state directives.
>+"""
>+
>+import argparse
>+import json
>+from subprocess import Popen
>+import io
>+import os
>+import re
>+import sys
>+from sets import Set
>+import logging
>+from collections import defaultdict
>+
>+from webkitpy.layout_tests.run_webkit_tests import parse_args
>+from webkitpy.layout_tests.run_webkit_tests import main as run_webkit_tests
>+from webkitpy.common.host import Host
>+from webkitpy.layout_tests.models.test_expectations import TestExpectationParser
>+
>+EXPECTATION_MAP = TestExpectationParser._inverted_expectation_tokens
>+_log = logging.getLogger(__name__)
>+
>+
>+def configure_logging():
>+    class LogHandler(logging.StreamHandler):
>+
>+        def format(self, record):
>+            if record.levelno > logging.INFO:
>+                return "%s: %s" % (record.levelname, record.getMessage())
>+            return record.getMessage()
>+
>+    logger = logging.getLogger(__name__)
>+    logger.setLevel(logging.INFO)
>+    handler = LogHandler()
>+    handler.setLevel(logging.INFO)
>+    logger.addHandler(handler)
>+    return handler
>+
>+
>+# TODO
>+# Add documentation of the algorithm for updating test expectations
>+# clean up code to follow webkitpy standards
>+# add unittests
>+
>+def main(_argv, _stdout, _stderr):
>+    configure_logging()
>+
>+    test_updater = TestExpectationUpdater(Host(), _argv)
>+    test_updater.do_update()
>+
>+
>+class TestExpectationUpdater(object):
>+    def __init__(self, host, args):
>+        self._host = host
>+        options, path_args = parse_args(args)
>+        self._options = options
>+        option_args = list(Set(args).difference(Set(path_args)))
>+        # preserve original order of arguments
>+        option_args = [arg for arg in args if arg in option_args]
>+
>+        self._option_args = option_args
>+        self._base_test = path_args
>+        self._port = host.port_factory.get(options.platform, options)
>+
>+    def do_update(self):
>+        # Run tests once to get a baseline
>+        self._run_webkit_tests(self._base_test)
>+        data = self._load_results()
>+        tests = self._pre_process_tests(data['tests'])
>+        test_dict = self._sort_tests(tests)
>+        missing_tests = test_dict['MISSING']
>+        reset_tests = test_dict['RESET']
>+        crash_tests = test_dict['CRASH']
>+        timeout_tests = test_dict['TIMEOUT']
>+        image_tests = test_dict['IMAGE']
>+        unexpected_pass_tests = test_dict['UNEXPECTED_PASS']
>+        testharness_tests = test_dict['TEXT']
>+        flaky_tests = test_dict['FLAKY']
>+
>+        if missing_tests:
>+            _log.info('Rechecking missing tests to ensure they are not flaky')
>+            for test in missing_tests:
>+                self._log_progress(test, missing_tests)
>+                self._missing_test(test)
>+
>+        if reset_tests:
>+            _log.info('Resetting results for %s failing testharness tests' % len(reset_tests))
>+            for test in reset_tests:
>+                self._log_progress(test, reset_tests)
>+                self._reset_testharness_test(test)
>+
>+        if crash_tests:
>+            _log.info('Updating TestExpectations to skip %s crash tests' % len(crash_tests))
>+            for test in crash_tests:
>+                self._log_progress(test, crash_tests)
>+                self._crash_test(test)
>+
>+        if timeout_tests:
>+            _log.info('Updating TestExpectations to skip %s timeout tests' % len(timeout_tests))
>+            for test in timeout_tests:
>+                self._log_progress(test, timeout_tests)
>+                self._timeout_test(test)
>+
>+        if image_tests:
>+            _log.info('Updating TestExpectations to ImageOnlyFailure for %s failing ref tests' % len(image_tests))
>+            for test in image_tests:
>+                self._log_progress(test, image_tests)
>+                self._failing_ref_test(test)
>+
>+        if unexpected_pass_tests:
>+            _log.info('Updating TestExpectations to remove old expectations for %s passing tests' % len(unexpected_pass_tests))
>+            for test in unexpected_pass_tests:
>+                self._log_progress(test, unexpected_pass_tests)
>+                self._unexpected_pass_test(test)
>+
>+        if testharness_tests:
>+            _log.info('Updating TestExpectations to add Failure for %s failing testharness tests' % len(testharness_tests))
>+            for test in testharness_tests:
>+                self._log_progress(test, testharness_tests)
>+                self._failing_testharness_test(test)
>+
>+        if flaky_tests:
>+            _log.info('Updating TestExpectations to add expectations for %s flaky tests' % len(flaky_tests))
>+            for test in flaky_tests:
>+                self._log_progress(test, flaky_tests)
>+                self._flaky_test(test)
>+
>+    def _update_expectation_for_failing_test(self, test, post_reset_result=False, previous_result=None):
>+        _log.debug(test)
>+        failure_type = self._classify_test(test, post_reset_result=post_reset_result, previous_result=previous_result)
>+
>+        if failure_type == 'FLAKY':
>+            return self._flaky_test(test, previous_result)
>+        if failure_type == 'MISSING':
>+            return self._missing_test(test)
>+        if failure_type == 'UNEXPECTED_PASS':
>+            return self._unexpected_pass_test(test)
>+        if failure_type == 'CRASH':
>+            return self._crash_test(test)
>+        if failure_type == 'TIMEOUT':
>+            return self._timeout_test(test)
>+        if failure_type == 'IMAGE':
>+            return self._failing_ref_test(test)
>+        if failure_type == 'TEXT':
>+            return self._failing_testharness_test(test)
>+        if failure_type == 'RESET':
>+            return self._reset_testharness_test(test)
>+
>+    def _flaky_test(self, test, previous_result=None):
>+        expectations = test['actual'].split(' ')
>+        if previous_result:
>+            expectations = expectations + previous_result['actual'].split(' ')
>+        expectations = list(Set(expectations + ['FAIL']))
>+        self._update_test_expectation(test['name'], self._render_expectations(expectations))
>+        self._run_webkit_tests([test['name']])
>+        result = self._extract_failing_test_result(test)
>+        if result:
>+            self._update_expectation_for_failing_test(result, previous_result=test)
>+
>+    def _missing_test(self, test):
>+        self._run_webkit_tests([test['name']])
>+        result = self._extract_failing_test_result(test)
>+        if result:
>+            # Test is still failing, attempt to re-classify
>+            self._update_expectation_for_failing_test(result)
>+
>+    def _unexpected_pass_test(self, test):
>+        self._remove_test_expectation(test['name'])
>+
>+        self._run_webkit_tests([test['name']])
>+        result = self._extract_failing_test_result(test)
>+        if result:
>+            self._update_expectation_for_failing_test(result, previous_result=test)
>+
>+    def _crash_test(self, test):
>+        self._update_test_expectation(test['name'], 'Skip')
>+
>+    def _timeout_test(self, test):
>+        self._update_test_expectation(test['name'], 'Skip')
>+
>+    def _failing_ref_test(self, test):
>+        self._update_test_expectation(test['name'], 'ImageOnlyFailure')
>+        self._run_webkit_tests([test['name']])
>+        result = self._extract_failing_test_result(test)
>+        if result:
>+            # Test is still failing, attempt to re-classify
>+            self._update_expectation_for_failing_test(result, previous_result=test)
>+
>+    def _reset_testharness_test(self, test):
>+        self._run_webkit_tests([test['name']], reset_results=True)
>+        self._run_webkit_tests([test['name']])
>+        result = self._extract_failing_test_result(test)
>+        if result:
>+            self._update_expectation_for_failing_test(result, post_reset_result=True)
>+
>+    def _failing_testharness_test(self, test):
>+        self._update_test_expectation(test['name'], 'Failure')
>+        self._run_webkit_tests([test['name']])
>+        result = self._extract_failing_test_result(test)
>+        if result:
>+            self._update_expectation_for_failing_test(result, previous_result=test)
>+
>+    def _run_webkit_tests(self, test_files, reset_results=False):
>+        args = ['Tools/Scripts/run-webkit-tests'] + self._option_args
>+        if reset_results:
>+            args.append('--reset-results')
>+
>+        args = args + test_files
>+
>+        _log.info('Running webkit tests for: %s' % test_files)
>+        return run_webkit_tests(args, sys.stdout, sys.stderr)
>+
>+    def _test_expectations_path(self):
>+        return self._port.path_to_generic_test_expectations_file()
>+
>+    def _load_results(self):
>+        with open(self._results_file()) as f:
>+            results = f.read()
>+        # FIXME Look into removing this callback from layout_tests.controllers.manager
>+        results = re.sub('^ADD_RESULTS\(', '', results)
>+        results = re.sub('\);$', '', results)
>+        return json.loads(results)
>+
>+    def _results_file(self):
>+        options = self._options
>+        return self._host.filesystem.join(options.build_directory, options.configuration, 'layout-test-results/full_results.json')
>+
>+    def _classify_test(self, test, post_reset_result=False, previous_result=None):
>+        if test.get('report') == 'FLAKY' or previous_result:
>+            return 'FLAKY'
>+        if test.get('report') == 'MISSING':
>+            return 'MISSING'
>+        if test.get('report') == 'REGRESSION' and test.get('expected') == 'CRASH':
>+            return 'UNEXPECTED_PASS'
>+        if test.get('actual') == 'CRASH':
>+            return 'CRASH'
>+        if test.get('actual') == 'TIMEOUT':
>+            return 'TIMEOUT'
>+        if test.get('actual') == 'PASS':
>+            return 'UNEXPECTED_PASS'
>+        if test.get('actual') == 'IMAGE':
>+            return 'IMAGE'
>+        if test.get('actual') == 'TEXT MISSING':
>+            return 'MISSING'
>+        if test.get('actual') == 'TEXT' and post_reset_result:
>+            return 'TEXT'
>+        if test.get('actual') == 'TEXT IMAGE+TEXT':
>+            return 'TEXT'
>+        if test.get('actual') == 'TEXT':
>+            return 'RESET'
>+        raise NotImplementedError('The test updater decision engine could not figure out how to handle test: %s' % json.dumps(test))
>+
>+    def _render_expectations(self, failures):
>+        return ' '.join([EXPECTATION_MAP[failure] for failure in failures])
>+
>+    def _update_test_expectation(self, test, expectation):
>+        self._remove_test_expectation(test)
>+        with open(self._test_expectations_path(), 'a') as myfile:
>+            _log.info('Updating TestExpectations %s [ %s ]' % (test, expectation))
>+            myfile.write('\n%s [ %s ]\n' % (test, expectation))
>+
>+    def _remove_test_expectation(self, test_name):
>+        for path in self._port.expectations_files():
>+            if os.path.isfile(path):
>+                self._remove_test_expectation_from_path(path, test_name)
>+
>+    def _remove_test_expectation_from_path(self, expectation_file, test_name):
>+        with io.open(expectation_file, 'r', encoding="utf-8") as fd:
>+            lines = fd.readlines()
>+
>+        with io.open(expectation_file, 'w', encoding="utf-8") as fd:
>+            for line in lines:
>+                if test_name not in line:
>+                    fd.write(line)
>+
>+    def _extract_failing_test_result(self, test):
>+        data = self._load_results()
>+        tests = self._pre_process_tests(data['tests'])
>+        matching_tests = [t for t in tests if t['name'] == test['name']]
>+
>+        if len(matching_tests) and not self._results_match_expectation(matching_tests[0]):
>+            return matching_tests[0]
>+        else:
>+            return None
>+
>+    def _sort_tests(self, tests):
>+        test_dict = defaultdict(list)
>+        for test in tests:
>+            test_dict[self._classify_test(test)].append(test)
>+        return test_dict
>+
>+    def _pre_process_tests(self, test_dict):
>+        tests = self._flatten_path(test_dict)
>+        processed_tests = []
>+        for file_name, results in tests.items():
>+            results['name'] = file_name
>+            processed_tests.append(results)
>+
>+        processed_tests = [test for test in processed_tests if not self._results_match_expectation(test)]
>+
>+        return processed_tests
>+
>+    def _log_progress(self, test, collection):
>+        _log.info('%s/%s Processing test %s' % (collection.index(test) + 1, len(collection), test['name']))
>+
>+    def _results_match_expectation(self, result):
>+        if 'FAIL' in result['expected'] and result['actual'] == 'TEXT':
>+            return True
>+        if result['actual'] in result['expected']:
>+            return True
>+        return False
>+
>+    def _flatten_path(self, obj):
>+        to_return = {}
>+        for k, v in obj.items():
>+            if 'expected' in v:
>+                # terminal node
>+                to_return[k] = v
>+                pass
>+            else:
>+                flat_object = self._flatten_path(v)
>+                for k2, v2 in flat_object.items():
>+                    to_return[k + '/' + k2] = v2
>+        return to_return
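
For reviewers trying the patch locally, here is a rough sketch of how the new tooling fits together. The test directory, the --debug flag, and the sample results dictionary below are illustrative assumptions, not part of the patch: path arguments name the tests to baseline, and the remaining arguments are forwarded to run-webkit-tests.

# Hypothetical invocation; the imported test directory and --debug are examples only:
#     Tools/Scripts/update-w3c-test-expectations --debug imported/w3c/web-platform-tests/example
#
# The updater reads layout-test-results/full_results.json, strips the ADD_RESULTS(...)
# wrapper, and walks the nested per-directory dictionaries. Any node that carries an
# 'expected' key is treated as a test result, and _flatten_path() joins the keys with '/'.
sample_results = {
    'tests': {
        'imported': {
            'w3c': {
                'web-platform-tests': {
                    'example': {
                        'a-test.html': {'expected': 'PASS', 'actual': 'TEXT', 'report': 'REGRESSION'},
                    },
                },
            },
        },
    },
}
# Flattened, this yields one failing entry named
# 'imported/w3c/web-platform-tests/example/a-test.html'. Because its actual result is
# TEXT, the updater first re-runs it with --reset-results; if it still fails afterwards,
# _update_test_expectation() appends a line of the form
#     imported/w3c/web-platform-tests/example/a-test.html [ Failure ]
# to the generic TestExpectations file.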