scripts/slave/recipe_modules/auto_bisect/bisector.py - Issue 1573293002: Change auto_bisect to post results to perf dashboard.

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/bisector.py

Issue 1573293002: Change auto_bisect to post results to perf dashboard. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master

Patch Set: . Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import json	5 import json

6 import re	6 import re

7 import time	7 import time

	8 import urllib

8	9

9 from . import bisect_results

10 from . import depot_config	10 from . import depot_config

11 from . import revision_state	11 from . import revision_state

12	12

13 _DEPS_SHA_PATCH = """	13 _DEPS_SHA_PATCH = """

14 diff --git DEPS.sha DEPS.sha	14 diff --git DEPS.sha DEPS.sha

15 new file mode 100644	15 new file mode 100644

16 --- /dev/null	16 --- /dev/null

17 +++ DEPS.sha	17 +++ DEPS.sha

18 @@ -0,0 +1 @@	18 @@ -0,0 +1 @@

19 +%(deps_sha)s	19 +%(deps_sha)s

(...skipping 27 matching lines...)
47 # difference is established.	47 # difference is established.

48 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60	48 REGRESSION_CHECK_TIMEOUT = 2 * 60 * 60

49 # If we reach this number of samples on the reference range and have not	49 # If we reach this number of samples on the reference range and have not

50 # achieved statistical significance, bail.	50 # achieved statistical significance, bail.

51 MAX_REQUIRED_SAMPLES = 50	51 MAX_REQUIRED_SAMPLES = 50

52	52

53 # Significance level to use for determining difference between revisions via	53 # Significance level to use for determining difference between revisions via

54 # hypothesis testing.	54 # hypothesis testing.

55 SIGNIFICANCE_LEVEL = 0.01	55 SIGNIFICANCE_LEVEL = 0.01

56	56

	57 _FAILED_INITIAL_CONFIDENCE_ABORT_REASON = (

	58 'The metric values for the initial "good" and "bad" revisions '

	59 'do not represent a clear regression.')

	60

	61 _DIRECTION_OF_IMPROVEMENT_ABORT_REASON = (

	62 'The metric values for the initial "good" and "bad" revisions match the '

	63 'expected direction of improvement. Thus, likely represent an improvement '

	64 'and not a regression.')

	65

57	66

58 class Bisector(object):	67 class Bisector(object):

59 """This class abstracts an ongoing bisect (or n-sect) job."""	68 """This class abstracts an ongoing bisect (or n-sect) job."""

60	69

61 def __init__(self, api, bisect_config, revision_class, init_revisions=True):	70 def __init__(self, api, bisect_config, revision_class, init_revisions=True):

62 """Initializes the state of a new bisect job from a dictionary.	71 """Initializes the state of a new bisect job from a dictionary.

63	72

64 Note that the initial good_rev and bad_rev MUST resolve to a commit position	73 Note that the initial good_rev and bad_rev MUST resolve to a commit position

65 in the chromium repo.	74 in the chromium repo.

66 """	75 """

(...skipping 465 matching lines...)
532 result += 'bisector.fkbr: %r\n\n' % self.fkbr	541 result += 'bisector.fkbr: %r\n\n' % self.fkbr

533 result += self._revision_value_table()	542 result += self._revision_value_table()

534 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values):	543 if (self.lkgr and self.lkgr.values and self.fkbr and self.fkbr.values):

535 result += '\n' + self._t_test_results()	544 result += '\n' + self._t_test_results()

536 return result	545 return result

537	546

538 def _revision_value_table(self):	547 def _revision_value_table(self):

539 """Returns a string table showing revisions and their values."""	548 """Returns a string table showing revisions and their values."""

540 header = [['Revision', 'Values']]	549 header = [['Revision', 'Values']]

541 rows = [[str(r.commit_pos), str(r.values)] for r in self.revisions]	550 rows = [[str(r.commit_pos), str(r.values)] for r in self.revisions]

542 return bisect_results.pretty_table(header + rows)	551 return self._pretty_table(header + rows)

	552

	553 def _pretty_table(self, data):

	554 results = []

	555 for row in data:

	556 results.append('%-15s' * len(row) % tuple(row))

	557 return '\n'.join(results)

543	558

544 def _t_test_results(self):	559 def _t_test_results(self):

545 """Returns a string showing t-test results for lkgr and fkbr."""	560 """Returns a string showing t-test results for lkgr and fkbr."""

546 t, df, p = self.api.m.math_utils.welchs_t_test(	561 t, df, p = self.api.m.math_utils.welchs_t_test(

547 self.lkgr.values, self.fkbr.values)	562 self.lkgr.values, self.fkbr.values)

548 lines = [	563 lines = [

549 'LKGR values: %r' % self.lkgr.values,	564 'LKGR values: %r' % self.lkgr.values,

550 'FKBR values: %r' % self.fkbr.values,	565 'FKBR values: %r' % self.fkbr.values,

551 't-statistic: %r' % t,	566 't-statistic: %r' % t,

552 'deg. of freedom: %r' % df,	567 'deg. of freedom: %r' % df,

553 'p-value: %r' % p,	568 'p-value: %r' % p,

554 'Confidence score: %r' % (100 * (1 - p))	569 'Confidence score: %r' % (100 * (1 - p))

555 ]	570 ]

556 return '\n'.join(lines)	571 return '\n'.join(lines)

557	572

558 def partial_results(self):

559 return bisect_results.BisectResults(self, partial=True).as_string()

560

561 def print_result_debug_info(self):	573 def print_result_debug_info(self):

562 """Prints extra debug info at the end of the bisect process."""	574 """Prints extra debug info at the end of the bisect process."""

563 lines = self._results_debug_message().splitlines()	575 lines = self._results_debug_message().splitlines()

564 # If we emit a null step then add a log to it, the log should be kept	576 # If we emit a null step then add a log to it, the log should be kept

565 # longer than 7 days (which is often needed to debug some issues).	577 # longer than 7 days (which is often needed to debug some issues).

566 self.api.m.step('Debug Info', [])	578 self.api.m.step('Debug Info', [])

567 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines	579 self.api.m.step.active_result.presentation.logs['Debug Info'] = lines

568	580

569 def print_result(self):	581 def post_result(self, halt_on_failure=False):

570 results = bisect_results.BisectResults(self).as_string()	582 """Posts bisect results to Perf Dashboard."""

571 self.api.m.python.inline(	583 self.api.m.perf_dashboard.set_default_config()

572 'Results',	584 self.api.m.perf_dashboard.post_bisect_results(

573 """	585 self.get_result(), halt_on_failure)

574 import shutil

575 import sys

576 shutil.copyfileobj(open(sys.argv[1]), sys.stdout)

577 """,

578 args=[self.api.m.raw_io.input(data=results)])

579	586

580 def get_revision_to_eval(self):	587 def get_revision_to_eval(self):

581 """Gets the next RevisionState object in the candidate range.	588 """Gets the next RevisionState object in the candidate range.

582	589

583 Returns:	590 Returns:

584 The next Revision object in a list.	591 The next Revision object in a list.

585 """	592 """

586 self._update_candidate_range()	593 self._update_candidate_range()

587 candidate_range = [revision for revision in	594 candidate_range = [revision for revision in

588 self.revisions[self.lkgr.list_index + 1:	595 self.revisions[self.lkgr.list_index + 1:

(...skipping 289 matching lines...)
878	885

879 def surface_result(self, result_string):	886 def surface_result(self, result_string):

880 assert result_string in VALID_RESULT_CODES	887 assert result_string in VALID_RESULT_CODES

881 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n).	888 prefix = 'B4T_' # To avoid collision. Stands for bisect (abbr. `a la i18n).

882 result_code = prefix + result_string	889 result_code = prefix + result_string

883 assert len(result_code) <= 20	890 assert len(result_code) <= 20

884 if result_code not in self.result_codes:	891 if result_code not in self.result_codes:

885 self.result_codes.add(result_code)	892 self.result_codes.add(result_code)

886 properties = self.api.m.step.active_result.presentation.properties	893 properties = self.api.m.step.active_result.presentation.properties

887 properties['extra_result_code'] = sorted(self.result_codes)	894 properties['extra_result_code'] = sorted(self.result_codes)

	895

	896 def get_result(self):

	897 """Returns the results as a jsonable object."""

	898 config = self.bisect_config

	899 results_confidence = 0

	900 if self.culprit:

	901 results_confidence = self.api.m.math_utils.confidence_score(

	902 self.lkgr.values, self.fkbr.values)

	903

	904 if self.failed:

	905 status = 'failed'

	906 elif self.bisect_over:

	907 status = 'completed'

	908 else:

	909 status = 'started'

	910

	911 fail_reason = None

	912 if self.failed_initial_confidence:

	913 fail_reason = _FAILED_INITIAL_CONFIDENCE_ABORT_REASON

	914 elif self.failed_direction:

	915 fail_reason = _DIRECTION_OF_IMPROVEMENT_ABORT_REASON

	916 return {

	917 'try_job_id': config.get('try_job_id'),

	918 'bug_id': config.get('bug_id'),

	919 'status': status,

	920 'buildbot_log_url': self._get_build_url(),

	921 'bisect_bot': self.get_perf_tester_name(),

	922 'command': config['command'],

	923 'test_type': config['test_type'],

	924 'metric': config['metric'],

	925 'change': self.relative_change,

	926 'score': results_confidence,

	927 'good_revision': self.good_rev.commit_hash,

	928 'bad_revision': self.bad_rev.commit_hash,

	929 'warnings': self.warnings,

	930 'fail_reason': fail_reason,

	931 'culprit_data': self._culprit_data(),

	932 'revision_data': self._revision_data()

	933 }

	934

	935 def _culprit_data(self):

	936 culprit = self.culprit

	937 api = self.api

	938 if not culprit:

	939 return None

	940 culprit_cl_hash = culprit.deps_revision or culprit.commit_hash

	941 culprit_info = api.query_revision_info(

	942 culprit_cl_hash, culprit.depot_name)

	943

	944 return {

	945 'subject': culprit_info['subject'],

	946 'author': culprit_info['author'],

	947 'email': culprit_info['email'],

	948 'cl_date': culprit_info['date'],

	949 'commit_info': culprit_info['body'],

	950 'revisions_links': [],

	951 'cl': culprit.deps_revision or culprit.commit_hash

	952 }

	953

	954 def _revision_data(self):

	955 revision_rows = []

	956 for r in self.revisions:

	957 if r.tested or r.aborted:

	958 revision_rows.append({

	959 'depot_name': r.depot_name,

	960 'deps_revision': r.deps_revision,

	961 'commit_pos': r.commit_pos,

	962 'mean_value': r.mean_value,

	963 'std_dev': r.std_dev,

	964 'values': r.values,

	965 'result': 'good' if r.good else 'bad' if r.bad else 'unknown',

	966 })

	967 return revision_rows

	968

	969 def _get_build_url(self):

	970 properties = self.api.m.properties

	971 bot_url = properties.get('buildbotURL',

	972 'http://build.chromium.org/p/chromium/')

	973 builder_name = urllib.quote(properties.get('buildername', ''))

	974 builder_number = str(properties.get('buildnumber', ''))

	975 return '%sbuilders/%s/builds/%s' % (bot_url, builder_name, builder_number)

OLD	NEW