openoker
/
ray


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
							from typing import List

from ray_release.test_automation.state_machine import (
    TestStateMachine,
    WEEKLY_RELEASE_BLOCKER_TAG,
)
from ray_release.test import Test, TestState, TestResult


CONTINUOUS_FAILURE_TO_FLAKY = 3  # Number of continuous failures before flaky
CONTINUOUS_PASSING_TO_PASSING = 10  # Number of continuous passing before passing
FLAKY_PERCENTAGE_THRESHOLD = 7  # Percentage threshold to be considered as flaky
FAILING_TO_FLAKY_MESSAGE = (
    "This test is now considered as flaky because it has been "
    "failing on postmerge for too long. Flaky tests do not run on premerge."
)
JAILED_MESSAGE = (
    "This test is confirmed to be jailed because of the presence of the jailed tag. "
    "Jailed tests are no longer release blocking. To unjail this test, close this "
    "issue."
)
JAILED_TAG = "jailed-test"
MAX_REPORT_FAILURE_NO = 5


class CITestStateMachine(TestStateMachine):
    def __init__(self, test: Test, dry_run: bool = False) -> None:
        # Need long enough test history to detect flaky tests
        super().__init__(test, dry_run=dry_run, history_length=30)

    def _move_hook(self, from_state: TestState, to_state: TestState) -> None:
        change = (from_state, to_state)
        if change == (TestState.PASSING, TestState.CONSITENTLY_FAILING):
            self._create_github_issue()
            self._trigger_bisect()
        elif change == (TestState.FAILING, TestState.CONSITENTLY_FAILING):
            self._create_github_issue()
            self._trigger_bisect()
        elif change == (TestState.CONSITENTLY_FAILING, TestState.PASSING):
            self._close_github_issue()
        elif change == (TestState.CONSITENTLY_FAILING, TestState.FLAKY):
            self._comment_github_issue(FAILING_TO_FLAKY_MESSAGE)
        elif change == (TestState.PASSING, TestState.FLAKY):
            self._create_github_issue()
        elif change == (TestState.FLAKY, TestState.PASSING):
            self._close_github_issue()
        elif change == (TestState.FLAKY, TestState.JAILED):
            self._comment_github_issue(JAILED_MESSAGE)

    def _state_hook(self, _: TestState) -> None:
        pass

    def _comment_github_issue(self, comment: str) -> bool:
        github_issue_number = self.test.get(Test.KEY_GITHUB_ISSUE_NUMBER)
        if not github_issue_number:
            return False
        issue = self.ray_repo.get_issue(github_issue_number)
        issue.create_comment(comment)
        return True

    def _create_github_issue(self) -> None:
        labels = [
            "bug",
            "ci-test",
            "ray-test-bot",
            "flaky-tracker",
            "stability",
            "triage",
            self.test.get_oncall(),
            WEEKLY_RELEASE_BLOCKER_TAG,
        ]
        recent_failures = [
            result for result in self.test_results if result.is_failing()
        ][:MAX_REPORT_FAILURE_NO]
        body = (
            f"CI test **{self.test.get_name()}** is {self.test.get_state().value}. "
            "Recent failures: \n"
        )
        for failure in recent_failures:
            body += f"\t- {failure.url}\n"
        # This line is to match the regex in https://shorturl.at/aiK25
        body += f"\nDataCaseName-{self.test.get_name()}-END\n"
        body += "Managed by OSS Test Policy"
        title = f"CI test {self.test.get_name()} is {self.test.get_state().value}"

        # If the issue already exists, update the issue; otherwise creating a new one
        github_issue_number = self.test.get(Test.KEY_GITHUB_ISSUE_NUMBER)
        if not github_issue_number:
            issue_number = self.ray_repo.create_issue(
                title=title,
                body=body,
                labels=labels,
            ).number
            self.test[Test.KEY_GITHUB_ISSUE_NUMBER] = issue_number
            return
        else:
            issue = self.ray_repo.get_issue(github_issue_number)
            issue.edit(title=title, state="open")
            issue.create_comment(body)
            return

    def _consistently_failing_to_jailed(self) -> bool:
        return False

    def _passing_to_flaky(self) -> bool:
        # A test is flaky if it has been changing from passing to failing for
        # a certain percentage of time in the test history. However, if it has been
        # recently stable, then it is not flaky.
        if self._is_recently_stable():
            return False

        return self.is_flaky_result_history(self.test_results)

    @staticmethod
    def is_flaky_result_history(results: List[TestResult]):
        transition = 0
        for i in range(0, len(results) - 1):
            if results[i].is_failing() and results[i + 1].is_passing():
                transition += 1
        if transition >= FLAKY_PERCENTAGE_THRESHOLD * len(results) / 100:
            return True
        return False

    def _consistently_failing_to_flaky(self) -> bool:
        return len(self.test_results) >= CONTINUOUS_FAILURE_TO_FLAKY and all(
            result.is_failing()
            for result in self.test_results[:CONTINUOUS_FAILURE_TO_FLAKY]
        )

    def _flaky_to_passing(self) -> bool:
        # A flaky test is considered passing if it has been passing for a certain
        # period and the github issue is closed (by a human).
        return self._is_recently_stable()

    def _is_recently_stable(self) -> bool:
        return len(self.test_results) >= CONTINUOUS_PASSING_TO_PASSING and all(
            result.is_passing()
            for result in self.test_results[:CONTINUOUS_PASSING_TO_PASSING]
        )

    def _flaky_to_jailed(self) -> bool:
        # If a human has confirmed that this test is jailed (by adding the jailed tag),
        # then it is jailed
        github_issue_number = self.test.get(Test.KEY_GITHUB_ISSUE_NUMBER)
        if not github_issue_number:
            return False
        issue = self.ray_repo.get_issue(github_issue_number)
        return JAILED_TAG in [label.name for label in issue.get_labels()]