Distinguish AC with stderr from WA on test command (kyuridenamida#132)

blue-jam · kyuridenamida · commit ff6d956e5943 · 2019-03-29T23:16:33.000+09:00
* Change message in tester if program has stderr kyuridenamida#119 To count an execution that produces a correct answer together with stderr output as a success, check execution failures (WA, TLE, RE) and the existence of stderr output separately. The tester still returns false if there is any output on stderr, to prevent TLEs caused by massive debug output. * Hide I/O if program emits correct answer kyuridenamida#119 Make it possible to hide inputs, expected outputs, and actual outputs if a program emits correct actual outputs. Emitting all inputs, expected outputs, and actual outputs is verbose and makes it difficult to find failed test cases. Keep emitting stderr so that a user can easily remove stderr output before submitting. * Fix test for python3.5 kyuridenamida#119 * Comply with autopep8 kyuridenamida#119 * Fix pep8 kyuridenamida#119 * Fix pep8 kyuridenamida#119 * Change flag name for hiding stderr kyuridenamida#119 * Hide all outputs if answer is correct kyuridenamida#119 * Fix PEP8 issue kyuridenamida#119
diff --git a/atcodertools/tools/tester.py b/atcodertools/tools/tester.py
@@ -22,6 +22,15 @@ class IrregularSampleFileError(Exception):
     pass
 
 
+class TestSummary:
+    def __init__(self, success_count: int, has_error_output: bool):
+        self.success_count = success_count
+        self.has_error_output = has_error_output
+
+    def __eq__(self, other):
+        return self.success_count == other.success_count and self.has_error_output == other.has_error_output
+
+
 def is_executable_file(file_name):
     return os.access(file_name, os.X_OK) and Path(file_name).is_file() \
         and file_name.find(".cpp") == -1 and not file_name.endswith(".txt")  # cppやtxtを省くのは一応の Cygwin 対策
@@ -58,16 +67,19 @@ def append(text: str, end='\n'):
         nonlocal res
         res += text + end
 
+    with open(output_file, "r") as f:
+        expected_output = f.read()
+
     append(with_color("[Input]", Fore.LIGHTMAGENTA_EX))
     with open(input_file, "r") as f:
         append(f.read(), end='')
 
     append(with_color("[Expected]", Fore.LIGHTMAGENTA_EX))
-    with open(output_file, "r") as f:
-        append(f.read(), end='')
+    append(expected_output, end='')
 
     append(with_color("[Received]", Fore.LIGHTMAGENTA_EX))
     append(exec_res.output, end='')
+
     if exec_res.status != ExecStatus.NORMAL:
         append(with_color("Aborted ({})\n".format(
             exec_res.status.name), Fore.LIGHTYELLOW_EX))
@@ -78,8 +90,10 @@ def append(text: str, end='\n'):
     return res
 
 
-def run_for_samples(exec_file: str, sample_pair_list: List[Tuple[str, str]], timeout_sec: int, knock_out: bool = False):
+def run_for_samples(exec_file: str, sample_pair_list: List[Tuple[str, str]], timeout_sec: int, knock_out: bool = False,
+                    skip_io_on_success: bool = False) -> TestSummary:
     success_count = 0
+    has_error_output = False
     for in_sample_file, out_sample_file in sample_pair_list:
         # Run program
         exec_res = run_program(exec_file, in_sample_file,
@@ -90,37 +104,38 @@ def run_for_samples(exec_file: str, sample_pair_list: List[Tuple[str, str]], tim
             answer_text = f.read()
 
         is_correct = exec_res.is_correct_output(answer_text)
-        passed = is_correct and not exec_res.has_stderr()
+        has_error_output = has_error_output or exec_res.has_stderr()
 
-        if passed:
-            message = "{} {elapsed} ms".format(
-                with_color("PASSED", Fore.LIGHTGREEN_EX),
-                elapsed=exec_res.elapsed_ms)
-            success_count += 1
-        else:
-            if is_correct:
+        if is_correct:
+            if exec_res.has_stderr():
                 message = with_color(
                     "CORRECT but with stderr (Please remove stderr!)", Fore.LIGHTYELLOW_EX)
             else:
-                if exec_res.status == ExecStatus.NORMAL:
-                    message = with_color("WA", Fore.LIGHTRED_EX)
-                else:
-                    message = with_color(
-                        exec_res.status.name, Fore.LIGHTYELLOW_EX)
+                message = "{} {elapsed} ms".format(
+                    with_color("PASSED", Fore.LIGHTGREEN_EX),
+                    elapsed=exec_res.elapsed_ms)
+            success_count += 1
+        else:
+            if exec_res.status == ExecStatus.NORMAL:
+                message = with_color("WA", Fore.LIGHTRED_EX)
+            else:
+                message = with_color(
+                    exec_res.status.name, Fore.LIGHTYELLOW_EX)
 
         print("# {case_name} ... {message}".format(
             case_name=os.path.basename(in_sample_file),
             message=message,
         ))
 
-        # Output details for incorrect results.
-        if not passed:
+        # Output details for incorrect results or has stderr.
+        if not is_correct or (exec_res.has_stderr() and not skip_io_on_success):
             print('{}\n'.format(build_details_str(
                 exec_res, in_sample_file, out_sample_file)))
-            if knock_out:
-                print('Stop testing ...')
-                break
-    return success_count
+
+        if knock_out and not is_correct:
+            print('Stop testing ...')
+            break
+    return TestSummary(success_count, has_error_output)
 
 
 def validate_sample_pair(in_sample_file, out_sample_file):
@@ -153,13 +168,14 @@ def single_or_none(lst: List):
 
     validate_sample_pair(in_sample_file, out_sample_file)
 
-    success_count = run_for_samples(
+    test_summary = run_for_samples(
         exec_file, [(in_sample_file, out_sample_file)], timeout_sec)
 
-    return success_count == 1
+    return test_summary.success_count == 1 and not test_summary.has_error_output
 
 
-def run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, timeout_sec: int, knock_out: bool) -> bool:
+def run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, timeout_sec: int, knock_out: bool,
+                  skip_stderr_on_success: bool) -> bool:
     if len(in_sample_file_list) != len(out_sample_file_list):
         logging.error("{0}{1}{2}".format(
             "The number of the sample inputs and outputs are different.\n",
@@ -171,18 +187,23 @@ def run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, timeout_
         validate_sample_pair(in_sample_file, out_sample_file)
         samples.append((in_sample_file, out_sample_file))
 
-    success_count = run_for_samples(exec_file, samples, timeout_sec, knock_out)
+    test_summary = run_for_samples(
+        exec_file, samples, timeout_sec, knock_out, skip_stderr_on_success)
 
     if len(samples) == 0:
         print("No test cases")
         return False
-    elif success_count != len(samples):
+    elif test_summary.success_count != len(samples):
         print("{msg} (passed {success_count} of {total})".format(
             msg=with_color("Some cases FAILED", Fore.LIGHTRED_EX),
-            success_count=success_count,
+            success_count=test_summary.success_count,
             total=len(samples),
         ))
         return False
+    elif test_summary.has_error_output:
+        print(with_color(
+            "Passed all test case but with stderr. (Please remove stderr!)", Fore.LIGHTYELLOW_EX))
+        return False
     else:
         print(with_color("Passed all test cases!!!", Fore.LIGHTGREEN_EX))
         return True
@@ -233,6 +254,12 @@ def main(prog, args) -> bool:
                         action="store_true",
                         default=False)
 
+    parser.add_argument('--skip-almost-ac-feedback', '-s',
+                        help='Hide inputs and expected/actual outputs if result is correct and there are error outputs'
+                             ' [Default] False,',
+                        action='store_true',
+                        default=False)
+
     args = parser.parse_args(args)
     exec_file = args.exec or infer_exec_file(
         glob.glob(os.path.join(args.dir, '*')))
@@ -246,7 +273,8 @@ def main(prog, args) -> bool:
         glob.glob(os.path.join(args.dir, out_ex_pattern)))
 
     if args.num is None:
-        return run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, args.timeout, args.knock_out)
+        return run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, args.timeout, args.knock_out,
+                             args.skip_almost_ac_feedback)
     else:
         return run_single_test(exec_file, in_sample_file_list, out_sample_file_list, args.timeout, args.num)
 
diff --git a/tests/test_tester.py b/tests/test_tester.py
@@ -1,9 +1,13 @@
 import os
 import unittest
-from unittest.mock import patch
 
+from colorama import Fore
+from unittest.mock import patch, mock_open, MagicMock
+
+from atcodertools.executils.run_program import ExecResult, ExecStatus
 from atcodertools.tools import tester
-from atcodertools.tools.tester import is_executable_file
+from atcodertools.tools.tester import is_executable_file, TestSummary, build_details_str
+from atcodertools.tools.utils import with_color
 
 RESOURCE_DIR = os.path.abspath(os.path.join(
     os.path.dirname(os.path.abspath(__file__)),
@@ -47,6 +51,111 @@ def test_is_executable_file__text(self, os_mock, is_file_mock):
     def test_is_executable_file__directory(self, os_mock, is_file_mock):
         self.assertFalse(is_executable_file('directory'))
 
+    @patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'correct', '', 0))
+    def test_run_for_samples(self, run_program_mock: MagicMock):
+        io_mock = mock_open(read_data='correct')
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            self.assertEqual(TestSummary(1, False), tester.run_for_samples(
+                'a.out', [('in_1.txt', 'out_1.txt')], 1))
+            self.assertEqual(1, run_program_mock.call_count)
+
+    @patch('atcodertools.tools.tester.build_details_str', return_value='')
+    @patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'correct', 'stderr', 0))
+    def test_run_for_samples__with_stderr(self, run_program_mock: MagicMock, build_details_str_mock: MagicMock):
+        io_mock = mock_open(read_data='correct')
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            self.assertEqual(TestSummary(1, True), tester.run_for_samples(
+                'a.out', [('in_1.txt', 'out_1.txt')], 1))
+            self.assertEqual(1, run_program_mock.call_count)
+            self.assertEqual(1, build_details_str_mock.call_count)
+
+    @patch('atcodertools.tools.tester.build_details_str', return_value='')
+    @patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'wrong', '', 0))
+    def test_run_for_samples__wrong_answer(self, run_program_mock: MagicMock, build_details_str_mock: MagicMock):
+        io_mock = mock_open(read_data='correct')
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            self.assertEqual(TestSummary(0, False), tester.run_for_samples(
+                'a.out', [('in_1.txt', 'out_1.txt')], 1))
+            self.assertEqual(1, run_program_mock.call_count)
+            self.assertEqual(1, build_details_str_mock.call_count)
+
+    @patch('atcodertools.tools.tester.build_details_str', return_value='')
+    @patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'wrong', '', 0))
+    def test_run_for_samples__stop_execution_on_first_failure(self, run_program_mock: MagicMock,
+                                                              build_details_str_mock: MagicMock):
+        io_mock = mock_open(read_data='correct')
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            sample_pair_list = [('in_1.txt', 'out_1.txt'),
+                                ('in_2.txt', 'out_2.txt')]
+            self.assertEqual(TestSummary(0, False), tester.run_for_samples(
+                'a.out', sample_pair_list, 1, True))
+            self.assertEqual(1, run_program_mock.call_count)
+            self.assertEqual(1, build_details_str_mock.call_count)
+
+    @patch('atcodertools.tools.tester.build_details_str', return_value='')
+    @patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'correct', 'stderr', 0))
+    def test_run_for_samples__skip_stderr(self, run_program_mock: MagicMock, build_details_str_mock: MagicMock):
+        io_mock = mock_open(read_data='correct')
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            self.assertEqual(TestSummary(1, True), tester.run_for_samples(
+                'a.out', [('in_1.txt', 'out_1.txt')], 1, skip_io_on_success=True))
+            self.assertEqual(1, run_program_mock.call_count)
+            self.assertEqual(0, build_details_str_mock.call_count)
+
+    def test_build_details_str(self):
+        in_out = 'correct\n'
+        output = 'wrong\n'
+        stderr = 'stderr\n'
+        expected = (with_color('[Input]', Fore.LIGHTMAGENTA_EX) + '\n'
+                    + in_out + with_color('[Expected]',
+                                          Fore.LIGHTMAGENTA_EX) + '\n' + in_out
+                    + with_color('[Received]',
+                                 Fore.LIGHTMAGENTA_EX) + '\n' + output
+                    + with_color('[Error]', Fore.LIGHTYELLOW_EX) + '\n' + stderr)
+        io_mock = mock_open(read_data=in_out)
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            result = build_details_str(ExecResult(
+                ExecStatus.NORMAL, output, stderr), 'in.txt', 'out.txt')
+            self.assertEqual(expected, result)
+
+    def test_build_details_str__show_testcase_if_there_is_stderr(self):
+        in_out = 'correct\n'
+        stderr = 'stderr\n'
+        expected = (with_color('[Input]', Fore.LIGHTMAGENTA_EX) + '\n'
+                    + in_out + with_color('[Expected]',
+                                          Fore.LIGHTMAGENTA_EX) + '\n' + in_out
+                    + with_color('[Received]',
+                                 Fore.LIGHTMAGENTA_EX) + '\n' + in_out
+                    + with_color('[Error]', Fore.LIGHTYELLOW_EX) + '\n' + stderr)
+        io_mock = mock_open(read_data=in_out)
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            result = build_details_str(ExecResult(
+                ExecStatus.NORMAL, in_out, stderr), 'in.txt', 'out.txt')
+            self.assertEqual(expected, result)
+
+    def test_build_details_str__on_runtime_failure(self):
+        in_out = 'correct\n'
+        stderr = ''
+        expected = (with_color('[Input]', Fore.LIGHTMAGENTA_EX) + '\n'
+                    + in_out + with_color('[Expected]',
+                                          Fore.LIGHTMAGENTA_EX) + '\n' + in_out
+                    + with_color('[Received]',
+                                 Fore.LIGHTMAGENTA_EX) + '\n' + in_out
+                    + with_color('Aborted ({})\n'.format(ExecStatus.RE.name), Fore.LIGHTYELLOW_EX) + '\n')
+        io_mock = mock_open(read_data=in_out)
+
+        with patch('atcodertools.tools.tester.open', io_mock):
+            result = build_details_str(ExecResult(
+                ExecStatus.RE, in_out, stderr), 'in.txt', 'out.txt')
+            self.assertEqual(expected, result)
+
 
 if __name__ == '__main__':
     unittest.main()