Skip to content

Commit ff6d956

Browse files
blue-jam authored and kyuridenamida committed
Distinguish AC with stderr from WA on test command (kyuridenamida#132)
* Change message in tester if program has stderr kyuridenamida#119
  To count an execution that produces a correct answer together with stderr output as a success, check execution failures (WA, TLE, RE) and the existence of stderr output separately. The tester still returns false if there is any output on stderr, to prevent TLEs caused by massive debug output.
* Hide I/O if program emits correct answer kyuridenamida#119
  Make it possible to hide inputs, expected outputs, and actual outputs if a program emits the correct output. Emitting all inputs, expected outputs, and actual outputs is verbose and makes it difficult to find failed test cases. Keep emitting stderr so that a user can easily remove stderr output before submitting.
* Fix test for python3.5 kyuridenamida#119
* Comply with autopep8 kyuridenamida#119
* Fix pep8 kyuridenamida#119
* Fix pep8 kyuridenamida#119
* Change flag name for hiding stderr kyuridenamida#119
* Hide all outputs if answer is correct kyuridenamida#119
* Fix PEP8 issue kyuridenamida#119
1 parent c07fcdd commit ff6d956

File tree

2 files changed

+168
-31
lines changed

2 files changed

+168
-31
lines changed

atcodertools/tools/tester.py

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@ class IrregularSampleFileError(Exception):
2222
pass
2323

2424

25+
class TestSummary:
    """Aggregated result of running a program against a list of sample cases.

    Attributes:
        success_count: Number of cases counted as successful by the runner.
        has_error_output: True if any executed case produced stderr output.
    """

    # The name starts with "Test", so tell pytest not to collect this class
    # as a test case when it is imported into a test module.
    __test__ = False

    def __init__(self, success_count: int, has_error_output: bool):
        self.success_count = success_count
        self.has_error_output = has_error_output

    def __eq__(self, other):
        """Compare field by field; defer to the other operand for foreign types."""
        # Returning NotImplemented (instead of touching other's attributes)
        # makes `summary == None` or `summary == 1` evaluate to False rather
        # than raising AttributeError.
        if not isinstance(other, TestSummary):
            return NotImplemented
        return (self.success_count == other.success_count
                and self.has_error_output == other.has_error_output)

    def __repr__(self):
        """Return a readable representation, useful in assertion failure messages."""
        return "TestSummary(success_count={!r}, has_error_output={!r})".format(
            self.success_count, self.has_error_output)
32+
33+
2534
def is_executable_file(file_name):
2635
return os.access(file_name, os.X_OK) and Path(file_name).is_file() \
2736
and file_name.find(".cpp") == -1 and not file_name.endswith(".txt") # cppやtxtを省くのは一応の Cygwin 対策
@@ -58,16 +67,19 @@ def append(text: str, end='\n'):
5867
nonlocal res
5968
res += text + end
6069

70+
with open(output_file, "r") as f:
71+
expected_output = f.read()
72+
6173
append(with_color("[Input]", Fore.LIGHTMAGENTA_EX))
6274
with open(input_file, "r") as f:
6375
append(f.read(), end='')
6476

6577
append(with_color("[Expected]", Fore.LIGHTMAGENTA_EX))
66-
with open(output_file, "r") as f:
67-
append(f.read(), end='')
78+
append(expected_output, end='')
6879

6980
append(with_color("[Received]", Fore.LIGHTMAGENTA_EX))
7081
append(exec_res.output, end='')
82+
7183
if exec_res.status != ExecStatus.NORMAL:
7284
append(with_color("Aborted ({})\n".format(
7385
exec_res.status.name), Fore.LIGHTYELLOW_EX))
@@ -78,8 +90,10 @@ def append(text: str, end='\n'):
7890
return res
7991

8092

81-
def run_for_samples(exec_file: str, sample_pair_list: List[Tuple[str, str]], timeout_sec: int, knock_out: bool = False):
93+
def run_for_samples(exec_file: str, sample_pair_list: List[Tuple[str, str]], timeout_sec: int, knock_out: bool = False,
94+
skip_io_on_success: bool = False) -> TestSummary:
8295
success_count = 0
96+
has_error_output = False
8397
for in_sample_file, out_sample_file in sample_pair_list:
8498
# Run program
8599
exec_res = run_program(exec_file, in_sample_file,
@@ -90,37 +104,38 @@ def run_for_samples(exec_file: str, sample_pair_list: List[Tuple[str, str]], tim
90104
answer_text = f.read()
91105

92106
is_correct = exec_res.is_correct_output(answer_text)
93-
passed = is_correct and not exec_res.has_stderr()
107+
has_error_output = has_error_output or exec_res.has_stderr()
94108

95-
if passed:
96-
message = "{} {elapsed} ms".format(
97-
with_color("PASSED", Fore.LIGHTGREEN_EX),
98-
elapsed=exec_res.elapsed_ms)
99-
success_count += 1
100-
else:
101-
if is_correct:
109+
if is_correct:
110+
if exec_res.has_stderr():
102111
message = with_color(
103112
"CORRECT but with stderr (Please remove stderr!)", Fore.LIGHTYELLOW_EX)
104113
else:
105-
if exec_res.status == ExecStatus.NORMAL:
106-
message = with_color("WA", Fore.LIGHTRED_EX)
107-
else:
108-
message = with_color(
109-
exec_res.status.name, Fore.LIGHTYELLOW_EX)
114+
message = "{} {elapsed} ms".format(
115+
with_color("PASSED", Fore.LIGHTGREEN_EX),
116+
elapsed=exec_res.elapsed_ms)
117+
success_count += 1
118+
else:
119+
if exec_res.status == ExecStatus.NORMAL:
120+
message = with_color("WA", Fore.LIGHTRED_EX)
121+
else:
122+
message = with_color(
123+
exec_res.status.name, Fore.LIGHTYELLOW_EX)
110124

111125
print("# {case_name} ... {message}".format(
112126
case_name=os.path.basename(in_sample_file),
113127
message=message,
114128
))
115129

116-
# Output details for incorrect results.
117-
if not passed:
130+
# Output details for incorrect results or has stderr.
131+
if not is_correct or (exec_res.has_stderr() and not skip_io_on_success):
118132
print('{}\n'.format(build_details_str(
119133
exec_res, in_sample_file, out_sample_file)))
120-
if knock_out:
121-
print('Stop testing ...')
122-
break
123-
return success_count
134+
135+
if knock_out and not is_correct:
136+
print('Stop testing ...')
137+
break
138+
return TestSummary(success_count, has_error_output)
124139

125140

126141
def validate_sample_pair(in_sample_file, out_sample_file):
@@ -153,13 +168,14 @@ def single_or_none(lst: List):
153168

154169
validate_sample_pair(in_sample_file, out_sample_file)
155170

156-
success_count = run_for_samples(
171+
test_summary = run_for_samples(
157172
exec_file, [(in_sample_file, out_sample_file)], timeout_sec)
158173

159-
return success_count == 1
174+
return test_summary.success_count == 1 and not test_summary.has_error_output
160175

161176

162-
def run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, timeout_sec: int, knock_out: bool) -> bool:
177+
def run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, timeout_sec: int, knock_out: bool,
178+
skip_stderr_on_success: bool) -> bool:
163179
if len(in_sample_file_list) != len(out_sample_file_list):
164180
logging.error("{0}{1}{2}".format(
165181
"The number of the sample inputs and outputs are different.\n",
@@ -171,18 +187,23 @@ def run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, timeout_
171187
validate_sample_pair(in_sample_file, out_sample_file)
172188
samples.append((in_sample_file, out_sample_file))
173189

174-
success_count = run_for_samples(exec_file, samples, timeout_sec, knock_out)
190+
test_summary = run_for_samples(
191+
exec_file, samples, timeout_sec, knock_out, skip_stderr_on_success)
175192

176193
if len(samples) == 0:
177194
print("No test cases")
178195
return False
179-
elif success_count != len(samples):
196+
elif test_summary.success_count != len(samples):
180197
print("{msg} (passed {success_count} of {total})".format(
181198
msg=with_color("Some cases FAILED", Fore.LIGHTRED_EX),
182-
success_count=success_count,
199+
success_count=test_summary.success_count,
183200
total=len(samples),
184201
))
185202
return False
203+
elif test_summary.has_error_output:
204+
print(with_color(
205+
"Passed all test case but with stderr. (Please remove stderr!)", Fore.LIGHTYELLOW_EX))
206+
return False
186207
else:
187208
print(with_color("Passed all test cases!!!", Fore.LIGHTGREEN_EX))
188209
return True
@@ -233,6 +254,12 @@ def main(prog, args) -> bool:
233254
action="store_true",
234255
default=False)
235256

257+
parser.add_argument('--skip-almost-ac-feedback', '-s',
258+
help='Hide inputs and expected/actual outputs if result is correct and there are error outputs'
259+
' [Default] False,',
260+
action='store_true',
261+
default=False)
262+
236263
args = parser.parse_args(args)
237264
exec_file = args.exec or infer_exec_file(
238265
glob.glob(os.path.join(args.dir, '*')))
@@ -246,7 +273,8 @@ def main(prog, args) -> bool:
246273
glob.glob(os.path.join(args.dir, out_ex_pattern)))
247274

248275
if args.num is None:
249-
return run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, args.timeout, args.knock_out)
276+
return run_all_tests(exec_file, in_sample_file_list, out_sample_file_list, args.timeout, args.knock_out,
277+
args.skip_almost_ac_feedback)
250278
else:
251279
return run_single_test(exec_file, in_sample_file_list, out_sample_file_list, args.timeout, args.num)
252280

tests/test_tester.py

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
import os
22
import unittest
3-
from unittest.mock import patch
43

4+
from colorama import Fore
5+
from unittest.mock import patch, mock_open, MagicMock
6+
7+
from atcodertools.executils.run_program import ExecResult, ExecStatus
58
from atcodertools.tools import tester
6-
from atcodertools.tools.tester import is_executable_file
9+
from atcodertools.tools.tester import is_executable_file, TestSummary, build_details_str
10+
from atcodertools.tools.utils import with_color
711

812
RESOURCE_DIR = os.path.abspath(os.path.join(
913
os.path.dirname(os.path.abspath(__file__)),
@@ -47,6 +51,111 @@ def test_is_executable_file__text(self, os_mock, is_file_mock):
4751
def test_is_executable_file__directory(self, os_mock, is_file_mock):
4852
self.assertFalse(is_executable_file('directory'))
4953

54+
@patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'correct', '', 0))
55+
def test_run_for_samples(self, run_program_mock: MagicMock):
56+
io_mock = mock_open(read_data='correct')
57+
58+
with patch('atcodertools.tools.tester.open', io_mock):
59+
self.assertEqual(TestSummary(1, False), tester.run_for_samples(
60+
'a.out', [('in_1.txt', 'out_1.txt')], 1))
61+
self.assertEqual(1, run_program_mock.call_count)
62+
63+
@patch('atcodertools.tools.tester.build_details_str', return_value='')
64+
@patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'correct', 'stderr', 0))
65+
def test_run_for_samples__with_stderr(self, run_program_mock: MagicMock, build_details_str_mock: MagicMock):
66+
io_mock = mock_open(read_data='correct')
67+
68+
with patch('atcodertools.tools.tester.open', io_mock):
69+
self.assertEqual(TestSummary(1, True), tester.run_for_samples(
70+
'a.out', [('in_1.txt', 'out_1.txt')], 1))
71+
self.assertEqual(1, run_program_mock.call_count)
72+
self.assertEqual(1, build_details_str_mock.call_count)
73+
74+
@patch('atcodertools.tools.tester.build_details_str', return_value='')
75+
@patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'wrong', '', 0))
76+
def test_run_for_samples__wrong_answer(self, run_program_mock: MagicMock, build_details_str_mock: MagicMock):
77+
io_mock = mock_open(read_data='correct')
78+
79+
with patch('atcodertools.tools.tester.open', io_mock):
80+
self.assertEqual(TestSummary(0, False), tester.run_for_samples(
81+
'a.out', [('in_1.txt', 'out_1.txt')], 1))
82+
self.assertEqual(1, run_program_mock.call_count)
83+
self.assertEqual(1, build_details_str_mock.call_count)
84+
85+
@patch('atcodertools.tools.tester.build_details_str', return_value='')
86+
@patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'wrong', '', 0))
87+
def test_run_for_samples__stop_execution_on_first_failure(self, run_program_mock: MagicMock,
88+
build_details_str_mock: MagicMock):
89+
io_mock = mock_open(read_data='correct')
90+
91+
with patch('atcodertools.tools.tester.open', io_mock):
92+
sample_pair_list = [('in_1.txt', 'out_1.txt'),
93+
('in_2.txt', 'out_2.txt')]
94+
self.assertEqual(TestSummary(0, False), tester.run_for_samples(
95+
'a.out', sample_pair_list, 1, True))
96+
self.assertEqual(1, run_program_mock.call_count)
97+
self.assertEqual(1, build_details_str_mock.call_count)
98+
99+
@patch('atcodertools.tools.tester.build_details_str', return_value='')
100+
@patch('atcodertools.tools.tester.run_program', return_value=ExecResult(ExecStatus.NORMAL, 'correct', 'stderr', 0))
101+
def test_run_for_samples__skip_stderr(self, run_program_mock: MagicMock, build_details_str_mock: MagicMock):
102+
io_mock = mock_open(read_data='correct')
103+
104+
with patch('atcodertools.tools.tester.open', io_mock):
105+
self.assertEqual(TestSummary(1, True), tester.run_for_samples(
106+
'a.out', [('in_1.txt', 'out_1.txt')], 1, skip_io_on_success=True))
107+
self.assertEqual(1, run_program_mock.call_count)
108+
self.assertEqual(0, build_details_str_mock.call_count)
109+
110+
def test_build_details_str(self):
111+
in_out = 'correct\n'
112+
output = 'wrong\n'
113+
stderr = 'stderr\n'
114+
expected = (with_color('[Input]', Fore.LIGHTMAGENTA_EX) + '\n'
115+
+ in_out + with_color('[Expected]',
116+
Fore.LIGHTMAGENTA_EX) + '\n' + in_out
117+
+ with_color('[Received]',
118+
Fore.LIGHTMAGENTA_EX) + '\n' + output
119+
+ with_color('[Error]', Fore.LIGHTYELLOW_EX) + '\n' + stderr)
120+
io_mock = mock_open(read_data=in_out)
121+
122+
with patch('atcodertools.tools.tester.open', io_mock):
123+
result = build_details_str(ExecResult(
124+
ExecStatus.NORMAL, output, stderr), 'in.txt', 'out.txt')
125+
self.assertEqual(expected, result)
126+
127+
def test_build_details_str__show_testcase_if_there_is_stderr(self):
128+
in_out = 'correct\n'
129+
stderr = 'stderr\n'
130+
expected = (with_color('[Input]', Fore.LIGHTMAGENTA_EX) + '\n'
131+
+ in_out + with_color('[Expected]',
132+
Fore.LIGHTMAGENTA_EX) + '\n' + in_out
133+
+ with_color('[Received]',
134+
Fore.LIGHTMAGENTA_EX) + '\n' + in_out
135+
+ with_color('[Error]', Fore.LIGHTYELLOW_EX) + '\n' + stderr)
136+
io_mock = mock_open(read_data=in_out)
137+
138+
with patch('atcodertools.tools.tester.open', io_mock):
139+
result = build_details_str(ExecResult(
140+
ExecStatus.NORMAL, in_out, stderr), 'in.txt', 'out.txt')
141+
self.assertEqual(expected, result)
142+
143+
def test_build_details_str__on_runtime_failure(self):
144+
in_out = 'correct\n'
145+
stderr = ''
146+
expected = (with_color('[Input]', Fore.LIGHTMAGENTA_EX) + '\n'
147+
+ in_out + with_color('[Expected]',
148+
Fore.LIGHTMAGENTA_EX) + '\n' + in_out
149+
+ with_color('[Received]',
150+
Fore.LIGHTMAGENTA_EX) + '\n' + in_out
151+
+ with_color('Aborted ({})\n'.format(ExecStatus.RE.name), Fore.LIGHTYELLOW_EX) + '\n')
152+
io_mock = mock_open(read_data=in_out)
153+
154+
with patch('atcodertools.tools.tester.open', io_mock):
155+
result = build_details_str(ExecResult(
156+
ExecStatus.RE, in_out, stderr), 'in.txt', 'out.txt')
157+
self.assertEqual(expected, result)
158+
50159

51160
if __name__ == '__main__':
52161
unittest.main()

0 commit comments

Comments
 (0)