@@ -5,9 +5,8 @@
 import string
 from collections import Counter
 from concurrent.futures import ProcessPoolExecutor
-from typing import Awaitable, Callable, Dict
+from typing import Awaitable, Callable
 
-import multiprocess
 import pytest
 
 
|
@@ -56,67 +55,79 @@ def inner_file(size: int = 1000):
     return inner_file
 
 
-def read_segment(file: pathlib.Path, start: int, end: int) -> str:
-    with open(file) as f:
-        f.seek(start)
-        return f.read(end - start)
+def reference_exercise1(
+    input_file: pathlib.Path, size: int, n_processes: int
+) -> dict[str, int]:
+    def read_segment(file: pathlib.Path, start: int, end: int) -> str:
+        with open(file) as f:
+            f.seek(start)
+            return f.read(end - start)
 
+    def segment_stat(segment: str) -> dict[str, int]:
+        return Counter(segment.strip())
 
-def segment_stat(segment: str) -> Dict[str, int]:
-    return Counter(segment.strip())
+    def count_words(
+        file: pathlib.Path, size: int, n_processes: int, segment_index: int
+    ) -> dict[str, int]:
+        segment_size = size // n_processes
+        remainder = size % n_processes
+        start = segment_index * segment_size + min(segment_index, remainder)
+        end = start + segment_size + (1 if segment_index < remainder else 0)
+        return segment_stat(read_segment(file, start, end))
 
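For intuition about the new splitting logic: the first `remainder` segments each absorb one extra character, so the segments tile the file exactly even when `size` is not divisible by `n_processes`. A standalone sketch with hypothetical values `size=10`, `n_processes=3`:

```python
# Hypothetical example of the remainder-aware split: 10 characters, 3 processes.
size, n_processes = 10, 3
segment_size, remainder = divmod(size, n_processes)  # segment_size=3, remainder=1
for i in range(n_processes):
    start = i * segment_size + min(i, remainder)
    end = start + segment_size + (1 if i < remainder else 0)
    print(i, start, end)
# Output:
# 0 0 4   <- segment 0 absorbs the leftover character
# 1 4 7
# 2 7 10  <- the segments cover [0, 10) with no gaps or overlaps
```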
|
-
-def count_words(
-    file: pathlib.Path, size: int, n_processes: int, index: int
-) -> Dict[str, int]:
-    segment_size = size // n_processes
-    start = index * segment_size
-    end = start + segment_size
-    return segment_stat(read_segment(file, start, end))
-
-
-def reference_exercise1(input_path: pathlib.Path, size: int) -> Dict[str, int]:
-    workers = multiprocess.cpu_count()
-    with ProcessPoolExecutor(workers) as executor:
+    with ProcessPoolExecutor(n_processes) as executor:
         result = executor.map(
-            functools.partial(count_words, input_path, size, workers), range(workers)
+            functools.partial(count_words, input_file, size, n_processes),
+            range(n_processes),
         )
     return dict(functools.reduce(lambda x, y: x + y, result, Counter()))
|
 
 
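On the merge step: `Counter` instances support `+`, so folding the mapped per-segment counts with `functools.reduce` and an empty `Counter()` as the start value yields the total counts. A self-contained illustration of that reduction:

```python
from collections import Counter
import functools

# Per-segment counts, as executor.map would return them.
parts = [Counter("aab"), Counter("bcc"), Counter("a")]
total = dict(functools.reduce(lambda x, y: x + y, parts, Counter()))
assert total == {"a": 3, "b": 2, "c": 2}
```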
|
-@pytest.mark.parametrize("size", [1000, 10000, 100000])
+random_file_sizes = [53, 123, 517, 1000, 10000]
+
+
+@pytest.mark.parametrize(
+    "size, n_processes", [(s, w) for s in random_file_sizes for w in [2, 4, 5, 7]]
+)
 def test_exercise1_total_counts(
     function_to_test: Callable,
     make_random_file: Callable[[None], pathlib.Path],
     size: int,
+    n_processes: int,
 ):
     rf = make_random_file(size)
|
-    reference_res = reference_exercise1(rf, size)
-    total_letters = sum(reference_res.values())
-    user_res = function_to_test(rf, size)
+    user_res = function_to_test(rf, size, n_processes)
     total_letters_user = sum(user_res.values())
-    assert total_letters == total_letters_user
+    assert total_letters_user == size
 
 
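The simplified assertion relies on two facts: the character counts of any string sum to its length, and, assuming the fixture's implied contract, `make_random_file(size)` writes exactly `size` characters. The first fact in isolation:

```python
from collections import Counter

# Summing the per-character counts recovers the length of the string.
text = "hello world"
assert sum(Counter(text).values()) == len(text)
```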
|
-@pytest.mark.parametrize("size", [1000, 10000, 100000])
+@pytest.mark.parametrize(
+    "size, workers", [(s, w) for s in random_file_sizes for w in [2, 4, 5, 7]]
+)
 def test_exercise1_counts(
     function_to_test: Callable,
     make_random_file: Callable[[None], pathlib.Path],
     size: int,
+    workers: int,
 ):
     rf = make_random_file(size)
|
-    reference_res = reference_exercise1(rf, size)
-    user_res = function_to_test(rf, size)
-    assert user_res == reference_res
+    # We read the whole file and count it serially with a Counter: not parallel,
+    # but guaranteed correct, so it serves as the ground truth.
+    with open(rf) as f:
+        file_content = f.read()
+    user_res = function_to_test(rf, size, workers)
+    assert user_res == Counter(file_content)
 
 
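Using the serial `Counter(file_content)` as the oracle is sound because counting is order-independent and `Counter` is additive over concatenation: however a parallel solution segments the file, its merged counts must equal the serial count. The invariant in miniature:

```python
from collections import Counter

text = "abracadabra"
mid = len(text) // 2
# Counting two halves separately and merging gives the same result as
# counting the whole string in one pass.
assert Counter(text[:mid]) + Counter(text[mid:]) == Counter(text)
```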
|
-# #TODO: find a way to test that the user is using multiprocessing (directly or indirectly)
+# TODO: find a way to test that the user is using multiprocessing (directly or indirectly)
 # def test_exercise1_processes(function_to_test: Callable, make_random_file: Callable[[None], pathlib.Path], monkeypatch: pytest.MonkeyPatch):
-#     with patch.object(multiprocessing.Process, "start") as process_mock:
-#         size = 1000
-#         rf = make_random_file(size)
-#         user_res = function_to_test(rf, size)
-#         assert process_mock.mock_calls or
+#     n_process_mock = MagicMock()
+#     n_process_mock.return_value = 2
+#     size = 1000
+#     rf = make_random_file(size)
+#     user_res = function_to_test(rf, size, n_process_mock)
+#     assert n_process_mock.called
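One possible direction for this TODO, sketched as an untested assumption: spy on `concurrent.futures.ProcessPoolExecutor` with `unittest.mock.patch(..., wraps=...)`. The caveat, and the reason the TODO is genuinely hard, is that a solution doing `from concurrent.futures import ProcessPoolExecutor` at import time would bypass the patched module attribute:

```python
# Untested sketch: spy on ProcessPoolExecutor to verify a pool gets created.
import concurrent.futures
from unittest.mock import patch

def test_exercise1_uses_processes(function_to_test, make_random_file):
    size = 1000
    rf = make_random_file(size)
    with patch(
        "concurrent.futures.ProcessPoolExecutor",
        wraps=concurrent.futures.ProcessPoolExecutor,
    ) as pool_spy:
        function_to_test(rf, size, 2)
    # Fails if the solution never looked up ProcessPoolExecutor on the module.
    assert pool_spy.called
```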
 
 
 def find_word(letters: list[str], separator: str) -> bool:
|
|