Skip to content

Commit 157b49f

Browse files
committed
Re-added the serial calculation mode for profiling, which was lost during the API changes
1 parent e8f1afa commit 157b49f

File tree

2 files changed

+30
-12
lines changed

2 files changed

+30
-12
lines changed

tests/feature_extraction/test_extraction.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,18 @@ def test_extract_features_per_sample_equals_per_kind(self):
175175
features_per_kind = extract_features(df, column_id="id", column_sort="sort", column_kind="kind",
176176
column_value="val", parallelization='per_kind',
177177
n_processes=self.n_processes)
178+
features_serial = extract_features(df, column_id="id", column_sort="sort", column_kind="kind",
179+
column_value="val", parallelization='serial')
178180

179181
six.assertCountEqual(self, features_per_sample.columns, features_per_kind.columns)
182+
six.assertCountEqual(self, features_per_sample.columns, features_serial.columns)
180183

181184
for col in features_per_sample.columns:
182185
self.assertIsNone(np.testing.assert_array_almost_equal(features_per_sample[col],
183186
features_per_kind[col]))
187+
self.assertIsNone(np.testing.assert_array_almost_equal(features_per_sample[col],
188+
features_serial[col]))
189+
184190

185191
class ParallelExtractionTestCase(DataTestCase):
186192
def setUp(self):

tsfresh/feature_extraction/extraction.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def extract_features(timeseries_container, default_fc_parameters=None,
129129
"""
130130
import logging
131131
logging.basicConfig()
132-
132+
133133
# Always use the standardized way of storing the data.
134134
# See the function normalize_input_to_internal_representation for more information.
135135
kind_to_df_map, column_id, column_value = \
@@ -155,9 +155,11 @@ def extract_features(timeseries_container, default_fc_parameters=None,
155155

156156
# Calculate the result
157157
if parallelization == 'per_kind':
158-
calculation_function = _extract_features_parallel_per_kind
158+
calculation_function = _extract_features_per_kind
159159
elif parallelization == 'per_sample':
160160
calculation_function = _extract_features_parallel_per_sample
161+
elif parallelization == 'serial':
162+
calculation_function = partial(_extract_features_per_kind, serial=True)
161163
else:
162164
raise ValueError("Argument parallelization must be one of: 'per_kind', 'per_sample'")
163165

@@ -181,14 +183,15 @@ def extract_features(timeseries_container, default_fc_parameters=None,
181183
return result
182184

183185

184-
def _extract_features_parallel_per_kind(kind_to_df_map,
185-
column_id, column_value,
186-
default_fc_parameters,
187-
kind_to_fc_parameters=None,
188-
chunksize=defaults.CHUNKSIZE,
189-
n_processes=defaults.N_PROCESSES, show_warnings=defaults.SHOW_WARNINGS,
190-
disable_progressbar=defaults.DISABLE_PROGRESSBAR,
191-
impute_function=defaults.IMPUTE_FUNCTION):
186+
def _extract_features_per_kind(kind_to_df_map,
187+
column_id, column_value,
188+
default_fc_parameters,
189+
kind_to_fc_parameters=None,
190+
chunksize=defaults.CHUNKSIZE,
191+
n_processes=defaults.N_PROCESSES, show_warnings=defaults.SHOW_WARNINGS,
192+
disable_progressbar=defaults.DISABLE_PROGRESSBAR,
193+
impute_function=defaults.IMPUTE_FUNCTION,
194+
serial=False):
192195
"""
193196
Parallelize the feature extraction per kind.
194197
@@ -226,6 +229,9 @@ def _extract_features_parallel_per_kind(kind_to_df_map,
226229
:param impute_function: None, if no imputing should happen or the function to call for imputing.
227230
:type impute_function: None or function
228231
232+
:param serial: If True, run the calculation serially instead of in parallel (intended for profiling and performance testing)
233+
:type serial: bool
234+
229235
:return: The (maybe imputed) DataFrame containing extracted features.
230236
:rtype: pandas.DataFrame
231237
"""
@@ -241,8 +247,14 @@ def _extract_features_parallel_per_kind(kind_to_df_map,
241247
chunksize = _calculate_best_chunksize(kind_to_df_map, n_processes)
242248

243249
total_number_of_expected_results = len(kind_to_df_map)
244-
extracted_features = tqdm(pool.imap_unordered(partial_extract_features_for_one_time_series, kind_to_df_map.items(),
245-
chunksize=chunksize), total=total_number_of_expected_results,
250+
251+
if serial:
252+
map_function = map
253+
else:
254+
map_function = partial(pool.imap_unordered, chunksize=chunksize)
255+
256+
extracted_features = tqdm(map_function(partial_extract_features_for_one_time_series, kind_to_df_map.items()),
257+
total=total_number_of_expected_results,
246258
desc="Feature Extraction", disable=disable_progressbar)
247259
pool.close()
248260

0 commit comments

Comments
 (0)