Skip to content

Commit 45f9118

Browse files
committed
PERF: Optimize loadtxt usecols.
7-10% speedup in usecols benchmarks; it appears that even in the single-usecol case, avoiding the iteration over `usecols` more than compensates for the cost of the extra function call to `usecols_getter`.
1 parent db1a343 commit 45f9118

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

numpy/lib/npyio.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -993,8 +993,8 @@ def read_data(lineno_words_iter, chunk_size):
993993
X = []
994994
for lineno, words in lineno_words_iter:
995995
if usecols:
996-
words = [words[j] for j in usecols]
997-
if len(words) != ncols:
996+
words = usecols_getter(words)
997+
elif len(words) != ncols:
998998
raise ValueError(f"Wrong number of columns at line {lineno}")
999999
# Convert each value according to its column, then pack it
10001000
# according to the dtype's nesting
@@ -1033,23 +1033,25 @@ def read_data(lineno_words_iter, chunk_size):
10331033
byte_converters = True
10341034

10351035
if usecols is not None:
1036-
# Allow usecols to be a single int or a sequence of ints
1036+
# Copy usecols, allowing it to be a single int or a sequence of ints.
10371037
try:
1038-
usecols_as_list = list(usecols)
1038+
usecols = list(usecols)
10391039
except TypeError:
1040-
usecols_as_list = [usecols]
1041-
for col_idx in usecols_as_list:
1040+
usecols = [usecols]
1041+
for i, col_idx in enumerate(usecols):
10421042
try:
1043-
opindex(col_idx)
1043+
usecols[i] = opindex(col_idx) # Cast to builtin int now.
10441044
except TypeError as e:
10451045
e.args = (
10461046
"usecols must be an int or a sequence of ints but "
10471047
"it contains at least one element of type %s" %
10481048
type(col_idx),
10491049
)
10501050
raise
1051-
# Fall back to existing code
1052-
usecols = usecols_as_list
1051+
usecols_getter = (
1052+
itemgetter(*usecols) if len(usecols) > 1 else
1053+
# Get an iterable back, even if using a single column.
1054+
lambda obj, _col=usecols[0]: [obj[_col]])
10531055

10541056
# Make sure we're dealing with a proper dtype
10551057
dtype = np.dtype(dtype)

0 commit comments

Comments
 (0)