Skip to content

ENH: Add SIMD operation copysign #19770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion benchmarks/benchmarks/bench_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ def time_double_add(self):

# Benchmark: time the expression `1. + self.d + 1.`. It is evaluated left to
# right, so the result of the first addition is the operand of the second.
# The final value is discarded; only the evaluation is of interest.
def time_double_add_temp(self):
1. + self.d + 1.

# Benchmark: time np.copysign with the same array, self.d, passed as both the
# magnitude operand and the sign operand. The result is discarded.
def time_copysign(self):
np.copysign(self.d, self.d)


class CustomScalar(Benchmark):
Expand Down Expand Up @@ -166,7 +169,6 @@ def time_add_scalar_conv(self):
# Benchmark: time the addition `self.y + self.z`; the sum is discarded.
# NOTE(review): the name suggests a scalar-conversion path with a complex
# operand -- confirm against the setup() that defines self.y and self.z.
def time_add_scalar_conv_complex(self):
(self.y + self.z)


class ArgPack:
__slots__ = ['args', 'kwargs']
def __init__(self, *args, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions numpy/core/src/_simd/_simd.dispatch.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ SIMD_IMPL_INTRIN_2(@intrin@_@sfx@, v@sfx@, v@sfx@, v@sfx@)

#if @fp_only@
/**begin repeat1
* #intrin = maxp, minp#
* #intrin = maxp, minp, copysign#
*/
SIMD_IMPL_INTRIN_2(@intrin@_@sfx@, v@sfx@, v@sfx@, v@sfx@)
/**end repeat1**/
Expand Down Expand Up @@ -629,7 +629,7 @@ SIMD_INTRIN_DEF(@intrin@_@sfx@)

#if @fp_only@
/**begin repeat1
* #intrin = maxp, minp#
* #intrin = maxp, minp, copysign#
*/
SIMD_INTRIN_DEF(@intrin@_@sfx@)
/**end repeat1**/
Expand Down
10 changes: 10 additions & 0 deletions numpy/core/src/common/simd/avx2/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,14 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return _mm256_blendv_epi8(a, b, _mm256_cmpgt_epi64(a, b));
}

// copysign
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f32 npyv_copysign_f32(npyv_f32 a, npyv_f32 b)
{
    // -0.0f has only the sign bit set, so it doubles as the sign-bit mask.
    const npyv_f32 sign_mask = _mm256_set1_ps(-0.0f);
    // _mm256_andnot_ps(m, x) computes (~m & x): it strips a's own sign bit.
    // Without this step a negative `a` would stay negative even when `b`
    // is positive, because OR can only set the sign bit, never clear it.
    return _mm256_or_ps(
        _mm256_andnot_ps(sign_mask, a),
        _mm256_and_ps(sign_mask, b)
    );
}
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f64 npyv_copysign_f64(npyv_f64 a, npyv_f64 b)
{
    // -0.0 has only the sign bit set, so it doubles as the sign-bit mask.
    const npyv_f64 sign_mask = _mm256_set1_pd(-0.0);
    // _mm256_andnot_pd(m, x) computes (~m & x): it strips a's own sign bit.
    // Without this step a negative `a` would stay negative even when `b`
    // is positive, because OR can only set the sign bit, never clear it.
    return _mm256_or_pd(
        _mm256_andnot_pd(sign_mask, a),
        _mm256_and_pd(sign_mask, b)
    );
}

#endif // _NPY_SIMD_AVX2_MATH_H
10 changes: 10 additions & 0 deletions numpy/core/src/common/simd/avx512/operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,4 +321,14 @@ NPY_FINLINE npyv_b32 npyv_notnan_f32(npyv_f32 a)
// Compares every lane of `a` with itself using the ordered, quiet predicate
// (_CMP_ORD_Q) and returns the resulting lane mask. A lane compares as
// ordered with itself exactly when it is not NaN.
NPY_FINLINE npyv_b64 npyv_notnan_f64(npyv_f64 a)
{ return _mm512_cmp_pd_mask(a, a, _CMP_ORD_Q); }

// copysign
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f32 npyv_copysign_f32(npyv_f32 a, npyv_f32 b)
{
    // -0.0f has only the sign bit set; 0x7fffffff is its complement.
    const npyv_f32 sign_mask = _mm512_set1_ps(-0.0f);
    const npyv_f32 magn_mask = _mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffff));
    // a's own sign must be cleared first: OR-ing b's sign onto an unmasked
    // `a` can only set the sign bit, so a negative `a` paired with a
    // positive `b` would wrongly stay negative.
    return npyv_or_f32(
        npyv_and_f32(a, magn_mask),
        npyv_and_f32(b, sign_mask)
    );
}
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f64 npyv_copysign_f64(npyv_f64 a, npyv_f64 b)
{
    // -0.0 has only the sign bit set; 0x7fff...ffff is its complement.
    const npyv_f64 sign_mask = _mm512_set1_pd(-0.0);
    const npyv_f64 magn_mask = _mm512_castsi512_pd(
        _mm512_set1_epi64(0x7fffffffffffffffLL)
    );
    // a's own sign must be cleared first: OR-ing b's sign onto an unmasked
    // `a` can only set the sign bit, so a negative `a` paired with a
    // positive `b` would wrongly stay negative.
    return npyv_or_f64(
        npyv_and_f64(a, magn_mask),
        npyv_and_f64(b, sign_mask)
    );
}

#endif // _NPY_SIMD_AVX512_OPERATORS_H
12 changes: 12 additions & 0 deletions numpy/core/src/common/simd/neon/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,16 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return vbslq_s64(npyv_cmplt_s64(a, b), a, b);
}

// copysign
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f32 npyv_copysign_f32(npyv_f32 a, npyv_f32 b)
{
    // Only the sign bit of every 32-bit lane is set in the selector.
    const uint32x4_t sign_mask = vdupq_n_u32(0x80000000);
    // Bitwise select: take the masked (sign) bit from `b` and every other
    // bit from `a`. A plain OR of b's sign onto `a` cannot clear a sign bit
    // that is already set in `a`, so negative `a` with positive `b` would
    // wrongly stay negative.
    return vbslq_f32(sign_mask, b, a);
}
#if NPY_SIMD_F64
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f64 npyv_copysign_f64(npyv_f64 a, npyv_f64 b)
{
    // Only the sign bit of every 64-bit lane is set in the selector.
    const uint64x2_t sign_mask = vdupq_n_u64(0x8000000000000000ULL);
    // Bitwise select: take the masked (sign) bit from `b` and every other
    // bit from `a`. A plain OR of b's sign onto `a` cannot clear a sign bit
    // that is already set in `a`, so negative `a` with positive `b` would
    // wrongly stay negative.
    return vbslq_f64(sign_mask, b, a);
}
#endif

#endif // _NPY_SIMD_NEON_MATH_H
10 changes: 10 additions & 0 deletions numpy/core/src/common/simd/sse/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,14 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return npyv_select_s64(npyv_cmplt_s64(a, b), a, b);
}

// copysign
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f32 npyv_copysign_f32(npyv_f32 a, npyv_f32 b)
{
    // -0.0f has only the sign bit set, so it doubles as the sign-bit mask.
    const npyv_f32 sign_mask = _mm_set1_ps(-0.0f);
    // _mm_andnot_ps(m, x) computes (~m & x): it strips a's own sign bit.
    // Without this step a negative `a` would stay negative even when `b`
    // is positive, because OR can only set the sign bit, never clear it.
    return _mm_or_ps(
        _mm_andnot_ps(sign_mask, a),
        _mm_and_ps(sign_mask, b)
    );
}
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f64 npyv_copysign_f64(npyv_f64 a, npyv_f64 b)
{
    // -0.0 has only the sign bit set, so it doubles as the sign-bit mask.
    const npyv_f64 sign_mask = _mm_set1_pd(-0.0);
    // _mm_andnot_pd(m, x) computes (~m & x): it strips a's own sign bit.
    // Without this step a negative `a` would stay negative even when `b`
    // is positive, because OR can only set the sign bit, never clear it.
    return _mm_or_pd(
        _mm_andnot_pd(sign_mask, a),
        _mm_and_pd(sign_mask, b)
    );
}

#endif // _NPY_SIMD_SSE_MATH_H
10 changes: 10 additions & 0 deletions numpy/core/src/common/simd/vsx/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,14 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_min_u64 vec_min
#define npyv_min_s64 vec_min

// copysign
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f32 npyv_copysign_f32(npyv_f32 a, npyv_f32 b)
{
    // -0.0f has only the sign bit set, so it doubles as the sign-bit mask.
    const npyv_f32 sign_mask = npyv_setall_f32(-0.0f);
    // vec_andc(x, m) computes (x & ~m): it strips a's own sign bit.
    // Without this step a negative `a` would stay negative even when `b`
    // is positive, because OR can only set the sign bit, never clear it.
    return npyv_or_f32(
        vec_andc(a, sign_mask),
        npyv_and_f32(b, sign_mask)
    );
}
// Per lane: return a value with the magnitude of `a` and the sign of `b`.
NPY_FINLINE npyv_f64 npyv_copysign_f64(npyv_f64 a, npyv_f64 b)
{
    // -0.0 has only the sign bit set, so it doubles as the sign-bit mask.
    const npyv_f64 sign_mask = npyv_setall_f64(-0.0);
    // vec_andc(x, m) computes (x & ~m): it strips a's own sign bit.
    // Without this step a negative `a` would stay negative even when `b`
    // is positive, because OR can only set the sign bit, never clear it.
    return npyv_or_f64(
        vec_andc(a, sign_mask),
        npyv_and_f64(b, sign_mask)
    );
}

#endif // _NPY_SIMD_VSX_MATH_H
10 changes: 10 additions & 0 deletions numpy/core/tests/test_simd.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,16 @@ def test_special_cases(self):
nnan = self.notnan(self.setall(self._nan()))
assert nnan == [0]*self.nlanes

def test_copysign(self):
data_a = self._data()
data_b = self._data(reverse=True)
vdata_a = self.load(data_a)
vdata_b = self.load(data_b)

data_copysign = [math.copysign(a, b) for a, b in zip(data_a, data_b)]
vcopysign = self.copysign(vdata_a, vdata_b)
assert vcopysign == data_copysign

class _SIMD_ALL(_Test_Utility):
"""
To test all vector types at once
Expand Down