Skip to content

Commit c463811

Browse files
authored
reduce{h,v}: remove rounding from the double paths (libvips#3532)
1 parent c09d144 commit c463811

File tree

3 files changed

+62
-114
lines changed

3 files changed

+62
-114
lines changed

libvips/resample/reduceh.cpp

Lines changed: 11 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ vips_reduce_get_points(VipsKernel kernel, double shrink)
131131
}
132132
}
133133

134-
template <typename T, int max_value>
134+
template <typename T, T max_value>
135135
static void inline reduceh_unsigned_int_tab(VipsReduceh *reduceh,
136136
VipsPel *pout, const VipsPel *pin,
137137
const int bands, const short *restrict cx)
@@ -141,9 +141,9 @@ static void inline reduceh_unsigned_int_tab(VipsReduceh *reduceh,
141141
const int n = reduceh->n_point;
142142

143143
for (int z = 0; z < bands; z++) {
144-
int sum;
144+
typename LongT<T>::type sum;
145145

146-
sum = reduce_sum<T, int>(in + z, bands, cx, n);
146+
sum = reduce_sum<T>(in + z, bands, cx, n);
147147
sum = unsigned_fixed_round(sum);
148148
out[z] = VIPS_CLIP(0, sum, max_value);
149149
}
@@ -159,9 +159,9 @@ static void inline reduceh_signed_int_tab(VipsReduceh *reduceh,
159159
const int n = reduceh->n_point;
160160

161161
for (int z = 0; z < bands; z++) {
162-
int sum;
162+
typename LongT<T>::type sum;
163163

164-
sum = reduce_sum<T, int>(in + z, bands, cx, n);
164+
sum = reduce_sum<T>(in + z, bands, cx, n);
165165
sum = signed_fixed_round(sum);
166166
out[z] = VIPS_CLIP(min_value, sum, max_value);
167167
}
@@ -179,46 +179,7 @@ static void inline reduceh_float_tab(VipsReduceh *reduceh,
179179
const int n = reduceh->n_point;
180180

181181
for (int z = 0; z < bands; z++)
182-
out[z] = reduce_sum<T, double>(in + z, bands, cx, n);
183-
}
184-
185-
/* 32-bit int output needs a 64-bits intermediate.
186-
*/
187-
188-
template <typename T, unsigned int max_value>
189-
static void inline reduceh_unsigned_int32_tab(VipsReduceh *reduceh,
190-
VipsPel *pout, const VipsPel *pin,
191-
const int bands, const short *restrict cx)
192-
{
193-
T *restrict out = (T *) pout;
194-
const T *restrict in = (T *) pin;
195-
const int n = reduceh->n_point;
196-
197-
for (int z = 0; z < bands; z++) {
198-
uint64_t sum;
199-
200-
sum = reduce_sum<T, uint64_t>(in + z, bands, cx, n);
201-
sum = unsigned_fixed_round(sum);
202-
out[z] = VIPS_CLIP(0, sum, max_value);
203-
}
204-
}
205-
206-
template <typename T, int min_value, int max_value>
207-
static void inline reduceh_signed_int32_tab(VipsReduceh *reduceh,
208-
VipsPel *pout, const VipsPel *pin,
209-
const int bands, const short *restrict cx)
210-
{
211-
T *restrict out = (T *) pout;
212-
const T *restrict in = (T *) pin;
213-
const int n = reduceh->n_point;
214-
215-
for (int z = 0; z < bands; z++) {
216-
int64_t sum;
217-
218-
sum = reduce_sum<T, int64_t>(in + z, bands, cx, n);
219-
sum = signed_fixed_round(sum);
220-
out[z] = VIPS_CLIP(min_value, sum, max_value);
221-
}
182+
out[z] = reduce_sum<T>(in + z, bands, cx, n);
222183
}
223184

224185
/* Ultra-high-quality version for double images.
@@ -232,17 +193,13 @@ static void inline reduceh_notab(VipsReduceh *reduceh,
232193
const T *restrict in = (T *) pin;
233194
const int n = reduceh->n_point;
234195

235-
double cx[MAX_POINT];
196+
typename LongT<T>::type cx[MAX_POINT];
236197

237198
vips_reduce_make_mask(cx, reduceh->kernel, reduceh->n_point,
238199
reduceh->hshrink, x);
239200

240-
for (int z = 0; z < bands; z++) {
241-
double sum;
242-
sum = reduce_sum<T, double>(in + z, bands, cx, n);
243-
244-
out[z] = VIPS_ROUND_UINT(sum);
245-
}
201+
for (int z = 0; z < bands; z++)
202+
out[z] = reduce_sum<T>(in + z, bands, cx, n);
246203
}
247204

248205
static int
@@ -330,12 +287,12 @@ vips_reduceh_gen(VipsRegion *out_region, void *seq,
330287
break;
331288

332289
case VIPS_FORMAT_UINT:
333-
reduceh_unsigned_int32_tab<unsigned int,
290+
reduceh_unsigned_int_tab<unsigned int,
334291
UINT_MAX>(reduceh, q, p, bands, cxs);
335292
break;
336293

337294
case VIPS_FORMAT_INT:
338-
reduceh_signed_int32_tab<signed int,
295+
reduceh_signed_int_tab<signed int,
339296
INT_MIN, INT_MAX>(reduceh, q, p, bands, cxs);
340297
break;
341298

libvips/resample/reducev.cpp

Lines changed: 11 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ vips_reducev_compile(VipsReducev *reducev)
402402
/* You'd think this would vectorise, but gcc hates mixed types in nested loops
403403
* :-(
404404
*/
405-
template <typename T, int max_value>
405+
template <typename T, T max_value>
406406
static void inline reducev_unsigned_int_tab(VipsReducev *reducev,
407407
VipsPel *pout, const VipsPel *pin,
408408
const int ne, const int lskip, const short *restrict cy)
@@ -413,9 +413,9 @@ static void inline reducev_unsigned_int_tab(VipsReducev *reducev,
413413
const int l1 = lskip / sizeof(T);
414414

415415
for (int z = 0; z < ne; z++) {
416-
int sum;
416+
typename LongT<T>::type sum;
417417

418-
sum = reduce_sum<T, int>(in + z, l1, cy, n);
418+
sum = reduce_sum<T>(in + z, l1, cy, n);
419419
sum = unsigned_fixed_round(sum);
420420
out[z] = VIPS_CLIP(0, sum, max_value);
421421
}
@@ -432,9 +432,9 @@ static void inline reducev_signed_int_tab(VipsReducev *reducev,
432432
const int l1 = lskip / sizeof(T);
433433

434434
for (int z = 0; z < ne; z++) {
435-
int sum;
435+
typename LongT<T>::type sum;
436436

437-
sum = reduce_sum<T, int>(in + z, l1, cy, n);
437+
sum = reduce_sum<T>(in + z, l1, cy, n);
438438
sum = signed_fixed_round(sum);
439439
out[z] = VIPS_CLIP(min_value, sum, max_value);
440440
}
@@ -453,48 +453,7 @@ static void inline reducev_float_tab(VipsReducev *reducev,
453453
const int l1 = lskip / sizeof(T);
454454

455455
for (int z = 0; z < ne; z++)
456-
out[z] = reduce_sum<T, double>(in + z, l1, cy, n);
457-
}
458-
459-
/* 32-bit int output needs a 64-bits intermediate.
460-
*/
461-
462-
template <typename T, unsigned int max_value>
463-
static void inline reducev_unsigned_int32_tab(VipsReducev *reducev,
464-
VipsPel *pout, const VipsPel *pin,
465-
const int ne, const int lskip, const short *restrict cy)
466-
{
467-
T *restrict out = (T *) pout;
468-
const T *restrict in = (T *) pin;
469-
const int n = reducev->n_point;
470-
const int l1 = lskip / sizeof(T);
471-
472-
for (int z = 0; z < ne; z++) {
473-
uint64_t sum;
474-
475-
sum = reduce_sum<T, uint64_t>(in + z, l1, cy, n);
476-
sum = unsigned_fixed_round(sum);
477-
out[z] = VIPS_CLIP(0, sum, max_value);
478-
}
479-
}
480-
481-
template <typename T, int min_value, int max_value>
482-
static void inline reducev_signed_int32_tab(VipsReducev *reducev,
483-
VipsPel *pout, const VipsPel *pin,
484-
const int ne, const int lskip, const short *restrict cy)
485-
{
486-
T *restrict out = (T *) pout;
487-
const T *restrict in = (T *) pin;
488-
const int n = reducev->n_point;
489-
const int l1 = lskip / sizeof(T);
490-
491-
for (int z = 0; z < ne; z++) {
492-
int64_t sum;
493-
494-
sum = reduce_sum<T, int64_t>(in + z, l1, cy, n);
495-
sum = signed_fixed_round(sum);
496-
out[z] = VIPS_CLIP(min_value, sum, max_value);
497-
}
456+
out[z] = reduce_sum<T>(in + z, l1, cy, n);
498457
}
499458

500459
/* Ultra-high-quality version for double images.
@@ -509,17 +468,13 @@ static void inline reducev_notab(VipsReducev *reducev,
509468
const int n = reducev->n_point;
510469
const int l1 = lskip / sizeof(T);
511470

512-
double cy[MAX_POINT];
471+
typename LongT<T>::type cy[MAX_POINT];
513472

514473
vips_reduce_make_mask(cy, reducev->kernel, reducev->n_point,
515474
reducev->vshrink, y);
516475

517-
for (int z = 0; z < ne; z++) {
518-
double sum;
519-
sum = reduce_sum<T, double>(in + z, l1, cy, n);
520-
521-
out[z] = VIPS_ROUND_UINT(sum);
522-
}
476+
for (int z = 0; z < ne; z++)
477+
out[z] = reduce_sum<T>(in + z, l1, cy, n);
523478
}
524479

525480
static int
@@ -591,12 +546,12 @@ vips_reducev_gen(VipsRegion *out_region, void *vseq,
591546
break;
592547

593548
case VIPS_FORMAT_UINT:
594-
reducev_unsigned_int32_tab<unsigned int,
549+
reducev_unsigned_int_tab<unsigned int,
595550
UINT_MAX>(reducev, q, p, ne, lskip, cys);
596551
break;
597552

598553
case VIPS_FORMAT_INT:
599-
reducev_signed_int32_tab<signed int,
554+
reducev_signed_int_tab<signed int,
600555
INT_MIN, INT_MAX>(reducev, q, p, ne, lskip, cys);
601556
break;
602557

libvips/resample/templates.h

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
2929
*/
3030

31+
#include <cstdint>
32+
3133
/*
3234
* Various casts which assume that the data is already in range. (That
3335
* is, they are to be used with monotone samplers.)
@@ -149,7 +151,7 @@ static T inline bilinear_nosign(
149151
template <typename T>
150152
static T inline unsigned_fixed_round(T v)
151153
{
152-
const T round_by = VIPS_INTERPOLATE_SCALE >> 1;
154+
const int round_by = VIPS_INTERPOLATE_SCALE >> 1;
153155

154156
return (v + round_by) >> VIPS_INTERPOLATE_SHIFT;
155157
}
@@ -200,8 +202,8 @@ static int inline bicubic_unsigned_int(
200202
template <typename T>
201203
static T inline signed_fixed_round(T v)
202204
{
203-
const T sign_of_v = 2 * (v >= 0) - 1;
204-
const T round_by = sign_of_v * (VIPS_INTERPOLATE_SCALE >> 1);
205+
const int sign_of_v = 2 * (v >= 0) - 1;
206+
const int round_by = sign_of_v * (VIPS_INTERPOLATE_SCALE >> 1);
205207

206208
return (v + round_by) >> VIPS_INTERPOLATE_SHIFT;
207209
}
@@ -473,10 +475,44 @@ vips_reduce_make_mask(T *c, VipsKernel kernel, const int n_points,
473475
}
474476
}
475477

478+
/* Machinery to promote type T to a larger data type, which prevents an
479+
* overflow in reduce_sum(). Defaults to a 32-bit integral type.
480+
*/
481+
template <typename T>
482+
struct LongT {
483+
typedef int32_t type;
484+
};
485+
486+
/* 32-bit integral types need a 64-bit intermediate.
487+
*/
488+
template <>
489+
struct LongT<int32_t> {
490+
typedef int64_t type;
491+
};
492+
493+
template <>
494+
struct LongT<uint32_t> {
495+
typedef int64_t type;
496+
};
497+
498+
/* 32-bit floating-point types need a 64-bit intermediate.
499+
*/
500+
template <>
501+
struct LongT<float> {
502+
typedef double type;
503+
};
504+
505+
/* 64-bit floating-point types need a wider (long double) intermediate.
506+
*/
507+
template <>
508+
struct LongT<double> {
509+
typedef long double type;
510+
};
511+
476512
/* Our inner loop for resampling with a convolution of type CT. Operate on
477513
* elements of type T, gather results in an intermediate of type IT.
478514
*/
479-
template <typename T, typename IT, typename CT>
515+
template <typename T, typename CT, typename IT = typename LongT<T>::type>
480516
static IT inline reduce_sum(const T *restrict in, int stride,
481517
const CT *restrict c, int n)
482518
{

0 commit comments

Comments
 (0)