Skip to content

Commit 008fd21

Browse files
committed
all done
1 parent 36f2113 commit 008fd21

File tree

2 files changed

+34
-27
lines changed

2 files changed

+34
-27
lines changed

ChangeLog

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
- better gobject-introspection annotations, thanks astavale
2828
- vips_image_write() severs all links between images, when it can ... thanks
2929
Warren and Nakilon
30+
- vector path for convolution is more accurate and can handle larger masks
3031

3132
29/8/17 started 8.5.9
3233
- make --fail stop jpeg read on any libjpeg warning, thanks @mceachen

libvips/convolution/convi.c

Lines changed: 33 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,8 @@
7676
* - remove pts for a small speedup
7777
* 12/10/17
7878
* - fix leak of vectors, thanks MHeimbuc
79+
* 14/10/17
80+
* - switch to half-float for vector path
7981
*/
8082

8183
/*
@@ -108,8 +110,8 @@
108110
/*
109111
#define DEBUG
110112
#define DEBUG_PIXELS
111-
*/
112113
#define DEBUG_COMPILE
114+
*/
113115

114116
#ifdef HAVE_CONFIG_H
115117
#include <config.h>
@@ -124,13 +126,6 @@
124126

125127
#include "pconvolution.h"
126128

127-
/* We do the 8-bit vector path with fixed-point arithmetic. We use 3.5 bits
128-
* for the mask coefficients, so our range is -4 to +3.99, after using scale
129-
* on the mask.
130-
*/
131-
#define FIXED_BITS (5)
132-
#define FIXED_SCALE (1 << FIXED_BITS)
133-
134129
/* Larger than this and we fall back to C.
135130
*/
136131
#define MAX_PASS (20)
@@ -154,22 +149,22 @@ typedef struct {
154149
/* An int version of M.
155150
*/
156151
VipsImage *iM;
152+
int n_point; /* w * h for our matrix */
157153

158154
/* We make a smaller version of the mask with the zeros squeezed out.
159155
*/
160156
int nnz; /* Number of non-zero mask elements */
161157
int *coeff; /* Array of non-zero mask coefficients */
162158
int *coeff_pos; /* Index of each nnz element in mask->coeff */
163159

164-
/* And a half float version for a vector path. mant has the signed
160+
/* And a half float version for the vector path. mant has the signed
165161
* 8-bit mantissas in [-1, +1), sexp has the exponent shift after the
166162
* mul and before the add, and exp has the final exponent shift before
167163
* write-back.
168164
*/
169165
int *mant;
170166
int sexp;
171167
int exp;
172-
int n_point; /* Number of points in fixed-point array */
173168

174169
/* The set of passes we need for this mask.
175170
*/
@@ -180,10 +175,6 @@ typedef struct {
180175
*/
181176
int r;
182177
VipsVector *vector;
183-
184-
/* Remove later.
185-
*/
186-
int *fixed;
187178
} VipsConvi;
188179

189180
typedef VipsConvolutionClass VipsConviClass;
@@ -353,11 +344,13 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
353344

354345
char source[256];
355346
char off[256];
347+
char rnd[256];
348+
char sexp[256];
356349
char coeff[256];
357350

358351
/* Exclude zero elements.
359352
*/
360-
if( !convi->fixed[i] )
353+
if( !convi->mant[i] )
361354
continue;
362355

363356
/* The source. sl0 is the first scanline in the mask.
@@ -379,9 +372,16 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
379372
* of the image and coefficient are interesting, so we can take
380373
* the bottom half of a 16x16->32 multiply.
381374
*/
382-
CONST( coeff, convi->fixed[i], 2 );
375+
CONST( coeff, convi->mant[i], 2 );
383376
ASM3( "mullw", "value", "value", coeff );
384377

378+
/* Shift right before add to prevent overflow on large masks.
379+
*/
380+
CONST( sexp, convi->sexp, 2 );
381+
CONST( rnd, 1 << (convi->sexp - 1), 2 );
382+
ASM3( "addw", "value", "value", rnd );
383+
ASM3( "shrsw", "value", "value", sexp );
384+
385385
/* We accumulate the signed 16-bit result in sum. Saturated
386386
* add.
387387
*/
@@ -420,8 +420,8 @@ vips_convi_compile_clip( VipsConvi *convi )
420420
int offset = VIPS_RINT( vips_image_get_offset( M ) );
421421

422422
VipsVector *v;
423-
char c16[256];
424-
char c5[256];
423+
char rnd[256];
424+
char exp[256];
425425
char c0[256];
426426
char c255[256];
427427
char off[256];
@@ -436,10 +436,10 @@ vips_convi_compile_clip( VipsConvi *convi )
436436
*/
437437
TEMP( "value", 2 );
438438

439-
CONST( c16, 16, 2 );
440-
ASM3( "addw", "value", "r", c16 );
441-
CONST( c5, 5, 2 );
442-
ASM3( "shrsw", "value", "value", c5 );
439+
CONST( rnd, 1 << (convi->exp - 1), 2 );
440+
ASM3( "addw", "value", "r", rnd );
441+
CONST( exp, convi->exp, 2 );
442+
ASM3( "shrsw", "value", "value", exp );
443443

444444
CONST( off, offset, 2 );
445445
ASM3( "addw", "value", "value", off );
@@ -852,8 +852,7 @@ vips__image_intize( VipsImage *in, VipsImage **out )
852852
static int
853853
vips_convi_intize( VipsConvi *convi, VipsImage *M )
854854
{
855-
int n_point = M->Xsize * M->Ysize;
856-
855+
int n_point;
857856
VipsImage *t;
858857
double scale;
859858
double *scaled;
@@ -862,6 +861,10 @@ vips_convi_intize( VipsConvi *convi, VipsImage *M )
862861
int shift;
863862
int i;
864863

864+
n_point = M->Xsize * M->Ysize;
865+
866+
g_assert( convi->n_point == n_point );
867+
865868
if( vips_check_matrix( "vips2imask", M, &t ) )
866869
return( -1 );
867870

@@ -1003,7 +1006,7 @@ vips_convi_build( VipsObject *object )
10031006

10041007
in = convolution->in;
10051008
M = convolution->M;
1006-
convi->n_point = n_point = M->Xsize * M->Ysize;
1009+
convi->n_point = M->Xsize * M->Ysize;
10071010

10081011
if( vips_embed( in, &t[0],
10091012
M->Xsize / 2, M->Ysize / 2,
@@ -1042,12 +1045,15 @@ vips_convi_build( VipsObject *object )
10421045
convi->iM = M = t[1];
10431046

10441047
coeff = VIPS_MATRIX( M, 0, 0 );
1048+
n_point = M->Xsize * M->Ysize;
10451049
if( !(convi->coeff = VIPS_ARRAY( object, n_point, int )) ||
1046-
!(convi->coeff_pos = VIPS_ARRAY( object, n_point, int )) )
1050+
!(convi->coeff_pos =
1051+
VIPS_ARRAY( object, n_point, int )) )
10471052
return( -1 );
10481053

10491054
/* Squeeze out zero mask elements.
10501055
*/
1056+
convi->nnz = 0;
10511057
for( i = 0; i < n_point; i++ )
10521058
if( coeff[i] ) {
10531059
convi->coeff[convi->nnz] = coeff[i];
@@ -1127,7 +1133,7 @@ vips_convi_init( VipsConvi *convi )
11271133
* The output image always has the same #VipsBandFormat as the input image.
11281134
*
11291135
* For #VIPS_FORMAT_UCHAR images, vips_convi() uses a fast vector path based on
1130-
* fixed-point arithmetic. This can produce slightly different results.
1136+
* half-float arithmetic. This can produce slightly different results from the non-vector C path.
11311137
* Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or
11321138
* vips_vector_set_enabled().
11331139
*

0 commit comments

Comments
 (0)