Skip to content

Commit b32cb5e

Browse files
authored
SIMD optimizations with Highway (#3618)
1 parent 57b7b9e commit b32cb5e

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

49 files changed

+2883
-1525
lines changed

.github/workflows/ci.yml

+3-2
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,17 @@ jobs:
5858
libfftw3-dev libexif-dev libjpeg-turbo8-dev
5959
libpng-dev libwebp-dev libtiff5-dev
6060
libheif-dev libexpat1-dev libcfitsio-dev
61-
libmatio-dev libnifti-dev liborc-0.4-dev
61+
libmatio-dev libnifti-dev libhwy-dev
6262
liblcms2-dev libpoppler-glib-dev librsvg2-dev
6363
libopenexr-dev libpango1.0-dev libarchive-dev
6464
libopenslide-dev libffi-dev libopenjp2-7-dev
6565
libimagequant-dev libcgif-dev
66+
liborc-0.4-dev # FIXME: Remove once libhwy 1.0.5 is available.
6667

6768
- name: Install macOS dependencies
6869
if: runner.os == 'macOS'
6970
run: |
70-
brew install meson ninja fftw fontconfig glib libexif libarchive little-cms2 orc pango pkg-config
71+
brew install meson ninja fftw fontconfig glib libexif libarchive little-cms2 highway pango pkg-config
7172
brew install cfitsio cgif jpeg-xl libheif libimagequant mozjpeg libmatio librsvg libspng libtiff openexr openjpeg openslide poppler webp
7273
7374
- name: Install Clang 14

ChangeLog

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
- add support for 16-bit float TIFFs [DarthSim]
2727
- add direct mode to dzsave [jcupitt]
2828
- require C++11 as a minimum standard [kleisauke]
29+
- add support for SIMD via Highway [kleisauke]
2930

3031
18/9/23 8.14.5
3132

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,10 @@ enabling a package with such a large attack surface.
227227
If available, libvips adds support for text rendering. You need the
228228
package pangocairo in `pkg-config --list-all`.
229229

230-
### orc-0.4
230+
### highway
231231

232-
If available, vips will accelerate some operations with this run-time
233-
compiler.
232+
If present, libvips will accelerate some operations with SIMD. If not, it
233+
will look for the orc-0.4 package.
234234

235235
### matio
236236

fuzz/oss_fuzz_build.sh

+15
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,21 @@ cat > $WORK/lib/pkgconfig/pdfium.pc << EOF
177177
Cflags: -I\${includedir}
178178
EOF
179179

180+
# highway
181+
pushd $SRC/highway
182+
cmake \
183+
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
184+
-DCMAKE_INSTALL_PREFIX=$WORK \
185+
-DBUILD_SHARED_LIBS=0 \
186+
-DBUILD_TESTING=0 \
187+
-DHWY_ENABLE_CONTRIB=0 \
188+
-DHWY_ENABLE_EXAMPLES=0 \
189+
-DHWY_ENABLE_TESTS=0 \
190+
.
191+
make -j$(nproc)
192+
make install
193+
popd
194+
180195
# libvips
181196
# Disable building man pages, gettext po files, tools, and tests
182197
sed -i "/subdir('man')/{N;N;N;d;}" meson.build

libvips/arithmetic/abs.c

+25-68
Original file line numberDiff line numberDiff line change
@@ -80,39 +80,11 @@ typedef VipsUnaryClass VipsAbsClass;
8080

8181
G_DEFINE_TYPE(VipsAbs, vips_abs, VIPS_TYPE_UNARY);
8282

83-
static void *
84-
vips_abs_orc_init_cb(void *a)
85-
{
86-
VipsAbs *abs = (VipsAbs *) a;
87-
VipsArithmeticClass *aclass = VIPS_ARITHMETIC_GET_CLASS(abs);
88-
89-
VipsVector *v;
90-
91-
vips_arithmetic_set_vector(aclass);
92-
93-
v = vips_arithmetic_get_program(aclass, VIPS_FORMAT_CHAR);
94-
vips_vector_asm2(v, "absb", "d1", "s1");
95-
96-
v = vips_arithmetic_get_program(aclass, VIPS_FORMAT_SHORT);
97-
vips_vector_asm2(v, "absw", "d1", "s1");
98-
99-
v = vips_arithmetic_get_program(aclass, VIPS_FORMAT_INT);
100-
vips_vector_asm2(v, "absl", "d1", "s1");
101-
102-
vips_arithmetic_compile(aclass);
103-
104-
return NULL;
105-
}
106-
10783
static int
10884
vips_abs_build(VipsObject *object)
10985
{
110-
static GOnce once = G_ONCE_INIT;
111-
11286
VipsUnary *unary = (VipsUnary *) object;
11387

114-
VIPS_ONCE(&once, vips_abs_orc_init_cb, object);
115-
11688
if (unary->in &&
11789
vips_band_format_isuint(unary->in->BandFmt))
11890
return vips_unary_copy(unary);
@@ -199,50 +171,35 @@ static void
199171
vips_abs_buffer(VipsArithmetic *arithmetic,
200172
VipsPel *out, VipsPel **in, int width)
201173
{
202-
VipsArithmeticClass *class = VIPS_ARITHMETIC_GET_CLASS(arithmetic);
203174
VipsImage *im = arithmetic->ready[0];
204175
const int bands = vips_image_get_bands(im);
205176
int sz = width * bands;
206177

207-
VipsVector *v;
208-
209-
if ((v = vips_arithmetic_get_vector(class,
210-
vips_image_get_format(im)))) {
211-
VipsExecutor ex;
212-
213-
vips_executor_set_program(&ex, v, sz);
214-
vips_executor_set_array(&ex, v->s[0], in[0]);
215-
vips_executor_set_destination(&ex, out);
216-
217-
vips_executor_run(&ex);
218-
}
219-
else {
220-
switch (vips_image_get_format(im)) {
221-
case VIPS_FORMAT_CHAR:
222-
ABS_INT(signed char);
223-
break;
224-
case VIPS_FORMAT_SHORT:
225-
ABS_INT(signed short);
226-
break;
227-
case VIPS_FORMAT_INT:
228-
ABS_INT(signed int);
229-
break;
230-
case VIPS_FORMAT_FLOAT:
231-
ABS_FLOAT(float);
232-
break;
233-
case VIPS_FORMAT_DOUBLE:
234-
ABS_FLOAT(double);
235-
break;
236-
case VIPS_FORMAT_COMPLEX:
237-
ABS_COMPLEX(float);
238-
break;
239-
case VIPS_FORMAT_DPCOMPLEX:
240-
ABS_COMPLEX(double);
241-
break;
242-
243-
default:
244-
g_assert_not_reached();
245-
}
178+
switch (vips_image_get_format(im)) {
179+
case VIPS_FORMAT_CHAR:
180+
ABS_INT(signed char);
181+
break;
182+
case VIPS_FORMAT_SHORT:
183+
ABS_INT(signed short);
184+
break;
185+
case VIPS_FORMAT_INT:
186+
ABS_INT(signed int);
187+
break;
188+
case VIPS_FORMAT_FLOAT:
189+
ABS_FLOAT(float);
190+
break;
191+
case VIPS_FORMAT_DOUBLE:
192+
ABS_FLOAT(double);
193+
break;
194+
case VIPS_FORMAT_COMPLEX:
195+
ABS_COMPLEX(float);
196+
break;
197+
case VIPS_FORMAT_DPCOMPLEX:
198+
ABS_COMPLEX(double);
199+
break;
200+
201+
default:
202+
g_assert_not_reached();
246203
}
247204
}
248205

libvips/arithmetic/add.c

-3
Original file line numberDiff line numberDiff line change
@@ -268,9 +268,6 @@ vips_add_init(VipsAdd *add)
268268
* In other words, the output type is just large enough to hold the whole
269269
* range of possible values.
270270
*
271-
* Operations on integer images are performed using the processor's vector unit,
272-
* if possible. Disable this with --vips-novector or VIPS_NOVECTOR.
273-
*
274271
* See also: vips_subtract(), vips_linear().
275272
*
276273
* Returns: 0 on success, -1 on error

libvips/arithmetic/arithmetic.c

-77
Original file line numberDiff line numberDiff line change
@@ -750,83 +750,6 @@ vips_arithmetic_set_format_table(VipsArithmeticClass *class,
750750
class->format_table = format_table;
751751
}
752752

753-
void
754-
vips_arithmetic_set_vector(VipsArithmeticClass *class)
755-
{
756-
int i;
757-
758-
g_assert(class->format_table);
759-
760-
for (i = 0; i < VIPS_FORMAT_LAST; i++) {
761-
int isize = vips_format_sizeof(i);
762-
int osize = vips_format_sizeof((int) class->format_table[i]);
763-
764-
VipsVector *v;
765-
766-
v = vips_vector_new("arithmetic", osize);
767-
768-
vips_vector_source_name(v, "s1", isize);
769-
vips_vector_source_name(v, "s2", isize);
770-
vips_vector_temporary(v, "t1", osize);
771-
vips_vector_temporary(v, "t2", osize);
772-
773-
class->vectors[i] = v;
774-
}
775-
}
776-
777-
/* Get the stub for this program ... use _get_vector() to get the compiled
778-
* code.
779-
*/
780-
VipsVector *
781-
vips_arithmetic_get_program(VipsArithmeticClass *class, VipsBandFormat fmt)
782-
{
783-
g_assert((int) fmt >= 0 && (int) fmt < VIPS_FORMAT_LAST);
784-
g_assert(!class->vector_program[fmt]);
785-
786-
class->vector_program[fmt] = TRUE;
787-
788-
return class->vectors[fmt];
789-
}
790-
791-
/* Get the compiled code for this type, if available.
792-
*/
793-
VipsVector *
794-
vips_arithmetic_get_vector(VipsArithmeticClass *class, VipsBandFormat fmt)
795-
{
796-
g_assert(fmt >= 0 && fmt < VIPS_FORMAT_LAST);
797-
798-
if (!vips_vector_isenabled() ||
799-
!class->vector_program[fmt])
800-
return NULL;
801-
802-
return class->vectors[fmt];
803-
}
804-
805-
void
806-
vips_arithmetic_compile(VipsArithmeticClass *class)
807-
{
808-
int i;
809-
810-
g_assert(class->format_table);
811-
812-
for (i = 0; i < VIPS_FORMAT_LAST; i++)
813-
if (class->vector_program[i] &&
814-
!vips_vector_compile(class->vectors[i]))
815-
/* If compilation fails, turn off the vector for this
816-
* type.
817-
*/
818-
class->vector_program[i] = FALSE;
819-
820-
#ifdef DEBUG
821-
printf("vips_arithmetic_compile: ");
822-
for (i = 0; i < VIPS_FORMAT_LAST; i++)
823-
if (class->vector_program[i])
824-
printf("%s ",
825-
vips_enum_nick(VIPS_TYPE_BAND_FORMAT, i));
826-
printf("\n");
827-
#endif /*DEBUG*/
828-
}
829-
830753
/* Called from iofuncs to init all operations in this dir. Use a plugin system
831754
* instead?
832755
*/

libvips/arithmetic/parithmetic.h

-15
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ extern "C" {
3636
#endif /*__cplusplus*/
3737

3838
#include <vips/vips.h>
39-
#include <vips/vector.h>
4039

4140
#define VIPS_TYPE_ARITHMETIC (vips_arithmetic_get_type())
4241
#define VIPS_ARITHMETIC(obj) \
@@ -91,14 +90,6 @@ typedef struct _VipsArithmeticClass {
9190
*/
9291
const VipsBandFormat *format_table;
9392

94-
/* A vector program for each input type.
95-
*/
96-
VipsVector *vectors[VIPS_FORMAT_LAST];
97-
98-
/* ... and if we've set a program for this format.
99-
*/
100-
gboolean vector_program[VIPS_FORMAT_LAST];
101-
10293
/* The buffer processor.
10394
*/
10495
VipsArithmeticProcessFn process_line;
@@ -108,12 +99,6 @@ GType vips_arithmetic_get_type(void);
10899

109100
void vips_arithmetic_set_format_table(VipsArithmeticClass *klass,
110101
const VipsBandFormat *format_table);
111-
void vips_arithmetic_set_vector(VipsArithmeticClass *klass);
112-
VipsVector *vips_arithmetic_get_vector(VipsArithmeticClass *klass,
113-
VipsBandFormat fmt);
114-
void vips_arithmetic_compile(VipsArithmeticClass *klass);
115-
VipsVector *vips_arithmetic_get_program(VipsArithmeticClass *klass,
116-
VipsBandFormat fmt);
117102

118103
#ifdef __cplusplus
119104
}

libvips/arithmetic/statistic.h

-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
extern "C" {
3636
#endif /*__cplusplus*/
3737

38-
#include <vips/vector.h>
39-
4038
#define VIPS_TYPE_STATISTIC (vips_statistic_get_type())
4139
#define VIPS_STATISTIC(obj) \
4240
(G_TYPE_CHECK_INSTANCE_CAST((obj), \

libvips/conversion/pconversion.h

-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
extern "C" {
3636
#endif /*__cplusplus*/
3737

38-
#include <vips/vector.h>
39-
4038
#define VIPS_TYPE_CONVERSION (vips_conversion_get_type())
4139
#define VIPS_CONVERSION(obj) \
4240
(G_TYPE_CHECK_INSTANCE_CAST((obj), \

libvips/convolution/conv.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ vips_conv_init(VipsConv *conv)
197197
*
198198
* For #VIPS_FORMAT_UCHAR images and #VIPS_PRECISION_INTEGER @precision,
199199
* vips_conv() uses a fast vector path based on
200-
* fixed-point arithmetic. This can produce slightly different results.
200+
* half-float arithmetic. This can produce slightly different results.
201201
* Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or
202202
* vips_vector_set_enabled().
203203
*

libvips/convolution/conva.c

-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ sys 0m0.100s
9595
#include <math.h>
9696

9797
#include <vips/vips.h>
98-
#include <vips/vector.h>
9998
#include <vips/debug.h>
10099
#include <vips/internal.h>
101100

libvips/convolution/convasep.c

-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@
8383
#include <math.h>
8484

8585
#include <vips/vips.h>
86-
#include <vips/vector.h>
8786
#include <vips/debug.h>
8887
#include <vips/internal.h>
8988

libvips/convolution/convf.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@
8080
8181
*/
8282

83+
/*
84+
#define DEBUG
85+
*/
86+
8387
#ifdef HAVE_CONFIG_H
8488
#include <config.h>
8589
#endif /*HAVE_CONFIG_H*/
@@ -316,8 +320,8 @@ vips_convf_build(VipsObject *object)
316320
convf->nnz += 1;
317321
}
318322

319-
/* Was the whole mask zero? We must have at least 1 element in there:
320-
* set it to zero.
323+
/* Was the whole mask zero? We must have at least 1 element
324+
* in there: set it to zero.
321325
*/
322326
if (convf->nnz == 0) {
323327
convf->coeff[0] = 0;

0 commit comments

Comments
 (0)