libvips
diff --git a/.github/workflows/ci.yml
+3-2 b/.github/workflows/ci.yml
+3-2
diff --git a/ChangeLog
+1 b/ChangeLog
+1
diff --git a/README.md
+3-3 b/README.md
+3-3
diff --git a/fuzz/oss_fuzz_build.sh
+15 b/fuzz/oss_fuzz_build.sh
+15
diff --git a/libvips/arithmetic/abs.c
+25-68 b/libvips/arithmetic/abs.c
+25-68
diff --git a/libvips/arithmetic/add.c
-3 b/libvips/arithmetic/add.c
-3
diff --git a/libvips/arithmetic/arithmetic.c
-77 b/libvips/arithmetic/arithmetic.c
-77
diff --git a/libvips/arithmetic/parithmetic.h
-15 b/libvips/arithmetic/parithmetic.h
-15
diff --git a/libvips/arithmetic/statistic.h
-2 b/libvips/arithmetic/statistic.h
-2
diff --git a/libvips/conversion/pconversion.h
-2 b/libvips/conversion/pconversion.h
-2
diff --git a/libvips/convolution/conv.c
+1-1 b/libvips/convolution/conv.c
+1-1
diff --git a/libvips/convolution/conva.c
-1 b/libvips/convolution/conva.c
-1
diff --git a/libvips/convolution/convasep.c
-1 b/libvips/convolution/convasep.c
-1
diff --git a/libvips/convolution/convf.c
+6-2 b/libvips/convolution/convf.c
+6-2
@@ -58,16 +58,17 @@ jobs:
             libfftw3-dev libexif-dev libjpeg-turbo8-dev
             libpng-dev libwebp-dev libtiff5-dev
             libheif-dev libexpat1-dev libcfitsio-dev
-            libmatio-dev libnifti-dev liborc-0.4-dev
+            libmatio-dev libnifti-dev libhwy-dev
             liblcms2-dev libpoppler-glib-dev librsvg2-dev
             libopenexr-dev libpango1.0-dev libarchive-dev
             libopenslide-dev libffi-dev libopenjp2-7-dev
             libimagequant-dev libcgif-dev
+            liborc-0.4-dev # FIXME: Remove once libhwy 1.0.5 is available.
 
       - name: Install macOS dependencies
         if: runner.os == 'macOS'
         run: |
-          brew install meson ninja fftw fontconfig glib libexif libarchive little-cms2 orc pango pkg-config
+          brew install meson ninja fftw fontconfig glib libexif libarchive little-cms2 highway pango pkg-config
           brew install cfitsio cgif jpeg-xl libheif libimagequant mozjpeg libmatio librsvg libspng libtiff openexr openjpeg openslide poppler webp
 
       - name: Install Clang 14
 
@@ -26,6 +26,7 @@
 - add support for 16-bit float TIFFs [DarthSim]
 - add direct mode to dzsave [jcupitt]
 - require C++11 as a minimum standard [kleisauke]
+- add support for SIMD via Highway [kleisauke]
 
 18/9/23 8.14.5
 
 
@@ -227,10 +227,10 @@ enabling a package with such a large attack surface.
 If available, libvips adds support for text rendering. You need the
 package pangocairo in `pkg-config --list-all`.
 
-### orc-0.4
+### highway
 
-If available, vips will accelerate some operations with this run-time
-compiler.
+If present, libvips will accelerate some operations with SIMD. If not, it
+will look for the orc-0.4 package.
 
 ### matio
 
 
@@ -177,6 +177,21 @@ cat > $WORK/lib/pkgconfig/pdfium.pc << EOF
   Cflags: -I\${includedir}
 EOF
 
+# highway
+pushd $SRC/highway
+cmake \
+  -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+  -DCMAKE_INSTALL_PREFIX=$WORK \
+  -DBUILD_SHARED_LIBS=0 \
+  -DBUILD_TESTING=0 \
+  -DHWY_ENABLE_CONTRIB=0 \
+  -DHWY_ENABLE_EXAMPLES=0 \
+  -DHWY_ENABLE_TESTS=0 \
+  .
+make -j$(nproc)
+make install
+popd
+
 # libvips
 # Disable building man pages, gettext po files, tools, and tests
 sed -i "/subdir('man')/{N;N;N;d;}" meson.build
 
@@ -80,39 +80,11 @@ typedef VipsUnaryClass VipsAbsClass;
 
 G_DEFINE_TYPE(VipsAbs, vips_abs, VIPS_TYPE_UNARY);
 
-static void *
-vips_abs_orc_init_cb(void *a)
-{
-	VipsAbs *abs = (VipsAbs *) a;
-	VipsArithmeticClass *aclass = VIPS_ARITHMETIC_GET_CLASS(abs);
-
-	VipsVector *v;
-
-	vips_arithmetic_set_vector(aclass);
-
-	v = vips_arithmetic_get_program(aclass, VIPS_FORMAT_CHAR);
-	vips_vector_asm2(v, "absb", "d1", "s1");
-
-	v = vips_arithmetic_get_program(aclass, VIPS_FORMAT_SHORT);
-	vips_vector_asm2(v, "absw", "d1", "s1");
-
-	v = vips_arithmetic_get_program(aclass, VIPS_FORMAT_INT);
-	vips_vector_asm2(v, "absl", "d1", "s1");
-
-	vips_arithmetic_compile(aclass);
-
-	return NULL;
-}
-
 static int
 vips_abs_build(VipsObject *object)
 {
-	static GOnce once = G_ONCE_INIT;
-
 	VipsUnary *unary = (VipsUnary *) object;
 
-	VIPS_ONCE(&once, vips_abs_orc_init_cb, object);
-
 	if (unary->in &&
 		vips_band_format_isuint(unary->in->BandFmt))
 		return vips_unary_copy(unary);
@@ -199,50 +171,35 @@ static void
 vips_abs_buffer(VipsArithmetic *arithmetic,
 	VipsPel *out, VipsPel **in, int width)
 {
-	VipsArithmeticClass *class = VIPS_ARITHMETIC_GET_CLASS(arithmetic);
 	VipsImage *im = arithmetic->ready[0];
 	const int bands = vips_image_get_bands(im);
 	int sz = width * bands;
 
-	VipsVector *v;
-
-	if ((v = vips_arithmetic_get_vector(class,
-			 vips_image_get_format(im)))) {
-		VipsExecutor ex;
-
-		vips_executor_set_program(&ex, v, sz);
-		vips_executor_set_array(&ex, v->s[0], in[0]);
-		vips_executor_set_destination(&ex, out);
-
-		vips_executor_run(&ex);
-	}
-	else {
-		switch (vips_image_get_format(im)) {
-		case VIPS_FORMAT_CHAR:
-			ABS_INT(signed char);
-			break;
-		case VIPS_FORMAT_SHORT:
-			ABS_INT(signed short);
-			break;
-		case VIPS_FORMAT_INT:
-			ABS_INT(signed int);
-			break;
-		case VIPS_FORMAT_FLOAT:
-			ABS_FLOAT(float);
-			break;
-		case VIPS_FORMAT_DOUBLE:
-			ABS_FLOAT(double);
-			break;
-		case VIPS_FORMAT_COMPLEX:
-			ABS_COMPLEX(float);
-			break;
-		case VIPS_FORMAT_DPCOMPLEX:
-			ABS_COMPLEX(double);
-			break;
-
-		default:
-			g_assert_not_reached();
-		}
+	switch (vips_image_get_format(im)) {
+	case VIPS_FORMAT_CHAR:
+		ABS_INT(signed char);
+		break;
+	case VIPS_FORMAT_SHORT:
+		ABS_INT(signed short);
+		break;
+	case VIPS_FORMAT_INT:
+		ABS_INT(signed int);
+		break;
+	case VIPS_FORMAT_FLOAT:
+		ABS_FLOAT(float);
+		break;
+	case VIPS_FORMAT_DOUBLE:
+		ABS_FLOAT(double);
+		break;
+	case VIPS_FORMAT_COMPLEX:
+		ABS_COMPLEX(float);
+		break;
+	case VIPS_FORMAT_DPCOMPLEX:
+		ABS_COMPLEX(double);
+		break;
+
+	default:
+		g_assert_not_reached();
 	}
 }
 
 
@@ -268,9 +268,6 @@ vips_add_init(VipsAdd *add)
  * In other words, the output type is just large enough to hold the whole
  * range of possible values.
  *
- * Operations on integer images are performed using the processor's vector unit,
- * if possible. Disable this with --vips-novector or VIPS_NOVECTOR.
- *
  * See also: vips_subtract(), vips_linear().
  *
  * Returns: 0 on success, -1 on error
 
@@ -750,83 +750,6 @@ vips_arithmetic_set_format_table(VipsArithmeticClass *class,
 	class->format_table = format_table;
 }
 
-void
-vips_arithmetic_set_vector(VipsArithmeticClass *class)
-{
-	int i;
-
-	g_assert(class->format_table);
-
-	for (i = 0; i < VIPS_FORMAT_LAST; i++) {
-		int isize = vips_format_sizeof(i);
-		int osize = vips_format_sizeof((int) class->format_table[i]);
-
-		VipsVector *v;
-
-		v = vips_vector_new("arithmetic", osize);
-
-		vips_vector_source_name(v, "s1", isize);
-		vips_vector_source_name(v, "s2", isize);
-		vips_vector_temporary(v, "t1", osize);
-		vips_vector_temporary(v, "t2", osize);
-
-		class->vectors[i] = v;
-	}
-}
-
-/* Get the stub for this program ... use _get_vector() to get the compiled
- * code.
- */
-VipsVector *
-vips_arithmetic_get_program(VipsArithmeticClass *class, VipsBandFormat fmt)
-{
-	g_assert((int) fmt >= 0 && (int) fmt < VIPS_FORMAT_LAST);
-	g_assert(!class->vector_program[fmt]);
-
-	class->vector_program[fmt] = TRUE;
-
-	return class->vectors[fmt];
-}
-
-/* Get the compiled code for this type, if available.
- */
-VipsVector *
-vips_arithmetic_get_vector(VipsArithmeticClass *class, VipsBandFormat fmt)
-{
-	g_assert(fmt >= 0 && fmt < VIPS_FORMAT_LAST);
-
-	if (!vips_vector_isenabled() ||
-		!class->vector_program[fmt])
-		return NULL;
-
-	return class->vectors[fmt];
-}
-
-void
-vips_arithmetic_compile(VipsArithmeticClass *class)
-{
-	int i;
-
-	g_assert(class->format_table);
-
-	for (i = 0; i < VIPS_FORMAT_LAST; i++)
-		if (class->vector_program[i] &&
-			!vips_vector_compile(class->vectors[i]))
-			/* If compilation fails, turn off the vector for this
-			 * type.
-			 */
-			class->vector_program[i] = FALSE;
-
-#ifdef DEBUG
-	printf("vips_arithmetic_compile: ");
-	for (i = 0; i < VIPS_FORMAT_LAST; i++)
-		if (class->vector_program[i])
-			printf("%s ",
-				vips_enum_nick(VIPS_TYPE_BAND_FORMAT, i));
-	printf("\n");
-#endif /*DEBUG*/
-}
-
 /* Called from iofuncs to init all operations in this dir. Use a plugin system
  * instead?
  */
 
@@ -36,7 +36,6 @@ extern "C" {
 #endif /*__cplusplus*/
 
 #include <vips/vips.h>
-#include <vips/vector.h>
 
 #define VIPS_TYPE_ARITHMETIC (vips_arithmetic_get_type())
 #define VIPS_ARITHMETIC(obj) \
@@ -91,14 +90,6 @@ typedef struct _VipsArithmeticClass {
 	 */
 	const VipsBandFormat *format_table;
 
-	/* A vector program for each input type.
-	 */
-	VipsVector *vectors[VIPS_FORMAT_LAST];
-
-	/* ... and if we've set a program for this format.
-	 */
-	gboolean vector_program[VIPS_FORMAT_LAST];
-
 	/* The buffer processor.
 	 */
 	VipsArithmeticProcessFn process_line;
@@ -108,12 +99,6 @@ GType vips_arithmetic_get_type(void);
 
 void vips_arithmetic_set_format_table(VipsArithmeticClass *klass,
 	const VipsBandFormat *format_table);
-void vips_arithmetic_set_vector(VipsArithmeticClass *klass);
-VipsVector *vips_arithmetic_get_vector(VipsArithmeticClass *klass,
-	VipsBandFormat fmt);
-void vips_arithmetic_compile(VipsArithmeticClass *klass);
-VipsVector *vips_arithmetic_get_program(VipsArithmeticClass *klass,
-	VipsBandFormat fmt);
 
 #ifdef __cplusplus
 }
 
@@ -35,8 +35,6 @@
 extern "C" {
 #endif /*__cplusplus*/
 
-#include <vips/vector.h>
-
 #define VIPS_TYPE_STATISTIC (vips_statistic_get_type())
 #define VIPS_STATISTIC(obj) \
 	(G_TYPE_CHECK_INSTANCE_CAST((obj), \
 
@@ -35,8 +35,6 @@
 extern "C" {
 #endif /*__cplusplus*/
 
-#include <vips/vector.h>
-
 #define VIPS_TYPE_CONVERSION (vips_conversion_get_type())
 #define VIPS_CONVERSION(obj) \
 	(G_TYPE_CHECK_INSTANCE_CAST((obj), \
 
@@ -197,7 +197,7 @@ vips_conv_init(VipsConv *conv)
  *
  * For #VIPS_FORMAT_UCHAR images and #VIPS_PRECISION_INTEGER @precision,
  * vips_conv() uses a fast vector path based on
- * fixed-point arithmetic. This can produce slightly different results.
+ * half-float arithmetic. This can produce slightly different results.
  * Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or
  * vips_vector_set_enabled().
  *
 
@@ -95,7 +95,6 @@ sys	0m0.100s
 #include <math.h>
 
 #include <vips/vips.h>
-#include <vips/vector.h>
 #include <vips/debug.h>
 #include <vips/internal.h>
 
 
@@ -83,7 +83,6 @@
 #include <math.h>
 
 #include <vips/vips.h>
-#include <vips/vector.h>
 #include <vips/debug.h>
 #include <vips/internal.h>
 
 
@@ -80,6 +80,10 @@
 
  */
 
+/*
+#define DEBUG
+ */
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif /*HAVE_CONFIG_H*/
@@ -316,8 +320,8 @@ vips_convf_build(VipsObject *object)
 			convf->nnz += 1;
 		}
 
-	/* Was the whole mask zero? We must have at least 1 element in there:
-	 * set it to zero.
+	/* Was the whole mask zero? We must have at least 1 element
+	 * in there: set it to zero.
 	 */
 	if (convf->nnz == 0) {
 		convf->coeff[0] = 0;
Original file line number	Diff line number	Diff line change
`@@ -268,9 +268,6 @@ vips_add_init(VipsAdd *add)`
`268`	`268`	`* In other words, the output type is just large enough to hold the whole`
`269`	`269`	`* range of possible values.`
`270`	`270`	`*`
`271`		`- * Operations on integer images are performed using the processor's vector unit,`
`272`		`- * if possible. Disable this with --vips-novector or VIPS_NOVECTOR.`
`273`		`- *`
`274`	`271`	`* See also: vips_subtract(), vips_linear().`
`275`	`272`	`*`
`276`	`273`	`* Returns: 0 on success, -1 on error`
Original file line number	Diff line number	Diff line change
`@@ -197,7 +197,7 @@ vips_conv_init(VipsConv *conv)`
`197`	`197`	`*`
`198`	`198`	`* For #VIPS_FORMAT_UCHAR images and #VIPS_PRECISION_INTEGER @precision,`
`199`	`199`	`* vips_conv() uses a fast vector path based on`
`200`		`- * fixed-point arithmetic. This can produce slightly different results.`
	`200`	`+ * half-float arithmetic. This can produce slightly different results.`
`201`	`201`	`` * Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or ``
`202`	`202`	`* vips_vector_set_enabled().`
`203`	`203`	`*`