From 28578f966459fcf84294419d259cf7d25de49b2c Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsumoto Date: Tue, 27 Jul 2021 06:18:35 +0000 Subject: [PATCH 001/200] all: fix typos Change-Id: I0c64540bc7848773955a517c20c7dc9d45cbe618 GitHub-Last-Rev: a97430e47c17183bebaf28f0f5bd3d10f91bbf5b GitHub-Pull-Request: golang/arch#5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/337169 Reviewed-by: Ian Lance Taylor Reviewed-by: Cherry Mui Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot --- arm64/arm64asm/plan9x.go | 2 +- x86/x86csv/reader.go | 2 +- x86/x86spec/spec.go | 2 +- x86/xeddata/operand.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arm64/arm64asm/plan9x.go b/arm64/arm64asm/plan9x.go index 3aaf0b2a..f4eef8c0 100644 --- a/arm64/arm64asm/plan9x.go +++ b/arm64/arm64asm/plan9x.go @@ -500,7 +500,7 @@ SHA256SU0 SHA256SU1 `) -// floating point instrcutions without "F" prefix. +// floating point instructions without "F" prefix. var fOpsWithoutFPrefix = map[Op]bool{ LDP: true, STP: true, diff --git a/x86/x86csv/reader.go b/x86/x86csv/reader.go index ed59e31a..5c48ae9b 100644 --- a/x86/x86csv/reader.go +++ b/x86/x86csv/reader.go @@ -24,7 +24,7 @@ func NewReader(r io.Reader) *Reader { // ReadAll reads all remaining rows from r. // -// If error is occured, still returns all rows +// If error has occurred, still returns all rows // that have been read during method execution. // // A successful call returns err == nil, not err == io.EOF. diff --git a/x86/x86spec/spec.go b/x86/x86spec/spec.go index b49e006f..25267941 100644 --- a/x86/x86spec/spec.go +++ b/x86/x86spec/spec.go @@ -36,7 +36,7 @@ // // 4. The instruction encoding. For example, "C1 /4 ib". // -// 5. The validity of the instruction in 32-bit (aka compatiblity, legacy) mode. +// 5. The validity of the instruction in 32-bit (aka compatibility, legacy) mode. // // 6. The validity of the instruction in 64-bit mode. 
// diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index e934ed73..1632828d 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -66,7 +66,7 @@ type Operand struct { // Possible values: // EVEX.b context { // TXT=ZEROSTR - zeroing - // TXT=SAESTR - surpress all exceptions + // TXT=SAESTR - suppress all exceptions // TXT=ROUNDC - rounding // TXT=BCASTSTR - broadcasting // } From ebb09ed340f18f7e2a2200f1adf792992c448346 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Mon, 17 May 2021 14:15:56 -0500 Subject: [PATCH 002/200] ppc64: add ISA level information to pp64.csv Re-purpose the metadata field to hold the ISA level. This field is currently unused. Note, lq/stq/scv/rfscv appear out-of-order in appendix F of ISA 3.1. Change-Id: I96211bf75305a29c0805d95489eee132444d919c Reviewed-on: https://go-review.googlesource.com/c/arch/+/342969 Reviewed-by: Lynn Boger Reviewed-by: Carlos Eduardo Seo Trust: Carlos Eduardo Seo --- ppc64/pp64.csv | 2323 ++++++++++++++++++++++++------------------------ 1 file changed, 1162 insertions(+), 1161 deletions(-) diff --git a/ppc64/pp64.csv b/ppc64/pp64.csv index 6f7b7fcf..3150cade 100644 --- a/ppc64/pp64.csv +++ b/ppc64/pp64.csv @@ -5,1171 +5,1172 @@ # # Each line in the CSV section contains 4 fields: # -# instruction mnemonic encoding tags +# instruction mnemonic encoding isa-level # # The instruction is the headline from the manual. # The mnemonic is the instruction mnemonics, separated by | characters. # The encoding is the encoding, a sequence of name@startbit| describing each bit field in turn or # a list of sequences of the form (,sequence)+. A leading comma is used to signify an # instruction encoding requiring multiple instruction words. -# The tags are additional metadata, currently always empty. +# The fourth field represents the ISA version where the instruction was introduced as +# stated in Appendix F. 
of ISA 3.1 # -"Byte-Reverse Doubleword X-form","brd RA,RS","31@0|RS@6|RA@11|///@16|187@21|/@31|","" -"Byte-Reverse Halfword X-form","brh RA,RS","31@0|RS@6|RA@11|///@16|219@21|/@31|","" -"Byte-Reverse Word X-form","brw RA,RS","31@0|RS@6|RA@11|///@16|155@21|/@31|","" -"Centrifuge Doubleword X-form","cfuged RA,RS,RB","31@0|RS@6|RA@11|RB@16|220@21|/@31|","" -"Count Leading Zeros Doubleword under bit Mask X-form","cntlzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|59@21|/@31|","" -"Count Trailing Zeros Doubleword under bit Mask X-form","cnttzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|571@21|/@31|","" -"DFP Convert From Fixed Quadword Quad X-form","dcffixqq FRTp,VRB","63@0|FRTp@6|0@11|VRB@16|994@21|/@31|","" -"DFP Convert To Fixed Quadword Quad X-form","dctfixqq VRT,FRBp","63@0|VRT@6|1@11|FRBp@16|994@21|/@31|","" -"Load VSX Vector Special Value Quadword X-form","lxvkq XT,UIM","60@0|T@6|31@11|UIM@16|360@21|TX@31|","" -"Load VSX Vector Paired DQ-form","lxvp XTp,DQ(RA)","6@0|Tp@6|TX@10|RA@11|DQ@16|0@28|","" -"Load VSX Vector Paired Indexed X-form","lxvpx XTp,RA,RB","31@0|Tp@6|TX@10|RA@11|RB@16|333@21|/@31|","" -"Load VSX Vector Rightmost Byte Indexed X-form","lxvrbx XT,RA,RB","31@0|T@6|RA@11|RB@16|13@21|TX@31|","" -"Load VSX Vector Rightmost Doubleword Indexed X-form","lxvrdx XT,RA,RB","31@0|T@6|RA@11|RB@16|109@21|TX@31|","" -"Load VSX Vector Rightmost Halfword Indexed X-form","lxvrhx XT,RA,RB","31@0|T@6|RA@11|RB@16|45@21|TX@31|","" -"Load VSX Vector Rightmost Word Indexed X-form","lxvrwx XT,RA,RB","31@0|T@6|RA@11|RB@16|77@21|TX@31|","" -"Move to VSR Byte Mask VX-form","mtvsrbm VRT,RB","4@0|VRT@6|16@11|RB@16|1602@21|","" -"Move To VSR Byte Mask Immediate DX-form","mtvsrbmi VRT,bm","4@0|VRT@6|b1@11|b0@16|10@26|b2@31|","" -"Move to VSR Doubleword Mask VX-form","mtvsrdm VRT,RB","4@0|VRT@6|19@11|RB@16|1602@21|","" -"Move to VSR Halfword Mask VX-form","mtvsrhm VRT,RB","4@0|VRT@6|17@11|RB@16|1602@21|","" -"Move to VSR Quadword Mask VX-form","mtvsrqm VRT,RB","4@0|VRT@6|20@11|RB@16|1602@21|","" 
-"Move to VSR Word Mask VX-form","mtvsrwm VRT,RB","4@0|VRT@6|18@11|RB@16|1602@21|","" -"Prefixed Add Immediate MLS:D-form","paddi RT,RA,SI,R",",1@0|2@6|0@8|//@9|R@11|//@12|si0@14|,14@0|RT@6|RA@11|si1@16|","" -"Parallel Bits Deposit Doubleword X-form","pdepd RA,RS,RB","31@0|RS@6|RA@11|RB@16|156@21|/@31|","" -"Parallel Bits Extract Doubleword X-form","pextd RA,RS,RB","31@0|RS@6|RA@11|RB@16|188@21|/@31|","" -"Prefixed Load Byte and Zero MLS:D-form","plbz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,34@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Doubleword 8LS:D-form","pld RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,57@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Floating-Point Double MLS:D-form","plfd FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,50@0|FRT@6|RA@11|d1@16|","" -"Prefixed Load Floating-Point Single MLS:D-form","plfs FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,48@0|FRT@6|RA@11|d1@16|","" -"Prefixed Load Halfword Algebraic MLS:D-form","plha RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,42@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Halfword and Zero MLS:D-form","plhz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,40@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Quadword 8LS:D-form","plq RTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,56@0|RTp@6|RA@11|d1@16|","" -"Prefixed Load Word Algebraic 8LS:D-form","plwa RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,41@0|RT@6|RA@11|d1@16|","" -"Prefixed Load Word and Zero MLS:D-form","plwz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,32@0|RT@6|RA@11|d1@16|","" -"Prefixed Load VSX Scalar Doubleword 8LS:D-form","plxsd VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,42@0|VRT@6|RA@11|d1@16|","" -"Prefixed Load VSX Scalar Single-Precision 8LS:D-form","plxssp VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,43@0|VRT@6|RA@11|d1@16|","" -"Prefixed Load VSX Vector 8LS:D-form","plxv XT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,25@0|TX@5|T@6|RA@11|d1@16|","" -"Prefixed Load VSX Vector Paired 
8LS:D-form","plxvp XTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,58@0|Tp@6|TX@10|RA@11|d1@16|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) MMIRR:XX3-form","pmxvbf16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) MMIRR:XX3-form","pmxvf16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 
update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf32ger AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gernn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gernp AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gerpn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gerpp 
AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf64ger AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gernn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gernp AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gerpn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gerpp AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) MMIRR:XX3-form","pmxvi16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","" -"Prefixed 
Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation MMIRR:XX3-form","pmxvi16ger2s AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) MMIRR:XX3-form","pmxvi4ger8 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi4ger8pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) MMIRR:XX3-form","pmxvi8ger4 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","" -"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","" -"Prefixed Nop MRR:*-form","pnop",",1@0|3@6|0@8|///@12|0@14|//@31|,///@0|","" -"Prefixed Store Byte 
MLS:D-form","pstb RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,38@0|RS@6|RA@11|d1@16|","" -"Prefixed Store Doubleword 8LS:D-form","pstd RS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,61@0|RS@6|RA@11|d1@16|","" -"Prefixed Store Floating-Point Double MLS:D-form","pstfd FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,54@0|FRS@6|RA@11|d1@16|","" -"Prefixed Store Floating-Point Single MLS:D-form","pstfs FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,52@0|FRS@6|RA@11|d1@16|","" -"Prefixed Store Halfword MLS:D-form","psth RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,44@0|RS@6|RA@11|d1@16|","" -"Prefixed Store Quadword 8LS:D-form","pstq RSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,60@0|RSp@6|RA@11|d1@16|","" -"Prefixed Store Word MLS:D-form","pstw RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,36@0|RS@6|RA@11|d1@16|","" -"Prefixed Store VSX Scalar Doubleword 8LS:D-form","pstxsd VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,46@0|VRS@6|RA@11|d1@16|","" -"Prefixed Store VSX Scalar Single-Precision 8LS:D-form","pstxssp VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,47@0|VRS@6|RA@11|d1@16|","" -"Prefixed Store VSX Vector 8LS:D-form","pstxv XS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,27@0|SX@5|S@6|RA@11|d1@16|","" -"Prefixed Store VSX Vector Paired 8LS:D-form","pstxvp XSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,62@0|Sp@6|SX@10|RA@11|d1@16|","" -"Set Boolean Condition X-form","setbc RT,BI","31@0|RT@6|BI@11|///@16|384@21|/@31|","" -"Set Boolean Condition Reverse X-form","setbcr RT,BI","31@0|RT@6|BI@11|///@16|416@21|/@31|","" -"Set Negative Boolean Condition X-form","setnbc RT,BI","31@0|RT@6|BI@11|///@16|448@21|/@31|","" -"Set Negative Boolean Condition Reverse X-form","setnbcr RT,BI","31@0|RT@6|BI@11|///@16|480@21|/@31|","" -"Store VSX Vector Paired DQ-form","stxvp XSp,DQ(RA)","6@0|Sp@6|SX@10|RA@11|DQ@16|1@28|","" -"Store VSX Vector Paired Indexed X-form","stxvpx XSp,RA,RB","31@0|Sp@6|SX@10|RA@11|RB@16|461@21|/@31|","" -"Store VSX Vector 
Rightmost Byte Indexed X-form","stxvrbx XS,RA,RB","31@0|S@6|RA@11|RB@16|141@21|SX@31|","" -"Store VSX Vector Rightmost Doubleword Indexed X-form","stxvrdx XS,RA,RB","31@0|S@6|RA@11|RB@16|237@21|SX@31|","" -"Store VSX Vector Rightmost Halfword Indexed X-form","stxvrhx XS,RA,RB","31@0|S@6|RA@11|RB@16|173@21|SX@31|","" -"Store VSX Vector Rightmost Word Indexed X-form","stxvrwx XS,RA,RB","31@0|S@6|RA@11|RB@16|205@21|SX@31|","" -"Vector Centrifuge Doubleword VX-form","vcfuged VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1357@21|","" -"Vector Clear Leftmost Bytes VX-form","vclrlb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|397@21|","" -"Vector Clear Rightmost Bytes VX-form","vclrrb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|461@21|","" -"Vector Count Leading Zeros Doubleword under bit Mask VX-form","vclzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1924@21|","" -"Vector Compare Equal Quadword VC-form","vcmpequq VRT,VRA,VRB (Rc=0)|vcmpequq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|455@22|","" -"Vector Compare Greater Than Signed Quadword VC-form","vcmpgtsq VRT,VRA,VRB (Rc=0)|vcmpgtsq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|903@22|","" -"Vector Compare Greater Than Unsigned Quadword VC-form","vcmpgtuq VRT,VRA,VRB (Rc=0)|vcmpgtuq. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|647@22|","" -"Vector Compare Signed Quadword VX-form","vcmpsq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|321@21|","" -"Vector Compare Unsigned Quadword VX-form","vcmpuq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|257@21|","" -"Vector Count Mask Bits Byte VX-form","vcntmbb RT,VRB,MP","4@0|RT@6|12@11|MP@15|VRB@16|1602@21|","" -"Vector Count Mask Bits Doubleword VX-form","vcntmbd RT,VRB,MP","4@0|RT@6|15@11|MP@15|VRB@16|1602@21|","" -"Vector Count Mask Bits Halfword VX-form","vcntmbh RT,VRB,MP","4@0|RT@6|13@11|MP@15|VRB@16|1602@21|","" -"Vector Count Mask Bits Word VX-form","vcntmbw RT,VRB,MP","4@0|RT@6|14@11|MP@15|VRB@16|1602@21|","" -"Vector Count Trailing Zeros Doubleword under bit Mask VX-form","vctzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1988@21|","" -"Vector Divide Extended Signed Doubleword VX-form","vdivesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|971@21|","" -"Vector Divide Extended Signed Quadword VX-form","vdivesq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|779@21|","" -"Vector Divide Extended Signed Word VX-form","vdivesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|907@21|","" -"Vector Divide Extended Unsigned Doubleword VX-form","vdiveud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|715@21|","" -"Vector Divide Extended Unsigned Quadword VX-form","vdiveuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|523@21|","" -"Vector Divide Extended Unsigned Word VX-form","vdiveuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|651@21|","" -"Vector Divide Signed Doubleword VX-form","vdivsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|459@21|","" -"Vector Divide Signed Quadword VX-form","vdivsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|267@21|","" -"Vector Divide Signed Word VX-form","vdivsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|395@21|","" -"Vector Divide Unsigned Doubleword VX-form","vdivud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|203@21|","" -"Vector Divide Unsigned Quadword VX-form","vdivuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|11@21|","" -"Vector Divide Unsigned Word 
VX-form","vdivuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|139@21|","" -"Vector Expand Byte Mask VX-form","vexpandbm VRT,VRB","4@0|VRT@6|0@11|VRB@16|1602@21|","" -"Vector Expand Doubleword Mask VX-form","vexpanddm VRT,VRB","4@0|VRT@6|3@11|VRB@16|1602@21|","" -"Vector Expand Halfword Mask VX-form","vexpandhm VRT,VRB","4@0|VRT@6|1@11|VRB@16|1602@21|","" -"Vector Expand Quadword Mask VX-form","vexpandqm VRT,VRB","4@0|VRT@6|4@11|VRB@16|1602@21|","" -"Vector Expand Word Mask VX-form","vexpandwm VRT,VRB","4@0|VRT@6|2@11|VRB@16|1602@21|","" -"Vector Extract Double Doubleword to VSR using GPR-specified Left-Index VA-form","vextddvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|30@26|","" -"Vector Extract Double Doubleword to VSR using GPR-specified Right-Index VA-form","vextddvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|31@26|","" -"Vector Extract Double Unsigned Byte to VSR using GPR-specified Left-Index VA-form","vextdubvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|24@26|","" -"Vector Extract Double Unsigned Byte to VSR using GPR-specified Right-Index VA-form","vextdubvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|25@26|","" -"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Left-Index VA-form","vextduhvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|26@26|","" -"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Right-Index VA-form","vextduhvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|27@26|","" -"Vector Extract Double Unsigned Word to VSR using GPR-specified Left-Index VA-form","vextduwvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|28@26|","" -"Vector Extract Double Unsigned Word to VSR using GPR-specified Right-Index VA-form","vextduwvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|29@26|","" -"Vector Extract Byte Mask VX-form","vextractbm RT,VRB","4@0|RT@6|8@11|VRB@16|1602@21|","" -"Vector Extract Doubleword Mask VX-form","vextractdm RT,VRB","4@0|RT@6|11@11|VRB@16|1602@21|","" -"Vector Extract Halfword Mask 
VX-form","vextracthm RT,VRB","4@0|RT@6|9@11|VRB@16|1602@21|","" -"Vector Extract Quadword Mask VX-form","vextractqm RT,VRB","4@0|RT@6|12@11|VRB@16|1602@21|","" -"Vector Extract Word Mask VX-form","vextractwm RT,VRB","4@0|RT@6|10@11|VRB@16|1602@21|","" -"Vector Extend Sign Doubleword to Quadword VX-form","vextsd2q VRT,VRB","4@0|VRT@6|27@11|VRB@16|1538@21|","" -"Vector Gather every Nth Bit VX-form","vgnb RT,VRB,N","4@0|RT@6|//@11|N@13|VRB@16|1228@21|","" -"Vector Insert Byte from GPR using GPR-specified Left-Index VX-form","vinsblx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|527@21|","" -"Vector Insert Byte from GPR using GPR-specified Right-Index VX-form","vinsbrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|783@21|","" -"Vector Insert Byte from VSR using GPR-specified Left-Index VX-form","vinsbvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|15@21|","" -"Vector Insert Byte from VSR using GPR-specified Right-Index VX-form","vinsbvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|271@21|","" -"Vector Insert Doubleword from GPR using immediate-specified index VX-form","vinsd VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|463@21|","" -"Vector Insert Doubleword from GPR using GPR-specified Left-Index VX-form","vinsdlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|719@21|","" -"Vector Insert Doubleword from GPR using GPR-specified Right-Index VX-form","vinsdrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|975@21|","" -"Vector Insert Halfword from GPR using GPR-specified Left-Index VX-form","vinshlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|591@21|","" -"Vector Insert Halfword from GPR using GPR-specified Right-Index VX-form","vinshrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|847@21|","" -"Vector Insert Halfword from VSR using GPR-specified Left-Index VX-form","vinshvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|79@21|","" -"Vector Insert Halfword from VSR using GPR-specified Right-Index VX-form","vinshvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|335@21|","" -"Vector Insert Word from GPR using immediate-specified index VX-form","vinsw 
VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|207@21|","" -"Vector Insert Word from GPR using GPR-specified Left-Index VX-form","vinswlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|655@21|","" -"Vector Insert Word from GPR using GPR-specified Right-Index VX-form","vinswrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|911@21|","" -"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|143@21|","" -"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|399@21|","" -"Vector Modulo Signed Doubleword VX-form","vmodsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1995@21|","" -"Vector Modulo Signed Quadword VX-form","vmodsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1803@21|","" -"Vector Modulo Signed Word VX-form","vmodsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1931@21|","" -"Vector Modulo Unsigned Doubleword VX-form","vmodud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1739@21|","" -"Vector Modulo Unsigned Quadword VX-form","vmoduq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1547@21|","" -"Vector Modulo Unsigned Word VX-form","vmoduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1675@21|","" -"Vector Multiply-Sum & write Carry-out Unsigned Doubleword VA-form","vmsumcud VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|23@26|","" -"Vector Multiply Even Signed Doubleword VX-form","vmulesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|968@21|","" -"Vector Multiply Even Unsigned Doubleword VX-form","vmuleud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|712@21|","" -"Vector Multiply High Signed Doubleword VX-form","vmulhsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|969@21|","" -"Vector Multiply High Signed Word VX-form","vmulhsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|905@21|","" -"Vector Multiply High Unsigned Doubleword VX-form","vmulhud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|713@21|","" -"Vector Multiply High Unsigned Word VX-form","vmulhuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|649@21|","" -"Vector Multiply Low Doubleword VX-form","vmulld 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|457@21|","" -"Vector Multiply Odd Signed Doubleword VX-form","vmulosd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|456@21|","" -"Vector Multiply Odd Unsigned Doubleword VX-form","vmuloud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|200@21|","" -"Vector Parallel Bits Deposit Doubleword VX-form","vpdepd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1485@21|","" -"Vector Parallel Bits Extract Doubleword VX-form","vpextd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1421@21|","" -"Vector Rotate Left Quadword VX-form","vrlq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|5@21|","" -"Vector Rotate Left Quadword then Mask Insert VX-form","vrlqmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|69@21|","" -"Vector Rotate Left Quadword then AND with Mask VX-form","vrlqnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|325@21|","" -"Vector Shift Left Double by Bit Immediate VN-form","vsldbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|0@21|SH@23|22@26|","" -"Vector Shift Left Quadword VX-form","vslq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|261@21|","" -"Vector Shift Right Algebraic Quadword VX-form","vsraq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|773@21|","" -"Vector Shift Right Double by Bit Immediate VN-form","vsrdbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|1@21|SH@23|22@26|","" -"Vector Shift Right Quadword VX-form","vsrq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|517@21|","" -"Vector String Isolate Byte Left-justified VX-form","vstribl VRT,VRB (Rc=0)|vstribl. VRT,VRB (Rc=1)","4@0|VRT@6|0@11|VRB@16|Rc@21|13@22|","" -"Vector String Isolate Byte Right-justified VX-form","vstribr VRT,VRB (Rc=0)|vstribr. VRT,VRB (Rc=1)","4@0|VRT@6|1@11|VRB@16|Rc@21|13@22|","" -"Vector String Isolate Halfword Left-justified VX-form","vstrihl VRT,VRB (Rc=0)|vstrihl. VRT,VRB (Rc=1)","4@0|VRT@6|2@11|VRB@16|Rc@21|13@22|","" -"Vector String Isolate Halfword Right-justified VX-form","vstrihr VRT,VRB (Rc=0)|vstrihr. 
VRT,VRB (Rc=1)","4@0|VRT@6|3@11|VRB@16|Rc@21|13@22|","" -"VSX Scalar Compare Equal Quad-Precision X-form","xscmpeqqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|68@21|/@31|","" -"VSX Scalar Compare Greater Than or Equal Quad-Precision X-form","xscmpgeqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|196@21|/@31|","" -"VSX Scalar Compare Greater Than Quad-Precision X-form","xscmpgtqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|228@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Signed Quadword X-form","xscvqpsqz VRT,VRB","63@0|VRT@6|8@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Quadword X-form","xscvqpuqz VRT,VRB","63@0|VRT@6|0@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round Signed Quadword to Quad-Precision X-form","xscvsqqp VRT,VRB","63@0|VRT@6|11@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round Unsigned Quadword to Quad-Precision X-form","xscvuqqp VRT,VRB","63@0|VRT@6|3@11|VRB@16|836@21|/@31|","" -"VSX Scalar Maximum Type-C Quad-Precision X-form","xsmaxcqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|676@21|/@31|","" -"VSX Scalar Minimum Type-C Quad-Precision X-form","xsmincqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|740@21|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) XX3-form","xvbf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate XX3-form","xvbf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate XX3-form","xvbf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate XX3-form","xvbf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","" -"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form","xvbf16ger2pp 
AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","" -"VSX Vector Convert bfloat16 to Single-Precision format XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form","xvcvspbf16 XT,XB","60@0|T@6|17@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) XX3-form","xvf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate XX3-form","xvf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate XX3-form","xvf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate XX3-form","xvf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) XX3-form","xvf32ger AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate XX3-form","xvf32gernn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf32gernp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf32gerpn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","" -"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate 
XX3-form","xvf32gerpp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) XX3-form","xvf64ger AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate XX3-form","xvf64gernn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf64gernp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf64gerpn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","" -"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate XX3-form","xvf64gerpp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) XX3-form","xvi16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvi16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation XX3-form","xvi16ger2s AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","" -"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate XX3-form","xvi16ger2spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","" -"VSX Vector 4-bit Signed Integer GER (rank-8 update) XX3-form","xvi4ger8 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","" -"VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate XX3-form","xvi4ger8pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","" -"VSX Vector 8-bit Signed/Unsigned Integer GER 
(rank-4 update) XX3-form","xvi8ger4 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","" -"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate XX3-form","xvi8ger4pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","" -"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate XX3-form","xvi8ger4spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","" -"VSX Vector Test Least-Significant Bit by Byte XX2-form","xvtlsbb BF,XB","60@0|BF@6|//@9|2@11|B@16|475@21|BX@30|/@31|","" -"VSX Vector Blend Variable Byte 8RR:XX4-form","xxblendvb XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Blend Variable Doubleword 8RR:XX4-form","xxblendvd XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Blend Variable Halfword 8RR:XX4-form","xxblendvh XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Blend Variable Word 8RR:XX4-form","xxblendvw XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|2@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Evaluate 8RR-XX4-form","xxeval XT,XA,XB,XC,IMM",",1@0|1@6|0@8|//@12|///@14|IMM@24|,34@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Generate PCV from Byte Mask X-form","xxgenpcvbm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|916@21|TX@31|","" -"VSX Vector Generate PCV from Doubleword Mask X-form","xxgenpcvdm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|949@21|TX@31|","" -"VSX Vector Generate PCV from Halfword Mask X-form","xxgenpcvhm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|917@21|TX@31|","" -"VSX Vector Generate PCV from Word Mask X-form","xxgenpcvwm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|948@21|TX@31|","" -"VSX Move From Accumulator X-form","xxmfacc AS","31@0|AS@6|//@9|0@11|///@16|177@21|/@31|","" -"VSX Move To Accumulator 
X-form","xxmtacc AT","31@0|AT@6|//@9|1@11|///@16|177@21|/@31|","" -"VSX Vector Permute Extended 8RR:XX4-form","xxpermx XT,XA,XB,XC,UIM",",1@0|1@6|0@8|//@12|///@14|UIM@29|,34@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Set Accumulator to Zero X-form","xxsetaccz AT","31@0|AT@6|//@9|3@11|///@16|177@21|/@31|","" -"VSX Vector Splat Immediate32 Doubleword Indexed 8RR:D-form","xxsplti32dx XT,IX,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|0@11|IX@14|TX@15|imm1@16|","" -"VSX Vector Splat Immediate Double-Precision 8RR:D-form","xxspltidp XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|2@11|TX@15|imm1@16|","" -"VSX Vector Splat Immediate Word 8RR:D-form","xxspltiw XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|3@11|TX@15|imm1@16|","" -"Ultravisor Message Clear X-form","msgclru RB","31@0|///@6|///@11|RB@16|110@21|/@31|","" -"Ultravisor Message SendX-form","msgsndu RB","31@0|///@6|///@11|RB@16|78@21|/@31|","" -"Ultravisor Return From Interrupt Doubleword XL-form","urfid","19@0|///@6|///@11|///@16|306@21|/@31|","" -"Add Extended using alternate carry bit Z23-form","addex RT,RA,RB,CY","31@0|RT@6|RA@11|RB@16|CY@21|170@23|/@31|","" -"Move From FPSCR Control & Set DRN X-form","mffscdrn FRT,FRB","63@0|FRT@6|20@11|FRB@16|583@21|/@31|","" -"Move From FPSCR Control & Set DRN Immediate X-form","mffscdrni FRT,DRM","63@0|FRT@6|21@11|//@16|DRM@18|583@21|/@31|","" -"Move From FPSCR & Clear Enables X-form","mffsce FRT","63@0|FRT@6|1@11|///@16|583@21|/@31|","" -"Move From FPSCR Control & Set RN X-form","mffscrn FRT,FRB","63@0|FRT@6|22@11|FRB@16|583@21|/@31|","" -"Move From FPSCR Control & Set RN Immediate X-form","mffscrni FRT,RM","63@0|FRT@6|23@11|///@16|RM@19|583@21|/@31|","" -"Move From FPSCR Lightweight X-form","mffsl FRT","63@0|FRT@6|24@11|///@16|583@21|/@31|","" -"SLB Invalidate All Global X-form","slbiag RS, L","31@0|RS@6|///@11|L@15|///@16|850@21|/@31|","" -"Vector Multiply-Sum Unsigned Doubleword Modulo VA-form","vmsumudm 
VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|35@26|","" -"Add PC Immediate Shifted DX-form","addpcis RT,D","19@0|RT@6|d1@11|d0@16|2@26|d2@31|","" -"Decimal Convert From National VX-form","bcdcfn. VRT,VRB,PS","4@0|VRT@6|7@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Convert From Signed Quadword VX-form","bcdcfsq. VRT,VRB,PS","4@0|VRT@6|2@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Convert From Zoned VX-form","bcdcfz. VRT,VRB,PS","4@0|VRT@6|6@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Copy Sign VX-form","bcdcpsgn. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|833@21|","" -"Decimal Convert To National VX-form","bcdctn. VRT,VRB","4@0|VRT@6|5@11|VRB@16|1@21|/@22|385@23|","" -"Decimal Convert To Signed Quadword VX-form","bcdctsq. VRT,VRB","4@0|VRT@6|0@11|VRB@16|1@21|/@22|385@23|","" -"Decimal Convert To Zoned VX-form","bcdctz. VRT,VRB,PS","4@0|VRT@6|4@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Shift VX-form","bcds. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|193@23|","" -"Decimal Set Sign VX-form","bcdsetsgn. VRT,VRB,PS","4@0|VRT@6|31@11|VRB@16|1@21|PS@22|385@23|","" -"Decimal Shift and Round VX-form","bcdsr. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|449@23|","" -"Decimal Truncate VX-form","bcdtrunc. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|257@23|","" -"Decimal Unsigned Shift VX-form","bcdus. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|129@23|","" -"Decimal Unsigned Truncate VX-form","bcdutrunc. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|321@23|","" -"Compare Equal Byte X-form","cmpeqb BF,RA,RB","31@0|BF@6|//@9|RA@11|RB@16|224@21|/@31|","" -"Compare Ranged Byte X-form","cmprb BF,L,RA,RB","31@0|BF@6|/@9|L@10|RA@11|RB@16|192@21|/@31|","" -"Count Trailing Zeros Doubleword X-form","cnttzd RA,RS (Rc=0)|cnttzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|570@21|Rc@31|","" -"Count Trailing Zeros Word X-form","cnttzw RA,RS (Rc=0)|cnttzw. 
RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|538@21|Rc@31|","" -"Copy X-form","copy RA,RB","31@0|///@6|1@10|RA@11|RB@16|774@21|/@31|","" -"Copy-Paste Abort X-form","cpabort","31@0|///@6|///@11|///@16|838@21|/@31|","" -"Deliver A Random Number X-form","darn RT,L","31@0|RT@6|///@11|L@14|///@16|755@21|/@31|","" -"DFP Test Significance Immediate X-form","dtstsfi BF,UIM,FRB","59@0|BF@6|/@9|UIM@10|FRB@16|675@21|/@31|","" -"DFP Test Significance Immediate Quad X-form","dtstsfiq BF,UIM,FRBp","63@0|BF@6|/@9|UIM@10|FRBp@16|675@21|/@31|","" -"Extend Sign Word and Shift Left Immediate XS-form","extswsli RA,RS,SH (Rc=0)|extswsli. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|445@21|sh@30|Rc@31|","" -"Load Doubleword ATomic X-form","ldat RT,RA,FC","31@0|RT@6|RA@11|FC@16|614@21|/@31|","" -"Load Word ATomic X-form","lwat RT,RA,FC","31@0|RT@6|RA@11|FC@16|582@21|/@31|","" -"Load VSX Scalar Doubleword DS-form","lxsd VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|2@30|","" -"Load VSX Scalar as Integer Byte & Zero Indexed X-form","lxsibzx XT,RA,RB","31@0|T@6|RA@11|RB@16|781@21|TX@31|","" -"Load VSX Scalar as Integer Halfword & Zero Indexed X-form","lxsihzx XT,RA,RB","31@0|T@6|RA@11|RB@16|813@21|TX@31|","" -"Load VSX Scalar Single-Precision DS-form","lxssp VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|3@30|","" -"Load VSX Vector DQ-form","lxv XT,DQ(RA)","61@0|T@6|RA@11|DQ@16|TX@28|1@29|","" -"Load VSX Vector Byte*16 Indexed X-form","lxvb16x XT,RA,RB","31@0|T@6|RA@11|RB@16|876@21|TX@31|","" -"Load VSX Vector Halfword*8 Indexed X-form","lxvh8x XT,RA,RB","31@0|T@6|RA@11|RB@16|812@21|TX@31|","" -"Load VSX Vector with Length X-form","lxvl XT,RA,RB","31@0|T@6|RA@11|RB@16|269@21|TX@31|","" -"Load VSX Vector with Length Left-justified X-form","lxvll XT,RA,RB","31@0|T@6|RA@11|RB@16|301@21|TX@31|","" -"Load VSX Vector Word & Splat Indexed X-form","lxvwsx XT,RA,RB","31@0|T@6|RA@11|RB@16|364@21|TX@31|","" -"Load VSX Vector Indexed X-form","lxvx XT,RA,RB","31@0|T@6|RA@11|RB@16|4@21|/@25|12@26|TX@31|","" -"Multiply-Add High 
Doubleword VA-form","maddhd RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|48@26|","" -"Multiply-Add High Doubleword Unsigned VA-form","maddhdu RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|49@26|","" -"Multiply-Add Low Doubleword VA-form","maddld RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|51@26|","" -"Move to CR from XER Extended X-form","mcrxrx BF","31@0|BF@6|//@9|///@11|///@16|576@21|/@31|","" -"Move From VSR Lower Doubleword X-form","mfvsrld RA,XS","31@0|S@6|RA@11|///@16|307@21|SX@31|","" -"Modulo Signed Doubleword X-form","modsd RT,RA,RB","31@0|RT@6|RA@11|RB@16|777@21|/@31|","" -"Modulo Signed Word X-form","modsw RT,RA,RB","31@0|RT@6|RA@11|RB@16|779@21|/@31|","" -"Modulo Unsigned Doubleword X-form","modud RT,RA,RB","31@0|RT@6|RA@11|RB@16|265@21|/@31|","" -"Modulo Unsigned Word X-form","moduw RT,RA,RB","31@0|RT@6|RA@11|RB@16|267@21|/@31|","" -"Message Synchronize X-form","msgsync","31@0|///@6|///@11|///@16|886@21|/@31|","" -"Move To VSR Double Doubleword X-form","mtvsrdd XT,RA,RB","31@0|T@6|RA@11|RB@16|435@21|TX@31|","" -"Move To VSR Word & Splat X-form","mtvsrws XT,RA","31@0|T@6|RA@11|///@16|403@21|TX@31|","" -"Paste X-form","paste. 
RA,RB,L","31@0|///@6|L@10|RA@11|RB@16|902@21|1@31|","" -"Set Boolean X-form","setb RT,BFA","31@0|RT@6|BFA@11|//@14|///@16|128@21|/@31|","" -"SLB Invalidate Entry Global X-form","slbieg RS,RB","31@0|RS@6|///@11|RB@16|466@21|/@31|","" -"SLB Synchronize X-form","slbsync","31@0|///@6|///@11|///@16|338@21|/@31|","" -"Store Doubleword ATomic X-form","stdat RS,RA,FC","31@0|RS@6|RA@11|FC@16|742@21|/@31|","" -"Stop XL-form","stop","19@0|///@6|///@11|///@16|370@21|/@31|","" -"Store Word ATomic X-form","stwat RS,RA,FC","31@0|RS@6|RA@11|FC@16|710@21|/@31|","" -"Store VSX Scalar Doubleword DS-form","stxsd VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|2@30|","" -"Store VSX Scalar as Integer Byte Indexed X-form","stxsibx XS,RA,RB","31@0|S@6|RA@11|RB@16|909@21|SX@31|","" -"Store VSX Scalar as Integer Halfword Indexed X-form","stxsihx XS,RA,RB","31@0|S@6|RA@11|RB@16|941@21|SX@31|","" -"Store VSX Scalar Single DS-form","stxssp VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|3@30|","" -"Store VSX Vector DQ-form","stxv XS,DQ(RA)","61@0|S@6|RA@11|DQ@16|SX@28|5@29|","" -"Store VSX Vector Byte*16 Indexed X-form","stxvb16x XS,RA,RB","31@0|S@6|RA@11|RB@16|1004@21|SX@31|","" -"Store VSX Vector Halfword*8 Indexed X-form","stxvh8x XS,RA,RB","31@0|S@6|RA@11|RB@16|940@21|SX@31|","" -"Store VSX Vector with Length X-form","stxvl XS,RA,RB","31@0|S@6|RA@11|RB@16|397@21|SX@31|","" -"Store VSX Vector with Length Left-justified X-form","stxvll XS,RA,RB","31@0|S@6|RA@11|RB@16|429@21|SX@31|","" -"Store VSX Vector Indexed X-form","stxvx XS,RA,RB","31@0|S@6|RA@11|RB@16|396@21|SX@31|","" -"Vector Absolute Difference Unsigned Byte VX-form","vabsdub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1027@21|","" -"Vector Absolute Difference Unsigned Halfword VX-form","vabsduh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1091@21|","" -"Vector Absolute Difference Unsigned Word VX-form","vabsduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1155@21|","" -"Vector Bit Permute Doubleword VX-form","vbpermd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1484@21|","" 
-"Vector Count Leading Zero Least-Significant Bits Byte VX-form","vclzlsbb RT,VRB","4@0|RT@6|0@11|VRB@16|1538@21|","" -"Vector Compare Not Equal Byte VC-form","vcmpneb VRT,VRA,VRB (Rc=0)|vcmpneb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|7@22|","" -"Vector Compare Not Equal Halfword VC-form","vcmpneh VRT,VRA,VRB (Rc=0)|vcmpneh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|71@22|","" -"Vector Compare Not Equal Word VC-form","vcmpnew VRT,VRA,VRB (Rc=0)|vcmpnew. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|135@22|","" -"Vector Compare Not Equal or Zero Byte VC-form","vcmpnezb VRT,VRA,VRB (Rc=0)|vcmpnezb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|263@22|","" -"Vector Compare Not Equal or Zero Halfword VC-form","vcmpnezh VRT,VRA,VRB (Rc=0)|vcmpnezh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|327@22|","" -"Vector Compare Not Equal or Zero Word VC-form","vcmpnezw VRT,VRA,VRB (Rc=0)|vcmpnezw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|391@22|","" -"Vector Count Trailing Zeros Byte VX-form","vctzb VRT,VRB","4@0|VRT@6|28@11|VRB@16|1538@21|","" -"Vector Count Trailing Zeros Doubleword VX-form","vctzd VRT,VRB","4@0|VRT@6|31@11|VRB@16|1538@21|","" -"Vector Count Trailing Zeros Halfword VX-form","vctzh VRT,VRB","4@0|VRT@6|29@11|VRB@16|1538@21|","" -"Vector Count Trailing Zero Least-Significant Bits Byte VX-form","vctzlsbb RT,VRB","4@0|RT@6|1@11|VRB@16|1538@21|","" -"Vector Count Trailing Zeros Word VX-form","vctzw VRT,VRB","4@0|VRT@6|30@11|VRB@16|1538@21|","" -"Vector Extract Doubleword to VSR using immediate-specified index VX-form","vextractd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|717@21|","" -"Vector Extract Unsigned Byte to VSR using immediate-specified index VX-form","vextractub VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|525@21|","" -"Vector Extract Unsigned Halfword to VSR using immediate-specified index VX-form","vextractuh VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|589@21|","" -"Vector Extract Unsigned Word to VSR using 
immediate-specified index VX-form","vextractuw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|653@21|","" -"Vector Extend Sign Byte To Doubleword VX-form","vextsb2d VRT,VRB","4@0|VRT@6|24@11|VRB@16|1538@21|","" -"Vector Extend Sign Byte To Word VX-form","vextsb2w VRT,VRB","4@0|VRT@6|16@11|VRB@16|1538@21|","" -"Vector Extend Sign Halfword To Doubleword VX-form","vextsh2d VRT,VRB","4@0|VRT@6|25@11|VRB@16|1538@21|","" -"Vector Extend Sign Halfword To Word VX-form","vextsh2w VRT,VRB","4@0|VRT@6|17@11|VRB@16|1538@21|","" -"Vector Extend Sign Word To Doubleword VX-form","vextsw2d VRT,VRB","4@0|VRT@6|26@11|VRB@16|1538@21|","" -"Vector Extract Unsigned Byte to GPR using GPR-specified Left-Index VX-form","vextublx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1549@21|","" -"Vector Extract Unsigned Byte to GPR using GPR-specified Right-Index VX-form","vextubrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1805@21|","" -"Vector Extract Unsigned Halfword to GPR using GPR-specified Left-Index VX-form","vextuhlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1613@21|","" -"Vector Extract Unsigned Halfword to GPR using GPR-specified Right-Index VX-form","vextuhrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1869@21|","" -"Vector Extract Unsigned Word to GPR using GPR-specified Left-Index VX-form","vextuwlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1677@21|","" -"Vector Extract Unsigned Word to GPR using GPR-specified Right-Index VX-form","vextuwrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1933@21|","" -"Vector Insert Byte from VSR using immediate-specified index VX-form","vinsertb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|781@21|","" -"Vector Insert Doubleword from VSR using immediate-specified index VX-form","vinsertd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|973@21|","" -"Vector Insert Halfword from VSR using immediate-specified index VX-form","vinserth VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|845@21|","" -"Vector Insert Word from VSR using immediate-specified index VX-form","vinsertw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|909@21|","" 
-"Vector Multiply-by-10 & write Carry-out Unsigned Quadword VX-form","vmul10cuq VRT,VRA","4@0|VRT@6|VRA@11|///@16|1@21|","" -"Vector Multiply-by-10 Extended & write Carry-out Unsigned Quadword VX-form","vmul10ecuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|65@21|","" -"Vector Multiply-by-10 Extended Unsigned Quadword VX-form","vmul10euq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|577@21|","" -"Vector Multiply-by-10 Unsigned Quadword VX-form","vmul10uq VRT,VRA","4@0|VRT@6|VRA@11|///@16|513@21|","" -"Vector Negate Doubleword VX-form","vnegd VRT,VRB","4@0|VRT@6|7@11|VRB@16|1538@21|","" -"Vector Negate Word VX-form","vnegw VRT,VRB","4@0|VRT@6|6@11|VRB@16|1538@21|","" -"Vector Permute Right-indexed VA-form","vpermr VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|59@26|","" -"Vector Parity Byte Doubleword VX-form","vprtybd VRT,VRB","4@0|VRT@6|9@11|VRB@16|1538@21|","" -"Vector Parity Byte Quadword VX-form","vprtybq VRT,VRB","4@0|VRT@6|10@11|VRB@16|1538@21|","" -"Vector Parity Byte Word VX-form","vprtybw VRT,VRB","4@0|VRT@6|8@11|VRB@16|1538@21|","" -"Vector Rotate Left Doubleword then Mask Insert VX-form","vrldmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|197@21|","" -"Vector Rotate Left Doubleword then AND with Mask VX-form","vrldnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|453@21|","" -"Vector Rotate Left Word then Mask Insert VX-form","vrlwmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|133@21|","" -"Vector Rotate Left Word then AND with Mask VX-form","vrlwnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|389@21|","" -"Vector Shift Left Variable VX-form","vslv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1860@21|","" -"Vector Shift Right Variable VX-form","vsrv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1796@21|","" -"Wait X-form","wait WC,PL","31@0|??@6|/@8|WC@9|///@11|PL@14|///@16|30@21|/@31|","" -"VSX Scalar Absolute Quad-Precision X-form","xsabsqp VRT,VRB","63@0|VRT@6|0@11|VRB@16|804@21|/@31|","" -"VSX Scalar Add Quad-Precision [using round to Odd] X-form","xsaddqp VRT,VRA,VRB (RO=0)|xsaddqpo VRT,VRA,VRB 
(RO=1)","63@0|VRT@6|VRA@11|VRB@16|4@21|RO@31|","" -"VSX Scalar Compare Equal Double-Precision XX3-form","xscmpeqdp XT,XA,XB","60@0|T@6|A@11|B@16|3@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Exponents Double-Precision XX3-form","xscmpexpdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|59@21|AX@29|BX@30|/@31|","" -"VSX Scalar Compare Exponents Quad-Precision X-form","xscmpexpqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|164@21|/@31|","" -"VSX Scalar Compare Greater Than or Equal Double-Precision XX3-form","xscmpgedp XT,XA,XB","60@0|T@6|A@11|B@16|19@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Greater Than Double-Precision XX3-form","xscmpgtdp XT,XA,XB","60@0|T@6|A@11|B@16|11@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Ordered Quad-Precision X-form","xscmpoqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|132@21|/@31|","" -"VSX Scalar Compare Unordered Quad-Precision X-form","xscmpuqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|644@21|/@31|","" -"VSX Scalar Copy Sign Quad-Precision X-form","xscpsgnqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|100@21|/@31|","" -"VSX Scalar Convert with round Double-Precision to Half-Precision format XX2-form","xscvdphp XT,XB","60@0|T@6|17@11|B@16|347@21|BX@30|TX@31|","" -"VSX Scalar Convert Double-Precision to Quad-Precision format X-form","xscvdpqp VRT,VRB","63@0|VRT@6|22@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert Half-Precision to Double-Precision format XX2-form","xscvhpdp XT,XB","60@0|T@6|16@11|B@16|347@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Quad-Precision to Double-Precision format [using round to Odd] X-form","xscvqpdp VRT,VRB (RO=0)|xscvqpdpo VRT,VRB (RO=1)","63@0|VRT@6|20@11|VRB@16|836@21|RO@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Signed Doubleword format X-form","xscvqpsdz VRT,VRB","63@0|VRT@6|25@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Signed Word format X-form","xscvqpswz VRT,VRB","63@0|VRT@6|9@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with 
round to zero Quad-Precision to Unsigned Doubleword format X-form","xscvqpudz VRT,VRB","63@0|VRT@6|17@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Word format X-form","xscvqpuwz VRT,VRB","63@0|VRT@6|1@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert Signed Doubleword to Quad-Precision format X-form","xscvsdqp VRT,VRB","63@0|VRT@6|10@11|VRB@16|836@21|/@31|","" -"VSX Scalar Convert Unsigned Doubleword to Quad-Precision format X-form","xscvudqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|836@21|/@31|","" -"VSX Scalar Divide Quad-Precision [using round to Odd] X-form","xsdivqp VRT,VRA,VRB (RO=0)|xsdivqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|548@21|RO@31|","" -"VSX Scalar Insert Exponent Double-Precision X-form","xsiexpdp XT,RA,RB","60@0|T@6|RA@11|RB@16|918@21|TX@31|","" -"VSX Scalar Insert Exponent Quad-Precision X-form","xsiexpqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|868@21|/@31|","" -"VSX Scalar Multiply-Add Quad-Precision [using round to Odd] X-form","xsmaddqp VRT,VRA,VRB (RO=0)|xsmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|388@21|RO@31|","" -"VSX Scalar Maximum Type-C Double-Precision XX3-form","xsmaxcdp XT,XA,XB","60@0|T@6|A@11|B@16|128@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Maximum Type-J Double-Precision XX3-form","xsmaxjdp XT,XA,XB","60@0|T@6|A@11|B@16|144@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Minimum Type-C Double-Precision XX3-form","xsmincdp XT,XA,XB","60@0|T@6|A@11|B@16|136@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Minimum Type-J Double-Precision XX3-form","xsminjdp XT,XA,XB","60@0|T@6|A@11|B@16|152@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsmsubqp VRT,VRA,VRB (RO=0)|xsmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|420@21|RO@31|","" -"VSX Scalar Multiply Quad-Precision [using round to Odd] X-form","xsmulqp VRT,VRA,VRB (RO=0)|xsmulqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|36@21|RO@31|","" -"VSX Scalar Negative Absolute 
Quad-Precision X-form","xsnabsqp VRT,VRB","63@0|VRT@6|8@11|VRB@16|804@21|TX@31|","" -"VSX Scalar Negate Quad-Precision X-form","xsnegqp VRT,VRB","63@0|VRT@6|16@11|VRB@16|804@21|/@31|","" -"VSX Scalar Negative Multiply-Add Quad-Precision [using round to Odd] X-form","xsnmaddqp VRT,VRA,VRB (RO=0)|xsnmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|452@21|RO@31|","" -"VSX Scalar Negative Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsnmsubqp VRT,VRA,VRB (RO=0)|xsnmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|484@21|RO@31|","" -"VSX Scalar Round to Quad-Precision Integer [with Inexact] Z23-form","xsrqpi R,VRT,VRB,RMC (EX=0)|xsrqpix R,VRT,VRB,RMC (EX=1)","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|5@23|EX@31|","" -"VSX Scalar Round Quad-Precision to Double-Extended Precision Z23-form","xsrqpxp R,VRT,VRB,RMC","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|37@23|/@31|","" -"VSX Scalar Square Root Quad-Precision [using round to Odd] X-form","xssqrtqp VRT,VRB (RO=0)|xssqrtqpo VRT,VRB (RO=1)","63@0|VRT@6|27@11|VRB@16|804@21|RO@31|","" -"VSX Scalar Subtract Quad-Precision [using round to Odd] X-form","xssubqp VRT,VRA,VRB (RO=0)|xssubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|516@21|RO@31|","" -"VSX Scalar Test Data Class Double-Precision XX2-form","xststdcdp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|362@21|BX@30|/@31|","" -"VSX Scalar Test Data Class Quad-Precision X-form","xststdcqp BF,VRB,DCMX","63@0|BF@6|DCMX@9|VRB@16|708@21|/@31|","" -"VSX Scalar Test Data Class Single-Precision XX2-form","xststdcsp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|298@21|BX@30|/@31|","" -"VSX Scalar Extract Exponent Double-Precision XX2-form","xsxexpdp RT,XB","60@0|RT@6|0@11|B@16|347@21|BX@30|/@31|","" -"VSX Scalar Extract Exponent Quad-Precision X-form","xsxexpqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|804@21|/@31|","" -"VSX Scalar Extract Significand Double-Precision XX2-form","xsxsigdp RT,XB","60@0|RT@6|1@11|B@16|347@21|BX@30|/@31|","" -"VSX Scalar Extract Significand Quad-Precision 
X-form","xsxsigqp VRT,VRB","63@0|VRT@6|18@11|VRB@16|804@21|/@31|","" -"VSX Vector Convert Half-Precision to Single-Precision format XX2-form","xvcvhpsp XT,XB","60@0|T@6|24@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Convert with round Single-Precision to Half-Precision format XX2-form","xvcvsphp XT,XB","60@0|T@6|25@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Insert Exponent Double-Precision XX3-form","xviexpdp XT,XA,XB","60@0|T@6|A@11|B@16|248@21|AX@29|BX@30|TX@31|","" -"VSX Vector Insert Exponent Single-Precision XX3-form","xviexpsp XT,XA,XB","60@0|T@6|A@11|B@16|216@21|AX@29|BX@30|TX@31|","" -"VSX Vector Test Data Class Double-Precision XX2-form","xvtstdcdp XT,XB,DCMX","60@0|T@6|dx@11|B@16|15@21|dc@25|5@26|dm@29|BX@30|TX@31|","" -"VSX Vector Test Data Class Single-Precision XX2-form","xvtstdcsp XT,XB,DCMX","60@0|T@6|dx@11|B@16|13@21|dc@25|5@26|dm@29|BX@30|TX@31|","" -"VSX Vector Extract Exponent Double-Precision XX2-form","xvxexpdp XT,XB","60@0|T@6|0@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Exponent Single-Precision XX2-form","xvxexpsp XT,XB","60@0|T@6|8@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Significand Double-Precision XX2-form","xvxsigdp XT,XB","60@0|T@6|1@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Significand Single-Precision XX2-form","xvxsigsp XT,XB","60@0|T@6|9@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Doubleword XX2-form","xxbrd XT,XB","60@0|T@6|23@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Halfword XX2-form","xxbrh XT,XB","60@0|T@6|7@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Quadword XX2-form","xxbrq XT,XB","60@0|T@6|31@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Byte-Reverse Word XX2-form","xxbrw XT,XB","60@0|T@6|15@11|B@16|475@21|BX@30|TX@31|","" -"VSX Vector Extract Unsigned Word XX2-form","xxextractuw XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|165@21|BX@30|TX@31|","" -"VSX Vector Insert Word XX2-form","xxinsertw 
XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|181@21|BX@30|TX@31|","" -"VSX Vector Permute XX3-form","xxperm XT,XA,XB","60@0|T@6|A@11|B@16|26@21|AX@29|BX@30|TX@31|","" -"VSX Vector Permute Right-indexed XX3-form","xxpermr XT,XA,XB","60@0|T@6|A@11|B@16|58@21|AX@29|BX@30|TX@31|","" -"VSX Vector Splat Immediate Byte X-form","xxspltib XT,IMM8","60@0|T@6|0@11|IMM8@13|360@21|TX@31|","" -"Decimal Add Modulo VX-form","bcdadd. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|1@23|","" -"Decimal Subtract Modulo VX-form","bcdsub. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|65@23|","" -"Branch Conditional to Branch Target Address Register XL-form","bctar BO,BI,BH (LK=0)|bctarl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|560@21|LK@31|","" -"Clear BHRB X-form","clrbhrb","31@0|///@6|///@11|///@16|430@21|/@31|","" -"Floating Merge Even Word X-form","fmrgew FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|966@21|/@31|","" -"Floating Merge Odd Word X-form","fmrgow FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|838@21|/@31|","" -"Instruction Cache Block Touch X-form","icbt CT, RA, RB","31@0|/@6|CT@7|RA@11|RB@16|22@21|/@31|","" -"Load Quadword And Reserve Indexed X-form","lqarx RTp,RA,RB,EH","31@0|RTp@6|RA@11|RB@16|276@21|EH@31|","" -"Load VSX Scalar as Integer Word Algebraic Indexed X-form","lxsiwax XT,RA,RB","31@0|T@6|RA@11|RB@16|76@21|TX@31|","" -"Load VSX Scalar as Integer Word & Zero Indexed X-form","lxsiwzx XT,RA,RB","31@0|T@6|RA@11|RB@16|12@21|TX@31|","" -"Load VSX Scalar Single-Precision Indexed X-form","lxsspx XT,RA,RB","31@0|T@6|RA@11|RB@16|524@21|TX@31|","" -"Move From BHRB XFX-form","mfbhrbe RT,BHRBE","31@0|RT@6|BHRBE@11|302@21|/@31|","" -"Move From VSR Doubleword X-form","mfvsrd RA,XS","31@0|S@6|RA@11|///@16|51@21|SX@31|","" -"Move From VSR Word and Zero X-form","mfvsrwz RA,XS","31@0|S@6|RA@11|///@16|115@21|SX@31|","" -"Message Clear X-form","msgclr RB","31@0|///@6|///@11|RB@16|238@21|/@31|","" -"Message Clear Privileged X-form","msgclrp RB","31@0|///@6|///@11|RB@16|174@21|/@31|","" 
-"Message Send X-form","msgsnd RB","31@0|///@6|///@11|RB@16|206@21|/@31|","" -"Message Send Privileged X-form","msgsndp RB","31@0|///@6|///@11|RB@16|142@21|/@31|","" -"Move To VSR Doubleword X-form","mtvsrd XT,RA","31@0|T@6|RA@11|///@16|179@21|TX@31|","" -"Move To VSR Word Algebraic X-form","mtvsrwa XT,RA","31@0|T@6|RA@11|///@16|211@21|TX@31|","" -"Move To VSR Word and Zero X-form","mtvsrwz XT,RA","31@0|T@6|RA@11|///@16|243@21|TX@31|","" -"Return from Event Based Branch XL-form","rfebb S","19@0|///@6|///@11|///@16|S@20|146@21|/@31|","" -"Store Quadword Conditional Indexed X-form","stqcx. RSp,RA,RB","31@0|RSp@6|RA@11|RB@16|182@21|1@31|","" -"Store VSX Scalar as Integer Word Indexed X-form","stxsiwx XS,RA,RB","31@0|S@6|RA@11|RB@16|140@21|SX@31|","" -"Store VSX Scalar Single-Precision Indexed X-form","stxsspx XS,RA,RB","31@0|S@6|RA@11|RB@16|652@21|SX@31|","" -"Vector Add & write Carry Unsigned Quadword VX-form","vaddcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|320@21|","" -"Vector Add Extended & write Carry Unsigned Quadword VA-form","vaddecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|61@26|","" -"Vector Add Extended Unsigned Quadword Modulo VA-form","vaddeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|60@26|","" -"Vector Add Unsigned Doubleword Modulo VX-form","vaddudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|192@21|","" -"Vector Add Unsigned Quadword Modulo VX-form","vadduqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|256@21|","" -"Vector Bit Permute Quadword VX-form","vbpermq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1356@21|","" -"Vector AES Cipher VX-form","vcipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1288@21|","" -"Vector AES Cipher Last VX-form","vcipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1289@21|","" -"Vector Count Leading Zeros Byte VX-form","vclzb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1794@21|","" -"Vector Count Leading Zeros Doubleword VX-form","vclzd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1986@21|","" -"Vector Count Leading Zeros Halfword VX-form","vclzh 
VRT,VRB","4@0|VRT@6|///@11|VRB@16|1858@21|","" -"Vector Count Leading Zeros Word VX-form","vclzw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1922@21|","" -"Vector Compare Equal Unsigned Doubleword VC-form","vcmpequd VRT,VRA,VRB (Rc=0)|vcmpequd. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|199@22|","" -"Vector Compare Greater Than Signed Doubleword VC-form","vcmpgtsd VRT,VRA,VRB (Rc=0)|vcmpgtsd. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|967@22|","" -"Vector Compare Greater Than Unsigned Doubleword VC-form","vcmpgtud VRT,VRA,VRB (Rc=0)|vcmpgtud. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|711@22|","" -"Vector Logical Equivalence VX-form","veqv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1668@21|","" -"Vector Gather Bits by Bytes by Doubleword VX-form","vgbbd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1292@21|","" -"Vector Maximum Signed Doubleword VX-form","vmaxsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|450@21|","" -"Vector Maximum Unsigned Doubleword VX-form","vmaxud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|194@21|","" -"Vector Minimum Signed Doubleword VX-form","vminsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|962@21|","" -"Vector Minimum Unsigned Doubleword VX-form","vminud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|706@21|","" -"Vector Merge Even Word VX-form","vmrgew VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1932@21|","" -"Vector Merge Odd Word VX-form","vmrgow VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1676@21|","" -"Vector Multiply Even Signed Word VX-form","vmulesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|904@21|","" -"Vector Multiply Even Unsigned Word VX-form","vmuleuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|648@21|","" -"Vector Multiply Odd Signed Word VX-form","vmulosw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|392@21|","" -"Vector Multiply Odd Unsigned Word VX-form","vmulouw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|136@21|","" -"Vector Multiply Unsigned Word Modulo VX-form","vmuluwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|137@21|","" -"Vector Logical NAND VX-form","vnand 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1412@21|","" -"Vector AES Inverse Cipher VX-form","vncipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1352@21|","" -"Vector AES Inverse Cipher Last VX-form","vncipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1353@21|","" -"Vector Logical OR with Complement VX-form","vorc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1348@21|","" -"Vector Permute & Exclusive-OR VA-form","vpermxor VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|45@26|","" -"Vector Pack Signed Doubleword Signed Saturate VX-form","vpksdss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1486@21|","" -"Vector Pack Signed Doubleword Unsigned Saturate VX-form","vpksdus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1358@21|","" -"Vector Pack Unsigned Doubleword Unsigned Modulo VX-form","vpkudum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1102@21|","" -"Vector Pack Unsigned Doubleword Unsigned Saturate VX-form","vpkudus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1230@21|","" -"Vector Polynomial Multiply-Sum Byte VX-form","vpmsumb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1032@21|","" -"Vector Polynomial Multiply-Sum Doubleword VX-form","vpmsumd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1224@21|","" -"Vector Polynomial Multiply-Sum Halfword VX-form","vpmsumh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1096@21|","" -"Vector Polynomial Multiply-Sum Word VX-form","vpmsumw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1160@21|","" -"Vector Population Count Byte VX-form","vpopcntb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1795@21|","" -"Vector Population Count Doubleword VX-form","vpopcntd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1987@21|","" -"Vector Population Count Halfword VX-form","vpopcnth VRT,VRB","4@0|VRT@6|///@11|VRB@16|1859@21|","" -"Vector Population Count Word VX-form","vpopcntw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1923@21|","" -"Vector Rotate Left Doubleword VX-form","vrld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|196@21|","" -"Vector AES SubBytes VX-form","vsbox VRT,VRA","4@0|VRT@6|VRA@11|///@16|1480@21|","" -"Vector SHA-512 Sigma Doubleword 
VX-form","vshasigmad VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1730@21|","" -"Vector SHA-256 Sigma Word VX-form","vshasigmaw VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1666@21|","" -"Vector Shift Left Doubleword VX-form","vsld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1476@21|","" -"Vector Shift Right Algebraic Doubleword VX-form","vsrad VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|964@21|","" -"Vector Shift Right Doubleword VX-form","vsrd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1732@21|","" -"Vector Subtract & write Carry-out Unsigned Quadword VX-form","vsubcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1344@21|","" -"Vector Subtract Extended & write Carry-out Unsigned Quadword VA-form","vsubecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|63@26|","" -"Vector Subtract Extended Unsigned Quadword Modulo VA-form","vsubeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|62@26|","" -"Vector Subtract Unsigned Doubleword Modulo VX-form","vsubudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1216@21|","" -"Vector Subtract Unsigned Quadword Modulo VX-form","vsubuqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1280@21|","" -"Vector Unpack High Signed Word VX-form","vupkhsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1614@21|","" -"Vector Unpack Low Signed Word VX-form","vupklsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1742@21|","" -"VSX Scalar Add Single-Precision XX3-form","xsaddsp XT,XA,XB","60@0|T@6|A@11|B@16|0@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Convert Scalar Single-Precision to Vector Single-Precision format Non-signalling XX2-form","xscvdpspn XT,XB","60@0|T@6|///@11|B@16|267@21|BX@30|TX@31|","" -"VSX Scalar Convert Single-Precision to Double-Precision format Non-signalling XX2-form","xscvspdpn XT,XB","60@0|T@6|///@11|B@16|331@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Signed Doubleword to Single-Precision format XX2-form","xscvsxdsp XT,XB","60@0|T@6|///@11|B@16|312@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Unsigned Doubleword to Single-Precision XX2-form","xscvuxdsp 
XT,XB","60@0|T@6|///@11|B@16|296@21|BX@30|TX@31|","" -"VSX Scalar Divide Single-Precision XX3-form","xsdivsp XT,XA,XB","60@0|T@6|A@11|B@16|24@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-A Single-Precision XX3-form","xsmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|1@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-M Single-Precision XX3-form","xsmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|9@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-A Single-Precision XX3-form","xsmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|17@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-M Single-Precision XX3-form","xsmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|25@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply Single-Precision XX3-form","xsmulsp XT,XA,XB","60@0|T@6|A@11|B@16|16@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-A Single-Precision XX3-form","xsnmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|129@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-M Single-Precision XX3-form","xsnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|137@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-A Single-Precision XX3-form","xsnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|145@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-M Single-Precision XX3-form","xsnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|153@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Reciprocal Estimate Single-Precision XX2-form","xsresp XT,XB","60@0|T@6|///@11|B@16|26@21|BX@30|TX@31|","" -"VSX Scalar Round to Single-Precision XX2-form","xsrsp XT,XB","60@0|T@6|///@11|B@16|281@21|BX@30|TX@31|","" -"VSX Scalar Reciprocal Square Root Estimate Single-Precision XX2-form","xsrsqrtesp XT,XB","60@0|T@6|///@11|B@16|10@21|BX@30|TX@31|","" -"VSX Scalar Square Root Single-Precision XX2-form","xssqrtsp XT,XB","60@0|T@6|///@11|B@16|11@21|BX@30|TX@31|","" -"VSX Scalar Subtract Single-Precision XX3-form","xssubsp XT,XA,XB","60@0|T@6|A@11|B@16|8@21|AX@29|BX@30|TX@31|","" -"VSX Vector 
Logical Equivalence XX3-form","xxleqv XT,XA,XB","60@0|T@6|A@11|B@16|186@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical NAND XX3-form","xxlnand XT,XA,XB","60@0|T@6|A@11|B@16|178@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical OR with Complement XX3-form","xxlorc XT,XA,XB","60@0|T@6|A@11|B@16|170@21|AX@29|BX@30|TX@31|","" -"Add and Generate Sixes XO-form","addg6s RT,RA,RB","31@0|RT@6|RA@11|RB@16|/@21|74@22|/@31|","" -"Bit Permute Doubleword X-form","bpermd RA,RS,RB","31@0|RS@6|RA@11|RB@16|252@21|/@31|","" -"Convert Binary Coded Decimal To Declets X-form","cbcdtd RA, RS","31@0|RS@6|RA@11|///@16|314@21|/@31|","" -"Convert Declets To Binary Coded Decimal X-form","cdtbcd RA, RS","31@0|RS@6|RA@11|///@16|282@21|/@31|","" -"DFP Convert From Fixed X-form","dcffix FRT,FRB (Rc=0)|dcffix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|802@21|Rc@31|","" -"Divide Doubleword Extended XO-form","divde RT,RA,RB (OE=0 Rc=0)|divde. RT,RA,RB (OE=0 Rc=1)|divdeo RT,RA,RB (OE=1 Rc=0)|divdeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|425@22|Rc@31|","" -"Divide Doubleword Extended Unsigned XO-form","divdeu RT,RA,RB (OE=0 Rc=0)|divdeu. RT,RA,RB (OE=0 Rc=1)|divdeuo RT,RA,RB (OE=1 Rc=0)|divdeuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|393@22|Rc@31|","" -"Divide Word Extended XO-form","divwe RT,RA,RB (OE=0 Rc=0)|divwe. RT,RA,RB (OE=0 Rc=1)|divweo RT,RA,RB (OE=1 Rc=0)|divweo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|427@22|Rc@31|","" -"Divide Word Extended Unsigned XO-form","divweu RT,RA,RB (OE=0 Rc=0)|divweu. RT,RA,RB (OE=0 Rc=1)|divweuo RT,RA,RB (OE=1 Rc=0)|divweuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|395@22|Rc@31|","" -"Floating Convert with round Signed Doubleword to Single-Precision format X-form","fcfids FRT,FRB (Rc=0)|fcfids. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","" -"Floating Convert with round Unsigned Doubleword to Double-Precision format X-form","fcfidu FRT,FRB (Rc=0)|fcfidu. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","" -"Floating Convert with round Unsigned Doubleword to Single-Precision format X-form","fcfidus FRT,FRB (Rc=0)|fcfidus. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","" -"Floating Convert with round Double-Precision To Unsigned Doubleword format X-form","fctidu FRT,FRB (Rc=0)|fctidu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|942@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Unsigned Doubleword format X-form","fctiduz FRT,FRB (Rc=0)|fctiduz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|943@21|Rc@31|","" -"Floating Convert with round Double-Precision To Unsigned Word format X-form","fctiwu FRT,FRB (Rc=0)|fctiwu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|142@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Unsigned Word format X-form","fctiwuz FRT,FRB (Rc=0)|fctiwuz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|143@21|Rc@31|","" -"Floating Test for software Divide X-form","ftdiv BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|128@21|/@31|","" -"Floating Test for software Square Root X-form","ftsqrt BF,FRB","63@0|BF@6|//@9|///@11|FRB@16|160@21|/@31|","" -"Load Byte And Reserve Indexed X-form","lbarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|52@21|EH@31|","" -"Load Doubleword Byte-Reverse Indexed X-form","ldbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|532@21|/@31|","" -"Load Floating-Point as Integer Word & Zero Indexed X-form","lfiwzx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|887@21|/@31|","" -"Load Halfword And Reserve Indexed Xform","lharx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|116@21|EH@31|","" -"Load VSX Scalar Doubleword Indexed X-form","lxsdx XT,RA,RB","31@0|T@6|RA@11|RB@16|588@21|TX@31|","" -"Load VSX Vector Doubleword*2 Indexed X-form","lxvd2x XT,RA,RB","31@0|T@6|RA@11|RB@16|844@21|TX@31|","" -"Load VSX Vector Doubleword & Splat Indexed X-form","lxvdsx XT,RA,RB","31@0|T@6|RA@11|RB@16|332@21|TX@31|","" -"Load VSX Vector Word*4 Indexed X-form","lxvw4x 
XT,RA,RB","31@0|T@6|RA@11|RB@16|780@21|TX@31|","" -"Population Count Doubleword X-form","popcntd RA, RS","31@0|RS@6|RA@11|///@16|506@21|/@31|","" -"Population Count Words X-form","popcntw RA, RS","31@0|RS@6|RA@11|///@16|378@21|/@31|","" -"Store Byte Conditional Indexed X-form","stbcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|694@21|1@31|","" -"Store Doubleword Byte-Reverse Indexed X-form","stdbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|660@21|/@31|","" -"Store Halfword Conditional Indexed X-form","sthcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|726@21|1@31|","" -"Store VSX Scalar Doubleword Indexed X-form","stxsdx XS,RA,RB","31@0|S@6|RA@11|RB@16|716@21|SX@31|","" -"Store VSX Vector Doubleword*2 Indexed X-form","stxvd2x XS,RA,RB","31@0|S@6|RA@11|RB@16|972@21|SX@31|","" -"Store VSX Vector Word*4 Indexed X-form","stxvw4x XS,RA,RB","31@0|S@6|RA@11|RB@16|908@21|SX@31|","" -"VSX Scalar Absolute Double-Precision XX2-form","xsabsdp XT,XB","60@0|T@6|///@11|B@16|345@21|BX@30|TX@31|","" -"VSX Scalar Add Double-Precision XX3-form","xsadddp XT,XA,XB","60@0|T@6|A@11|B@16|32@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Compare Ordered Double-Precision XX3-form","xscmpodp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","" -"VSX Scalar Compare Unordered Double-Precision XX3-form","xscmpudp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","" -"VSX Scalar Copy Sign Double-Precision XX3-form","xscpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|176@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Convert with round Double-Precision to Single-Precision format XX2-form","xscvdpsp XT,XB","60@0|T@6|///@11|B@16|265@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xscvdpsxds XT,XB","60@0|T@6|///@11|B@16|344@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Signed Word format XX2-form","xscvdpsxws XT,XB","60@0|T@6|///@11|B@16|88@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Unsigned 
Doubleword format XX2-form","xscvdpuxds XT,XB","60@0|T@6|///@11|B@16|328@21|BX@30|TX@31|","" -"VSX Scalar Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xscvdpuxws XT,XB","60@0|T@6|///@11|B@16|72@21|BX@30|TX@31|","" -"VSX Scalar Convert Single-Precision to Double-Precision format XX2-form","xscvspdp XT,XB","60@0|T@6|///@11|B@16|329@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Signed Doubleword to Double-Precision format XX2-form","xscvsxddp XT,XB","60@0|T@6|///@11|B@16|376@21|BX@30|TX@31|","" -"VSX Scalar Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xscvuxddp XT,XB","60@0|T@6|///@11|B@16|360@21|BX@30|TX@31|","" -"VSX Scalar Divide Double-Precision XX3-form","xsdivdp XT,XA,XB","60@0|T@6|A@11|B@16|56@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-A Double-Precision XX3-form","xsmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|33@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Add Type-M Double-Precision XX3-form","xsmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|41@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Maximum Double-Precision XX3-form","xsmaxdp XT,XA,XB","60@0|T@6|A@11|B@16|160@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Minimum Double-Precision XX3-form","xsmindp XT,XA,XB","60@0|T@6|A@11|B@16|168@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-A Double-Precision XX3-form","xsmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|49@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply-Subtract Type-M Double-Precision XX3-form","xsmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|57@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Multiply Double-Precision XX3-form","xsmuldp XT,XA,XB","60@0|T@6|A@11|B@16|48@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Absolute Double-Precision XX2-form","xsnabsdp XT,XB","60@0|T@6|///@11|B@16|361@21|BX@30|TX@31|","" -"VSX Scalar Negate Double-Precision XX2-form","xsnegdp XT,XB","60@0|T@6|///@11|B@16|377@21|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-A Double-Precision XX3-form","xsnmaddadp 
XT,XA,XB","60@0|T@6|A@11|B@16|161@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Add Type-M Double-Precision XX3-form","xsnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|169@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-A Double-Precision XX3-form","xsnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|177@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Negative Multiply-Subtract Type-M Double-Precision XX3-form","xsnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|185@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round to Nearest Away XX2-form","xsrdpi XT,XB","60@0|T@6|///@11|B@16|73@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer exact using Current rounding mode XX2-form","xsrdpic XT,XB","60@0|T@6|///@11|B@16|107@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round toward -Infinity XX2-form","xsrdpim XT,XB","60@0|T@6|///@11|B@16|121@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round toward +Infinity XX2-form","xsrdpip XT,XB","60@0|T@6|///@11|B@16|105@21|BX@30|TX@31|","" -"VSX Scalar Round to Double-Precision Integer using round toward Zero XX2-form","xsrdpiz XT,XB","60@0|T@6|///@11|B@16|89@21|BX@30|TX@31|","" -"VSX Scalar Reciprocal Estimate Double-Precision XX2-form","xsredp XT,XB","60@0|T@6|///@11|B@16|90@21|BX@30|TX@31|","" -"VSX Scalar Reciprocal Square Root Estimate Double-Precision XX2-form","xsrsqrtedp XT,XB","60@0|T@6|///@11|B@16|74@21|BX@30|TX@31|","" -"VSX Scalar Square Root Double-Precision XX2-form","xssqrtdp XT,XB","60@0|T@6|///@11|B@16|75@21|BX@30|TX@31|","" -"VSX Scalar Subtract Double-Precision XX3-form","xssubdp XT,XA,XB","60@0|T@6|A@11|B@16|40@21|AX@29|BX@30|TX@31|","" -"VSX Scalar Test for software Divide Double-Precision XX3-form","xstdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|61@21|AX@29|BX@30|/@31|","" -"VSX Scalar Test for software Square Root Double-Precision XX2-form","xstsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|106@21|BX@30|/@31|","" -"VSX 
Vector Absolute Value Double-Precision XX2-form","xvabsdp XT,XB","60@0|T@6|///@11|B@16|473@21|BX@30|TX@31|","" -"VSX Vector Absolute Value Single-Precision XX2-form","xvabssp XT,XB","60@0|T@6|///@11|B@16|409@21|BX@30|TX@31|","" -"VSX Vector Add Double-Precision XX3-form","xvadddp XT,XA,XB","60@0|T@6|A@11|B@16|96@21|AX@29|BX@30|TX@31|","" -"VSX Vector Add Single-Precision XX3-form","xvaddsp XT,XA,XB","60@0|T@6|A@11|B@16|64@21|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Equal To Double-Precision XX3-form","xvcmpeqdp XT,XA,XB (Rc=0)|xvcmpeqdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|99@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Equal To Single-Precision XX3-form","xvcmpeqsp XT,XA,XB (Rc=0)|xvcmpeqsp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|67@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than or Equal To Double-Precision XX3-form","xvcmpgedp XT,XA,XB (Rc=0)|xvcmpgedp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|115@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than or Equal To Single-Precision XX3-form","xvcmpgesp XT,XA,XB (Rc=0)|xvcmpgesp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|83@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than Double-Precision XX3-form","xvcmpgtdp XT,XA,XB (Rc=0)|xvcmpgtdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|107@22|AX@29|BX@30|TX@31|","" -"VSX Vector Compare Greater Than Single-Precision XX3-form","xvcmpgtsp XT,XA,XB (Rc=0)|xvcmpgtsp. 
XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|75@22|AX@29|BX@30|TX@31|","" -"VSX Vector Copy Sign Double-Precision XX3-form","xvcpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|240@21|AX@29|BX@30|TX@31|","" -"VSX Vector Copy Sign Single-Precision XX3-form","xvcpsgnsp XT,XA,XB","60@0|T@6|A@11|B@16|208@21|AX@29|BX@30|TX@31|","" -"VSX Vector Convert with round Double-Precision to Single-Precision format XX2-form","xvcvdpsp XT,XB","60@0|T@6|///@11|B@16|393@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xvcvdpsxds XT,XB","60@0|T@6|///@11|B@16|472@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Signed Word format XX2-form","xvcvdpsxws XT,XB","60@0|T@6|///@11|B@16|216@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Unsigned Doubleword format XX2-form","xvcvdpuxds XT,XB","60@0|T@6|///@11|B@16|456@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xvcvdpuxws XT,XB","60@0|T@6|///@11|B@16|200@21|BX@30|TX@31|","" -"VSX Vector Convert Single-Precision to Double-Precision format XX2-form","xvcvspdp XT,XB","60@0|T@6|///@11|B@16|457@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Signed Doubleword format XX2-form","xvcvspsxds XT,XB","60@0|T@6|///@11|B@16|408@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Signed Word format XX2-form","xvcvspsxws XT,XB","60@0|T@6|///@11|B@16|152@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Unsigned Doubleword format XX2-form","xvcvspuxds XT,XB","60@0|T@6|///@11|B@16|392@21|BX@30|TX@31|","" -"VSX Vector Convert with round to zero Single-Precision to Unsigned Word format XX2-form","xvcvspuxws XT,XB","60@0|T@6|///@11|B@16|136@21|BX@30|TX@31|","" -"VSX Vector Convert with round Signed Doubleword to Double-Precision format XX2-form","xvcvsxddp 
XT,XB","60@0|T@6|///@11|B@16|504@21|BX@30|TX@31|","" -"VSX Vector Convert with round Signed Doubleword to Single-Precision format XX2-form","xvcvsxdsp XT,XB","60@0|T@6|///@11|B@16|440@21|BX@30|TX@31|","" -"VSX Vector Convert Signed Word to Double-Precision format XX2-form","xvcvsxwdp XT,XB","60@0|T@6|///@11|B@16|248@21|BX@30|TX@31|","" -"VSX Vector Convert with round Signed Word to Single-Precision format XX2-form","xvcvsxwsp XT,XB","60@0|T@6|///@11|B@16|184@21|BX@30|TX@31|","" -"VSX Vector Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xvcvuxddp XT,XB","60@0|T@6|///@11|B@16|488@21|BX@30|TX@31|","" -"VSX Vector Convert with round Unsigned Doubleword to Single-Precision format XX2-form","xvcvuxdsp XT,XB","60@0|T@6|///@11|B@16|424@21|BX@30|TX@31|","" -"VSX Vector Convert Unsigned Word to Double-Precision format XX2-form","xvcvuxwdp XT,XB","60@0|T@6|///@11|B@16|232@21|BX@30|TX@31|","" -"VSX Vector Convert with round Unsigned Word to Single-Precision format XX2-form","xvcvuxwsp XT,XB","60@0|T@6|///@11|B@16|168@21|BX@30|TX@31|","" -"VSX Vector Divide Double-Precision XX3-form","xvdivdp XT,XA,XB","60@0|T@6|A@11|B@16|120@21|AX@29|BX@30|TX@31|","" -"VSX Vector Divide Single-Precision XX3-form","xvdivsp XT,XA,XB","60@0|T@6|A@11|B@16|88@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-A Double-Precision XX3-form","xvmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|97@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-A Single-Precision XX3-form","xvmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|65@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-M Double-Precision XX3-form","xvmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|105@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Add Type-M Single-Precision XX3-form","xvmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|73@21|AX@29|BX@30|TX@31|","" -"VSX Vector Maximum Double-Precision XX3-form","xvmaxdp XT,XA,XB","60@0|T@6|A@11|B@16|224@21|AX@29|BX@30|TX@31|","" -"VSX Vector Maximum Single-Precision XX3-form","xvmaxsp 
XT,XA,XB","60@0|T@6|A@11|B@16|192@21|AX@29|BX@30|TX@31|","" -"VSX Vector Minimum Double-Precision XX3-form","xvmindp XT,XA,XB","60@0|T@6|A@11|B@16|232@21|AX@29|BX@30|TX@31|","" -"VSX Vector Minimum Single-Precision XX3-form","xvminsp XT,XA,XB","60@0|T@6|A@11|B@16|200@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-A Double-Precision XX3-form","xvmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|113@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-A Single-Precision XX3-form","xvmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|81@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-M Double-Precision XX3-form","xvmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|121@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply-Subtract Type-M Single-Precision XX3-form","xvmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|89@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply Double-Precision XX3-form","xvmuldp XT,XA,XB","60@0|T@6|A@11|B@16|112@21|AX@29|BX@30|TX@31|","" -"VSX Vector Multiply Single-Precision XX3-form","xvmulsp XT,XA,XB","60@0|T@6|A@11|B@16|80@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Absolute Double-Precision XX2-form","xvnabsdp XT,XB","60@0|T@6|///@11|B@16|489@21|BX@30|TX@31|","" -"VSX Vector Negative Absolute Single-Precision XX2-form","xvnabssp XT,XB","60@0|T@6|///@11|B@16|425@21|BX@30|TX@31|","" -"VSX Vector Negate Double-Precision XX2-form","xvnegdp XT,XB","60@0|T@6|///@11|B@16|505@21|BX@30|TX@31|","" -"VSX Vector Negate Single-Precision XX2-form","xvnegsp XT,XB","60@0|T@6|///@11|B@16|441@21|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-A Double-Precision XX3-form","xvnmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|225@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-A Single-Precision XX3-form","xvnmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|193@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-M Double-Precision XX3-form","xvnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|233@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Add Type-M 
Single-Precision XX3-form","xvnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|201@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-A Double-Precision XX3-form","xvnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|241@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-A Single-Precision XX3-form","xvnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|209@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-M Double-Precision XX3-form","xvnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|249@21|AX@29|BX@30|TX@31|","" -"VSX Vector Negative Multiply-Subtract Type-M Single-Precision XX3-form","xvnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|217@21|AX@29|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round to Nearest Away XX2-form","xvrdpi XT,XB","60@0|T@6|///@11|B@16|201@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer Exact using Current rounding mode XX2-form","xvrdpic XT,XB","60@0|T@6|///@11|B@16|235@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round toward -Infinity XX2-form","xvrdpim XT,XB","60@0|T@6|///@11|B@16|249@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round toward +Infinity XX2-form","xvrdpip XT,XB","60@0|T@6|///@11|B@16|233@21|BX@30|TX@31|","" -"VSX Vector Round to Double-Precision Integer using round toward Zero XX2-form","xvrdpiz XT,XB","60@0|T@6|///@11|B@16|217@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Estimate Double-Precision XX2-form","xvredp XT,XB","60@0|T@6|///@11|B@16|218@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Estimate Single-Precision XX2-form","xvresp XT,XB","60@0|T@6|///@11|B@16|154@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer using round to Nearest Away XX2-form","xvrspi XT,XB","60@0|T@6|///@11|B@16|137@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer Exact using Current rounding mode XX2-form","xvrspic XT,XB","60@0|T@6|///@11|B@16|171@21|BX@30|TX@31|","" -"VSX Vector Round to 
Single-Precision Integer using round toward -Infinity XX2-form","xvrspim XT,XB","60@0|T@6|///@11|B@16|185@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer using round toward +Infinity XX2-form","xvrspip XT,XB","60@0|T@6|///@11|B@16|169@21|BX@30|TX@31|","" -"VSX Vector Round to Single-Precision Integer using round toward Zero XX2-form","xvrspiz XT,XB","60@0|T@6|///@11|B@16|153@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Square Root Estimate Double-Precision XX2-form","xvrsqrtedp XT,XB","60@0|T@6|///@11|B@16|202@21|BX@30|TX@31|","" -"VSX Vector Reciprocal Square Root Estimate Single-Precision XX2-form","xvrsqrtesp XT,XB","60@0|T@6|///@11|B@16|138@21|BX@30|TX@31|","" -"VSX Vector Square Root Double-Precision XX2-form","xvsqrtdp XT,XB","60@0|T@6|///@11|B@16|203@21|BX@30|TX@31|","" -"VSX Vector Square Root Single-Precision XX2-form","xvsqrtsp XT,XB","60@0|T@6|///@11|B@16|139@21|BX@30|TX@31|","" -"VSX Vector Subtract Double-Precision XX3-form","xvsubdp XT,XA,XB","60@0|T@6|A@11|B@16|104@21|AX@29|BX@30|TX@31|","" -"VSX Vector Subtract Single-Precision XX3-form","xvsubsp XT,XA,XB","60@0|T@6|A@11|B@16|72@21|AX@29|BX@30|TX@31|","" -"VSX Vector Test for software Divide Double-Precision XX3-form","xvtdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|125@21|AX@29|BX@30|/@31|","" -"VSX Vector Test for software Divide Single-Precision XX3-form","xvtdivsp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|93@21|AX@29|BX@30|/@31|","" -"VSX Vector Test for software Square Root Double-Precision XX2-form","xvtsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|234@21|BX@30|/@31|","" -"VSX Vector Test for software Square Root Single-Precision XX2-form","xvtsqrtsp BF,XB","60@0|BF@6|//@9|///@11|B@16|170@21|BX@30|/@31|","" -"VSX Vector Logical AND XX3-form","xxland XT,XA,XB","60@0|T@6|A@11|B@16|130@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical AND with Complement XX3-form","xxlandc XT,XA,XB","60@0|T@6|A@11|B@16|138@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical NOR XX3-form","xxlnor 
XT,XA,XB","60@0|T@6|A@11|B@16|162@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical OR XX3-form","xxlor XT,XA,XB","60@0|T@6|A@11|B@16|146@21|AX@29|BX@30|TX@31|","" -"VSX Vector Logical XOR XX3-form","xxlxor XT,XA,XB","60@0|T@6|A@11|B@16|154@21|AX@29|BX@30|TX@31|","" -"VSX Vector Merge High Word XX3-form","xxmrghw XT,XA,XB","60@0|T@6|A@11|B@16|18@21|AX@29|BX@30|TX@31|","" -"VSX Vector Merge Low Word XX3-form","xxmrglw XT,XA,XB","60@0|T@6|A@11|B@16|50@21|AX@29|BX@30|TX@31|","" -"VSX Vector Permute Doubleword Immediate XX3-form","xxpermdi XT,XA,XB,DM","60@0|T@6|A@11|B@16|0@21|DM@22|10@24|AX@29|BX@30|TX@31|","" -"VSX Vector Select XX4-form","xxsel XT,XA,XB,XC","60@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","" -"VSX Vector Shift Left Double by Word Immediate XX3-form","xxsldwi XT,XA,XB,SHW","60@0|T@6|A@11|B@16|0@21|SHW@22|2@24|AX@29|BX@30|TX@31|","" -"VSX Vector Splat Word XX2-form","xxspltw XT,XB,UIM","60@0|T@6|///@11|UIM@14|B@16|164@21|BX@30|TX@31|","" -"Compare Bytes X-form","cmpb RA,RS,RB","31@0|RS@6|RA@11|RB@16|508@21|/@31|","" -"DFP Add X-form","dadd FRT,FRA,FRB (Rc=0)|dadd. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|2@21|Rc@31|","" -"DFP Add Quad X-form","daddq FRTp,FRAp,FRBp (Rc=0)|daddq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|2@21|Rc@31|","" -"DFP Convert From Fixed Quad X-form","dcffixq FRTp,FRB (Rc=0)|dcffixq. FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|802@21|Rc@31|","" -"DFP Compare Ordered X-form","dcmpo BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|130@21|/@31|","" -"DFP Compare Ordered Quad X-form","dcmpoq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|130@21|/@31|","" -"DFP Compare Unordered X-form","dcmpu BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|642@21|/@31|","" -"DFP Compare Unordered Quad X-form","dcmpuq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|642@21|/@31|","" -"DFP Convert To DFP Long X-form","dctdp FRT,FRB (Rc=0)|dctdp. 
FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|258@21|Rc@31|","" -"DFP Convert To Fixed X-form","dctfix FRT,FRB (Rc=0)|dctfix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|290@21|Rc@31|","" -"DFP Convert To Fixed Quad X-form","dctfixq FRT,FRBp (Rc=0)|dctfixq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|290@21|Rc@31|","" -"DFP Convert To DFP Extended X-form","dctqpq FRTp,FRB (Rc=0)|dctqpq. FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|258@21|Rc@31|","" -"DFP Decode DPD To BCD X-form","ddedpd SP,FRT,FRB (Rc=0)|ddedpd. SP,FRT,FRB (Rc=1)","59@0|FRT@6|SP@11|///@13|FRB@16|322@21|Rc@31|","" -"DFP Decode DPD To BCD Quad X-form","ddedpdq SP,FRTp,FRBp (Rc=0)|ddedpdq. SP,FRTp,FRBp (Rc=1)","63@0|FRTp@6|SP@11|///@13|FRBp@16|322@21|Rc@31|","" -"DFP Divide X-form","ddiv FRT,FRA,FRB (Rc=0)|ddiv. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|546@21|Rc@31|","" -"DFP Divide Quad X-form","ddivq FRTp,FRAp,FRBp (Rc=0)|ddivq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|546@21|Rc@31|","" -"DFP Encode BCD To DPD X-form","denbcd S,FRT,FRB (Rc=0)|denbcd. S,FRT,FRB (Rc=1)","59@0|FRT@6|S@11|///@12|FRB@16|834@21|Rc@31|","" -"DFP Encode BCD To DPD Quad X-form","denbcdq S,FRTp,FRBp (Rc=0)|denbcdq. S,FRTp,FRBp (Rc=1)","63@0|FRTp@6|S@11|///@12|FRBp@16|834@21|Rc@31|","" -"DFP Insert Biased Exponent X-form","diex FRT,FRA,FRB (Rc=0)|diex. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|866@21|Rc@31|","" -"DFP Insert Biased Exponent Quad X-form","diexq FRTp,FRA,FRBp|diexq. FRTp,FRA,FRBp (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|866@21|Rc@31|","" -"DFP Multiply X-form","dmul FRT,FRA,FRB (Rc=0)|dmul. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|34@21|Rc@31|","" -"DFP Multiply Quad X-form","dmulq FRTp,FRAp,FRBp (Rc=0)|dmulq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|34@21|Rc@31|","" -"DFP Quantize Z23-form","dqua FRT,FRA,FRB,RMC (Rc=0)|dqua. FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|3@23|Rc@31|","" -"DFP Quantize Immediate Z23-form","dquai TE,FRT,FRB,RMC (Rc=0)|dquai. 
TE,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|TE@11|FRB@16|RMC@21|67@23|Rc@31|","" -"DFP Quantize Immediate Quad Z23-form","dquaiq TE,FRTp,FRBp,RMC (Rc=0)|dquaiq. TE,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|TE@11|FRBp@16|RMC@21|67@23|Rc@31|","" -"DFP Quantize Quad Z23-form","dquaq FRTp,FRAp,FRBp,RMC (Rc=0)|dquaq. FRTp,FRAp,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|RMC@21|3@23|Rc@31|","" -"DFP Round To DFP Long X-form","drdpq FRTp,FRBp (Rc=0)|drdpq. FRTp,FRBp (Rc=1)","63@0|FRTp@6|///@11|FRBp@16|770@21|Rc@31|","" -"DFP Round To FP Integer Without Inexact Z23-form","drintn R,FRT,FRB,RMC (Rc=0)|drintn. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|227@23|Rc@31|","" -"DFP Round To FP Integer Without Inexact Quad Z23-form","drintnq R,FRTp,FRBp,RMC (Rc=0)|drintnq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|227@23|Rc@31|","" -"DFP Round To FP Integer With Inexact Z23-form","drintx R,FRT,FRB,RMC (Rc=0)|drintx. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|99@23|Rc@31|","" -"DFP Round To FP Integer With Inexact Quad Z23-form","drintxq R,FRTp,FRBp,RMC (Rc=0)|drintxq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|99@23|Rc@31|","" -"DFP Reround Z23-form","drrnd FRT,FRA,FRB,RMC (Rc=0)|drrnd. FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|35@23|Rc@31|","" -"DFP Reround Quad Z23-form","drrndq FRTp,FRA,FRBp,RMC (Rc=0)|drrndq. FRTp,FRA,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|RMC@21|35@23|Rc@31|","" -"DFP Round To DFP Short X-form","drsp FRT,FRB (Rc=0)|drsp. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|770@21|Rc@31|","" -"DFP Shift Significand Left Immediate Z22-form","dscli FRT,FRA,SH (Rc=0)|dscli. FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|66@22|Rc@31|","" -"DFP Shift Significand Left Immediate Quad Z22-form","dscliq FRTp,FRAp,SH (Rc=0)|dscliq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|66@22|Rc@31|","" -"DFP Shift Significand Right Immediate Z22-form","dscri FRT,FRA,SH (Rc=0)|dscri. 
FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|98@22|Rc@31|","" -"DFP Shift Significand Right Immediate Quad Z22-form","dscriq FRTp,FRAp,SH (Rc=0)|dscriq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|98@22|Rc@31|","" -"DFP Subtract X-form","dsub FRT,FRA,FRB (Rc=0)|dsub. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|514@21|Rc@31|","" -"DFP Subtract Quad X-form","dsubq FRTp,FRAp,FRBp (Rc=0)|dsubq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|514@21|Rc@31|","" -"DFP Test Data Class Z22-form","dtstdc BF,FRA,DCM","59@0|BF@6|//@9|FRA@11|DCM@16|194@22|/@31|","" -"DFP Test Data Class Quad Z22-form","dtstdcq BF,FRAp,DCM","63@0|BF@6|//@9|FRAp@11|DCM@16|194@22|/@31|","" -"DFP Test Data Group Z22-form","dtstdg BF,FRA,DGM","59@0|BF@6|//@9|FRA@11|DGM@16|226@22|/@31|","" -"DFP Test Data Group Quad Z22-form","dtstdgq BF,FRAp,DGM","63@0|BF@6|//@9|FRAp@11|DGM@16|226@22|/@31|","" -"DFP Test Exponent X-form","dtstex BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|162@21|/@31|","" -"DFP Test Exponent Quad X-form","dtstexq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|162@21|/@31|","" -"DFP Test Significance X-form","dtstsf BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|674@21|/@31|","" -"DFP Test Significance Quad X-form","dtstsfq BF,FRA,FRBp","63@0|BF@6|//@9|FRA@11|FRBp@16|674@21|/@31|","" -"DFP Extract Biased Exponent X-form","dxex FRT,FRB (Rc=0)|dxex. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|354@21|Rc@31|","" -"DFP Extract Biased Exponent Quad X-form","dxexq FRT,FRBp (Rc=0)|dxexq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|354@21|Rc@31|","" -"Floating Copy Sign X-form","fcpsgn FRT, FRA, FRB (Rc=0)|fcpsgn. 
FRT, FRA, FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|8@21|Rc@31|","" -"Load Byte & Zero Caching Inhibited Indexed X-form","lbzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|853@21|/@31|","" -"Load Doubleword Caching Inhibited Indexed X-form","ldcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|885@21|/@31|","" -"Load Floating-Point Double Pair DS-form","lfdp FRTp,DS(RA)","57@0|FRTp@6|RA@11|DS@16|0@30|","" -"Load Floating-Point Double Pair Indexed X-form","lfdpx FRTp,RA,RB","31@0|FRTp@6|RA@11|RB@16|791@21|/@31|","" -"Load Floating-Point as Integer Word Algebraic Indexed X-form","lfiwax FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|855@21|/@31|","" -"Load Halfword & Zero Caching Inhibited Indexed X-form","lhzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|821@21|/@31|","" -"Load Word & Zero Caching Inhibited Indexed X-form","lwzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|789@21|/@31|","" -"Parity Doubleword X-form","prtyd RA,RS","31@0|RS@6|RA@11|///@16|186@21|/@31|","" -"Parity Word X-form","prtyw RA,RS","31@0|RS@6|RA@11|///@16|154@21|/@31|","" -"SLB Find Entry ESID X-form","slbfee. 
RT,RB","31@0|RT@6|///@11|RB@16|979@21|1@31|","" -"Store Byte Caching Inhibited Indexed X-form","stbcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|981@21|/@31|","" -"Store Doubleword Caching Inhibited Indexed X-form","stdcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|1013@21|/@31|","" -"Store Floating-Point Double Pair DS-form","stfdp FRSp,DS(RA)","61@0|FRSp@6|RA@11|DS@16|0@30|","" -"Store Floating-Point Double Pair Indexed X-form","stfdpx FRSp,RA,RB","31@0|FRSp@6|RA@11|RB@16|919@21|/@31|","" -"Store Halfword Caching Inhibited Indexed X-form","sthcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|949@21|/@31|","" -"Store Word Caching Inhibited Indexed X-form","stwcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|917@21|/@31|","" -"Integer Select A-form","isel RT,RA,RB,BC","31@0|RT@6|RA@11|RB@16|BC@21|15@26|/@31|","" -"Load Vector Element Byte Indexed X-form","lvebx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|7@21|/@31|","" -"Load Vector Element Halfword Indexed X-form","lvehx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|39@21|/@31|","" -"Load Vector Element Word Indexed X-form","lvewx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|71@21|/@31|","" -"Load Vector for Shift Left Indexed X-form","lvsl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|6@21|/@31|","" -"Load Vector for Shift Right Indexed X-form","lvsr VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|38@21|/@31|","" -"Load Vector Indexed X-form","lvx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|103@21|/@31|","" -"Load Vector Indexed Last X-form","lvxl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|359@21|/@31|","" -"Move From Vector Status and Control Register VX-form","mfvscr VRT","4@0|VRT@6|///@11|///@16|1540@21|","" -"Move To Vector Status and Control Register VX-form","mtvscr VRB","4@0|///@6|///@11|VRB@16|1604@21|","" -"Store Vector Element Byte Indexed X-form","stvebx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|135@21|/@31|","" -"Store Vector Element Halfword Indexed X-form","stvehx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|167@21|/@31|","" -"Store Vector Element Word Indexed X-form","stvewx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|199@21|/@31|","" 
-"Store Vector Indexed X-form","stvx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|231@21|/@31|","" -"Store Vector Indexed Last X-form","stvxl VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|487@21|/@31|","" -"TLB Invalidate Entry Local X-form","tlbiel RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|274@21|/@31|","" -"Vector Add & write Carry Unsigned Word VX-form","vaddcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|384@21|","" -"Vector Add Floating-Point VX-form","vaddfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|10@21|","" -"Vector Add Signed Byte Saturate VX-form","vaddsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|768@21|","" -"Vector Add Signed Halfword Saturate VX-form","vaddshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|832@21|","" -"Vector Add Signed Word Saturate VX-form","vaddsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|896@21|","" -"Vector Add Unsigned Byte Modulo VX-form","vaddubm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|0@21|","" -"Vector Add Unsigned Byte Saturate VX-form","vaddubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|512@21|","" -"Vector Add Unsigned Halfword Modulo VX-form","vadduhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|64@21|","" -"Vector Add Unsigned Halfword Saturate VX-form","vadduhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|576@21|","" -"Vector Add Unsigned Word Modulo VX-form","vadduwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|128@21|","" -"Vector Add Unsigned Word Saturate VX-form","vadduws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|640@21|","" -"Vector Logical AND VX-form","vand VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1028@21|","" -"Vector Logical AND with Complement VX-form","vandc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1092@21|","" -"Vector Average Signed Byte VX-form","vavgsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1282@21|","" -"Vector Average Signed Halfword VX-form","vavgsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1346@21|","" -"Vector Average Signed Word VX-form","vavgsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1410@21|","" -"Vector Average Unsigned Byte VX-form","vavgub 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1026@21|","" -"Vector Average Unsigned Halfword VX-form","vavguh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1090@21|","" -"Vector Average Unsigned Word VX-form","vavguw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1154@21|","" -"Vector Convert with round to nearest From Signed Word to floating-point format VX-form","vcfsx VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|842@21|","" -"Vector Convert with round to nearest From Unsigned Word to floating-point format VX-form","vcfux VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|778@21|","" -"Vector Compare Bounds Floating-Point VC-form","vcmpbfp VRT,VRA,VRB (Rc=0)|vcmpbfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|966@22|","" -"Vector Compare Equal Floating-Point VC-form","vcmpeqfp VRT,VRA,VRB (Rc=0)|vcmpeqfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|198@22|","" -"Vector Compare Equal Unsigned Byte VC-form","vcmpequb VRT,VRA,VRB (Rc=0)|vcmpequb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|6@22|","" -"Vector Compare Equal Unsigned Halfword VC-form","vcmpequh VRT,VRA,VRB (Rc=0)|vcmpequh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|70@22|","" -"Vector Compare Equal Unsigned Word VC-form","vcmpequw VRT,VRA,VRB (Rc=0)|vcmpequw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|134@22|","" -"Vector Compare Greater Than or Equal Floating-Point VC-form","vcmpgefp VRT,VRA,VRB (Rc=0)|vcmpgefp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|454@22|","" -"Vector Compare Greater Than Floating-Point VC-form","vcmpgtfp VRT,VRA,VRB (Rc=0)|vcmpgtfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|710@22|","" -"Vector Compare Greater Than Signed Byte VC-form","vcmpgtsb VRT,VRA,VRB (Rc=0)|vcmpgtsb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|774@22|","" -"Vector Compare Greater Than Signed Halfword VC-form","vcmpgtsh VRT,VRA,VRB (Rc=0)|vcmpgtsh. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|838@22|","" -"Vector Compare Greater Than Signed Word VC-form","vcmpgtsw VRT,VRA,VRB (Rc=0)|vcmpgtsw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|902@22|","" -"Vector Compare Greater Than Unsigned Byte VC-form","vcmpgtub VRT,VRA,VRB (Rc=0)|vcmpgtub. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|518@22|","" -"Vector Compare Greater Than Unsigned Halfword VC-form","vcmpgtuh VRT,VRA,VRB (Rc=0)|vcmpgtuh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|582@22|","" -"Vector Compare Greater Than Unsigned Word VC-form","vcmpgtuw VRT,VRA,VRB (Rc=0)|vcmpgtuw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|646@22|","" -"Vector Convert with round to zero from floating-point To Signed Word format Saturate VX-form","vctsxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|970@21|","" -"Vector Convert with round to zero from floating-point To Unsigned Word format Saturate VX-form","vctuxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|906@21|","" -"Vector 2 Raised to the Exponent Estimate Floating-Point VX-form","vexptefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|394@21|","" -"Vector Log Base 2 Estimate Floating-Point VX-form","vlogefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|458@21|","" -"Vector Multiply-Add Floating-Point VA-form","vmaddfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|46@26|","" -"Vector Maximum Floating-Point VX-form","vmaxfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1034@21|","" -"Vector Maximum Signed Byte VX-form","vmaxsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|258@21|","" -"Vector Maximum Signed Halfword VX-form","vmaxsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|322@21|","" -"Vector Maximum Signed Word VX-form","vmaxsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|386@21|","" -"Vector Maximum Unsigned Byte VX-form","vmaxub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|2@21|","" -"Vector Maximum Unsigned Halfword VX-form","vmaxuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|66@21|","" -"Vector Maximum Unsigned Word VX-form","vmaxuw 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|130@21|","" -"Vector Multiply-High-Add Signed Halfword Saturate VA-form","vmhaddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|32@26|","" -"Vector Multiply-High-Round-Add Signed Halfword Saturate VA-form","vmhraddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|33@26|","" -"Vector Minimum Floating-Point VX-form","vminfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1098@21|","" -"Vector Minimum Signed Byte VX-form","vminsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|770@21|","" -"Vector Minimum Signed Halfword VX-form","vminsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|834@21|","" -"Vector Minimum Signed Word VX-form","vminsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|898@21|","" -"Vector Minimum Unsigned Byte VX-form","vminub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|514@21|","" -"Vector Minimum Unsigned Halfword VX-form","vminuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|578@21|","" -"Vector Minimum Unsigned Word VX-form","vminuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|642@21|","" -"Vector Multiply-Low-Add Unsigned Halfword Modulo VA-form","vmladduhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|34@26|","" -"Vector Merge High Byte VX-form","vmrghb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|12@21|","" -"Vector Merge High Halfword VX-form","vmrghh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|76@21|","" -"Vector Merge High Word VX-form","vmrghw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|140@21|","" -"Vector Merge Low Byte VX-form","vmrglb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|268@21|","" -"Vector Merge Low Halfword VX-form","vmrglh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|332@21|","" -"Vector Merge Low Word VX-form","vmrglw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|396@21|","" -"Vector Multiply-Sum Mixed Byte Modulo VA-form","vmsummbm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|37@26|","" -"Vector Multiply-Sum Signed Halfword Modulo VA-form","vmsumshm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|40@26|","" -"Vector Multiply-Sum Signed Halfword Saturate 
VA-form","vmsumshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|41@26|","" -"Vector Multiply-Sum Unsigned Byte Modulo VA-form","vmsumubm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|36@26|","" -"Vector Multiply-Sum Unsigned Halfword Modulo VA-form","vmsumuhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|38@26|","" -"Vector Multiply-Sum Unsigned Halfword Saturate VA-form","vmsumuhs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|39@26|","" -"Vector Multiply Even Signed Byte VX-form","vmulesb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|776@21|","" -"Vector Multiply Even Signed Halfword VX-form","vmulesh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|840@21|","" -"Vector Multiply Even Unsigned Byte VX-form","vmuleub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|520@21|","" -"Vector Multiply Even Unsigned Halfword VX-form","vmuleuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|584@21|","" -"Vector Multiply Odd Signed Byte VX-form","vmulosb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|264@21|","" -"Vector Multiply Odd Signed Halfword VX-form","vmulosh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|328@21|","" -"Vector Multiply Odd Unsigned Byte VX-form","vmuloub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|8@21|","" -"Vector Multiply Odd Unsigned Halfword VX-form","vmulouh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|72@21|","" -"Vector Negative Multiply-Subtract Floating-Point VA-form","vnmsubfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|47@26|","" -"Vector Logical NOR VX-form","vnor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1284@21|","" -"Vector Logical OR VX-form","vor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1156@21|","" -"Vector Permute VA-form","vperm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|43@26|","" -"Vector Pack Pixel VX-form","vpkpx VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|782@21|","" -"Vector Pack Signed Halfword Signed Saturate VX-form","vpkshss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|398@21|","" -"Vector Pack Signed Halfword Unsigned Saturate VX-form","vpkshus 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|270@21|","" -"Vector Pack Signed Word Signed Saturate VX-form","vpkswss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|462@21|","" -"Vector Pack Signed Word Unsigned Saturate VX-form","vpkswus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|334@21|","" -"Vector Pack Unsigned Halfword Unsigned Modulo VX-form","vpkuhum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|14@21|","" -"Vector Pack Unsigned Halfword Unsigned Saturate VX-form","vpkuhus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|142@21|","" -"Vector Pack Unsigned Word Unsigned Modulo VX-form","vpkuwum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|78@21|","" -"Vector Pack Unsigned Word Unsigned Saturate VX-form","vpkuwus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|206@21|","" -"Vector Reciprocal Estimate Floating-Point VX-form","vrefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|266@21|","" -"Vector Round to Floating-Point Integer toward -Infinity VX-form","vrfim VRT,VRB","4@0|VRT@6|///@11|VRB@16|714@21|","" -"Vector Round to Floating-Point Integer Nearest VX-form","vrfin VRT,VRB","4@0|VRT@6|///@11|VRB@16|522@21|","" -"Vector Round to Floating-Point Integer toward +Infinity VX-form","vrfip VRT,VRB","4@0|VRT@6|///@11|VRB@16|650@21|","" -"Vector Round to Floating-Point Integer toward Zero VX-form","vrfiz VRT,VRB","4@0|VRT@6|///@11|VRB@16|586@21|","" -"Vector Rotate Left Byte VX-form","vrlb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|4@21|","" -"Vector Rotate Left Halfword VX-form","vrlh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|68@21|","" -"Vector Rotate Left Word VX-form","vrlw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|132@21|","" -"Vector Reciprocal Square Root Estimate Floating-Point VX-form","vrsqrtefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|330@21|","" -"Vector Select VA-form","vsel VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|42@26|","" -"Vector Shift Left VX-form","vsl VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|452@21|","" -"Vector Shift Left Byte VX-form","vslb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|260@21|","" -"Vector Shift Left Double by Octet 
Immediate VA-form","vsldoi VRT,VRA,VRB,SHB","4@0|VRT@6|VRA@11|VRB@16|/@21|SHB@22|44@26|","" -"Vector Shift Left Halfword VX-form","vslh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|324@21|","" -"Vector Shift Left by Octet VX-form","vslo VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1036@21|","" -"Vector Shift Left Word VX-form","vslw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|388@21|","" -"Vector Splat Byte VX-form","vspltb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|524@21|","" -"Vector Splat Halfword VX-form","vsplth VRT,VRB,UIM","4@0|VRT@6|//@11|UIM@13|VRB@16|588@21|","" -"Vector Splat Immediate Signed Byte VX-form","vspltisb VRT,SIM","4@0|VRT@6|SIM@11|///@16|780@21|","" -"Vector Splat Immediate Signed Halfword VX-form","vspltish VRT,SIM","4@0|VRT@6|SIM@11|///@16|844@21|","" -"Vector Splat Immediate Signed Word VX-form","vspltisw VRT,SIM","4@0|VRT@6|SIM@11|///@16|908@21|","" -"Vector Splat Word VX-form","vspltw VRT,VRB,UIM","4@0|VRT@6|///@11|UIM@14|VRB@16|652@21|","" -"Vector Shift Right VX-form","vsr VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|708@21|","" -"Vector Shift Right Algebraic Byte VX-form","vsrab VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|772@21|","" -"Vector Shift Right Algebraic Halfword VX-form","vsrah VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|836@21|","" -"Vector Shift Right Algebraic Word VX-form","vsraw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|900@21|","" -"Vector Shift Right Byte VX-form","vsrb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|516@21|","" -"Vector Shift Right Halfword VX-form","vsrh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|580@21|","" -"Vector Shift Right by Octet VX-form","vsro VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1100@21|","" -"Vector Shift Right Word VX-form","vsrw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|644@21|","" -"Vector Subtract & Write Carry-out Unsigned Word VX-form","vsubcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1408@21|","" -"Vector Subtract Floating-Point VX-form","vsubfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|74@21|","" -"Vector Subtract Signed Byte Saturate 
VX-form","vsubsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1792@21|","" -"Vector Subtract Signed Halfword Saturate VX-form","vsubshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1856@21|","" -"Vector Subtract Signed Word Saturate VX-form","vsubsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1920@21|","" -"Vector Subtract Unsigned Byte Modulo VX-form","vsububm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1024@21|","" -"Vector Subtract Unsigned Byte Saturate VX-form","vsububs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1536@21|","" -"Vector Subtract Unsigned Halfword Modulo VX-form","vsubuhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1088@21|","" -"Vector Subtract Unsigned Halfword Saturate VX-form","vsubuhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1600@21|","" -"Vector Subtract Unsigned Word Modulo VX-form","vsubuwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1152@21|","" -"Vector Subtract Unsigned Word Saturate VX-form","vsubuws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1664@21|","" -"Vector Sum across Half Signed Word Saturate VX-form","vsum2sws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1672@21|","" -"Vector Sum across Quarter Signed Byte Saturate VX-form","vsum4sbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1800@21|","" -"Vector Sum across Quarter Signed Halfword Saturate VX-form","vsum4shs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1608@21|","" -"Vector Sum across Quarter Unsigned Byte Saturate VX-form","vsum4ubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1544@21|","" -"Vector Sum across Signed Word Saturate VX-form","vsumsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1928@21|","" -"Vector Unpack High Pixel VX-form","vupkhpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|846@21|","" -"Vector Unpack High Signed Byte VX-form","vupkhsb VRT,VRB","4@0|VRT@6|///@11|VRB@16|526@21|","" -"Vector Unpack High Signed Halfword VX-form","vupkhsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|590@21|","" -"Vector Unpack Low Pixel VX-form","vupklpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|974@21|","" -"Vector Unpack Low Signed Byte VX-form","vupklsb 
VRT,VRB","4@0|VRT@6|///@11|VRB@16|654@21|","" -"Vector Unpack Low Signed Halfword VX-form","vupklsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|718@21|","" -"Vector Logical XOR VX-form","vxor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1220@21|","" -"Floating Reciprocal Estimate A-form","fre FRT,FRB (Rc=0)|fre. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","" -"Floating Round to Integer Minus X-form","frim FRT,FRB (Rc=0)|frim. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|488@21|Rc@31|","" -"Floating Round to Integer Nearest X-form","frin FRT,FRB (Rc=0)|frin. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|392@21|Rc@31|","" -"Floating Round to Integer Plus X-form","frip FRT,FRB (Rc=0)|frip. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|456@21|Rc@31|","" -"Floating Round to Integer Toward Zero X-form","friz FRT,FRB (Rc=0)|friz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|424@21|Rc@31|","" -"Floating Reciprocal Square Root Estimate Single A-form","frsqrtes FRT,FRB (Rc=0)|frsqrtes. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","" -"Return From Interrupt Doubleword Hypervisor XL-form","hrfid","19@0|///@6|///@11|///@16|274@21|/@31|","" -"Population Count Bytes X-form","popcntb RA, RS","31@0|RS@6|RA@11|///@16|122@21|/@31|","" -"Move From One Condition Register Field XFX-form","mfocrf RT,FXM","31@0|RT@6|1@11|FXM@12|/@20|19@21|/@31|","" -"Move To One Condition Register Field XFX-form","mtocrf FXM,RS","31@0|RS@6|1@11|FXM@12|/@20|144@21|/@31|","" -"SLB Move From Entry ESID X-form","slbmfee RT,RB","31@0|RT@6|///@11|L@15|RB@16|915@21|/@31|","" -"SLB Move From Entry VSID X-form","slbmfev RT,RB","31@0|RT@6|///@11|L@15|RB@16|851@21|/@31|","" -"SLB Move To Entry X-form","slbmte RS,RB","31@0|RS@6|///@11|RB@16|402@21|/@31|","" -"Return From System Call Vectored XL-form","rfscv","19@0|///@6|///@11|///@16|82@21|/@31|","" -"System Call Vectored SC-form","scv LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|0@30|1@31|","" -"Load Quadword DQ-form","lq 
RTp,DQ(RA)","56@0|RTp@6|RA@11|DQ@16|///@28|","" -"Store Quadword DS-form","stq RSp,DS(RA)","62@0|RSp@6|RA@11|DS@16|2@30|","" -"Count Leading Zeros Doubleword X-form","cntlzd RA,RS (Rc=0)|cntlzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|58@21|Rc@31|","" -"Data Cache Block Flush X-form","dcbf RA,RB,L","31@0|//@6|L@8|RA@11|RB@16|86@21|/@31|","" -"Data Cache Block Store X-form","dcbst RA,RB","31@0|///@6|RA@11|RB@16|54@21|/@31|","" -"Data Cache Block Touch X-form","dcbt RA,RB,TH","31@0|TH@6|RA@11|RB@16|278@21|/@31|","" -"Data Cache Block Touch for Store X-form","dcbtst RA,RB,TH","31@0|TH@6|RA@11|RB@16|246@21|/@31|","" -"Divide Doubleword XO-form","divd RT,RA,RB (OE=0 Rc=0)|divd. RT,RA,RB (OE=0 Rc=1)|divdo RT,RA,RB (OE=1 Rc=0)|divdo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|489@22|Rc@31|","" -"Divide Doubleword Unsigned XO-form","divdu RT,RA,RB (OE=0 Rc=0)|divdu. RT,RA,RB (OE=0 Rc=1)|divduo RT,RA,RB (OE=1 Rc=0)|divduo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|457@22|Rc@31|","" -"Divide Word XO-form","divw RT,RA,RB (OE=0 Rc=0)|divw. RT,RA,RB (OE=0 Rc=1)|divwo RT,RA,RB (OE=1 Rc=0)|divwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|491@22|Rc@31|","" -"Divide Word Unsigned XO-form","divwu RT,RA,RB (OE=0 Rc=0)|divwu. RT,RA,RB (OE=0 Rc=1)|divwuo RT,RA,RB (OE=1 Rc=0)|divwuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|459@22|Rc@31|","" -"Enforce In-order Execution of I/O X-form","eieio","31@0|///@6|///@11|///@16|854@21|/@31|","" -"Extend Sign Byte X-form","extsb RA,RS (Rc=0)|extsb. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|954@21|Rc@31|","" -"Extend Sign Word X-form","extsw RA,RS (Rc=0)|extsw. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|986@21|Rc@31|","" -"Floating Add Single A-form","fadds FRT,FRA,FRB (Rc=0)|fadds. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","" -"Floating Convert with round Signed Doubleword to Double-Precision format X-form","fcfid FRT,FRB (Rc=0)|fcfid. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","" -"Floating Convert with round Double-Precision To Signed Doubleword format X-form","fctid FRT,FRB (Rc=0)|fctid. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|814@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Signed Doubleword format X-form","fctidz FRT,FRB (Rc=0)|fctidz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|815@21|Rc@31|","" -"Floating Divide Single A-form","fdivs FRT,FRA,FRB (Rc=0)|fdivs. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","" -"Floating Multiply-Add Single A-form","fmadds FRT,FRA,FRC,FRB (Rc=0)|fmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","" -"Floating Multiply-Subtract Single A-form","fmsubs FRT,FRA,FRC,FRB (Rc=0)|fmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","" -"Floating Multiply Single A-form","fmuls FRT,FRA,FRC (Rc=0)|fmuls. FRT,FRA,FRC (Rc=1)","59@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","" -"Floating Negative Multiply-Add Single A-form","fnmadds FRT,FRA,FRC,FRB (Rc=0)|fnmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","" -"Floating Negative Multiply-Subtract Single A-form","fnmsubs FRT,FRA,FRC,FRB (Rc=0)|fnmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","" -"Floating Reciprocal Estimate Single A-form","fres FRT,FRB (Rc=0)|fres. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","" -"Floating Reciprocal Square Root Estimate A-form","frsqrte FRT,FRB (Rc=0)|frsqrte. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","" -"Floating Select A-form","fsel FRT,FRA,FRC,FRB (Rc=0)|fsel. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|23@26|Rc@31|","" -"Floating Square Root Single A-form","fsqrts FRT,FRB (Rc=0)|fsqrts. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","" -"Floating Subtract Single A-form","fsubs FRT,FRA,FRB (Rc=0)|fsubs. 
FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","" -"Instruction Cache Block Invalidate X-form","icbi RA,RB","31@0|///@6|RA@11|RB@16|982@21|/@31|","" -"Load Doubleword DS-form","ld RT,DS(RA)","58@0|RT@6|RA@11|DS@16|0@30|","" -"Load Doubleword And Reserve Indexed X-form","ldarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|84@21|EH@31|","" -"Load Doubleword with Update DS-form","ldu RT,DS(RA)","58@0|RT@6|RA@11|DS@16|1@30|","" -"Load Doubleword with Update Indexed X-form","ldux RT,RA,RB","31@0|RT@6|RA@11|RB@16|53@21|/@31|","" -"Load Doubleword Indexed X-form","ldx RT,RA,RB","31@0|RT@6|RA@11|RB@16|21@21|/@31|","" -"Load Word Algebraic DS-form","lwa RT,DS(RA)","58@0|RT@6|RA@11|DS@16|2@30|","" -"Load Word & Reserve Indexed X-form","lwarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|20@21|EH@31|","" -"Load Word Algebraic with Update Indexed X-form","lwaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|373@21|/@31|","" -"Load Word Algebraic Indexed X-form","lwax RT,RA,RB","31@0|RT@6|RA@11|RB@16|341@21|/@31|","" -"Move From Time Base XFX-form","mftb RT,TBR","31@0|RT@6|tbr@11|371@21|/@31|","" -"Move To MSR Doubleword X-form","mtmsrd RS,L","31@0|RS@6|///@11|L@15|///@16|178@21|/@31|","" -"Multiply High Doubleword XO-form","mulhd RT,RA,RB (Rc=0)|mulhd. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|73@22|Rc@31|","" -"Multiply High Doubleword Unsigned XO-form","mulhdu RT,RA,RB (Rc=0)|mulhdu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|9@22|Rc@31|","" -"Multiply High Word XO-form","mulhw RT,RA,RB (Rc=0)|mulhw. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|75@22|Rc@31|","" -"Multiply High Word Unsigned XO-form","mulhwu RT,RA,RB (Rc=0)|mulhwu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|11@22|Rc@31|","" -"Multiply Low Doubleword XO-form","mulld RT,RA,RB (OE=0 Rc=0)|mulld. RT,RA,RB (OE=0 Rc=1)|mulldo RT,RA,RB (OE=1 Rc=0)|mulldo. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|233@22|Rc@31|","" -"Return from Interrupt Doubleword XL-form","rfid","19@0|///@6|///@11|///@16|18@21|/@31|","" -"Rotate Left Doubleword then Clear Left MDS-form","rldcl RA,RS,RB,MB (Rc=0)|rldcl. RA,RS,RB,MB (Rc=1)","30@0|RS@6|RA@11|RB@16|mb@21|8@27|Rc@31|","" -"Rotate Left Doubleword then Clear Right MDS-form","rldcr RA,RS,RB,ME (Rc=0)|rldcr. RA,RS,RB,ME (Rc=1)","30@0|RS@6|RA@11|RB@16|me@21|9@27|Rc@31|","" -"Rotate Left Doubleword Immediate then Clear MD-form","rldic RA,RS,SH,MB (Rc=0)|rldic. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|2@27|sh@30|Rc@31|","" -"Rotate Left Doubleword Immediate then Clear Left MD-form","rldicl RA,RS,SH,MB (Rc=0)|rldicl. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|0@27|sh@30|Rc@31|","" -"Rotate Left Doubleword Immediate then Clear Right MD-form","rldicr RA,RS,SH,ME (Rc=0)|rldicr. RA,RS,SH,ME (Rc=1)","30@0|RS@6|RA@11|sh@16|me@21|1@27|sh@30|Rc@31|","" -"Rotate Left Doubleword Immediate then Mask Insert MD-form","rldimi RA,RS,SH,MB (Rc=0)|rldimi. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|3@27|sh@30|Rc@31|","" -"System Call SC-form","sc LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|1@30|/@31|","" -"SLB Invalidate All X-form","slbia IH","31@0|//@6|IH@8|///@11|///@16|498@21|/@31|","" -"SLB Invalidate Entry X-form","slbie RB","31@0|///@6|///@11|RB@16|434@21|/@31|","" -"Shift Left Doubleword X-form","sld RA,RS,RB (Rc=0)|sld. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|27@21|Rc@31|","" -"Shift Right Algebraic Doubleword X-form","srad RA,RS,RB (Rc=0)|srad. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|794@21|Rc@31|","" -"Shift Right Algebraic Doubleword Immediate XS-form","sradi RA,RS,SH (Rc=0)|sradi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|413@21|sh@30|Rc@31|","" -"Shift Right Doubleword X-form","srd RA,RS,RB (Rc=0)|srd. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|539@21|Rc@31|","" -"Store Doubleword DS-form","std RS,DS(RA)","62@0|RS@6|RA@11|DS@16|0@30|","" -"Store Doubleword Conditional Indexed X-form","stdcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|214@21|1@31|","" -"Store Doubleword with Update DS-form","stdu RS,DS(RA)","62@0|RS@6|RA@11|DS@16|1@30|","" -"Store Doubleword with Update Indexed X-form","stdux RS,RA,RB","31@0|RS@6|RA@11|RB@16|181@21|/@31|","" -"Store Doubleword Indexed X-form","stdx RS,RA,RB","31@0|RS@6|RA@11|RB@16|149@21|/@31|","" -"Store Floating-Point as Integer Word Indexed X-form","stfiwx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|983@21|/@31|","" -"Store Word Conditional Indexed X-form","stwcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|150@21|1@31|","" -"Subtract From XO-form","subf RT,RA,RB (OE=0 Rc=0)|subf. RT,RA,RB (OE=0 Rc=1)|subfo RT,RA,RB (OE=1 Rc=0)|subfo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|40@22|Rc@31|","" -"Trap Doubleword X-form","td TO,RA,RB","31@0|TO@6|RA@11|RB@16|68@21|/@31|","" -"Trap Doubleword Immediate D-form","tdi TO,RA,SI","2@0|TO@6|RA@11|SI@16|","" -"TLB Synchronize X-form","tlbsync","31@0|///@6|///@11|///@16|566@21|/@31|","" -"Floating Convert with round Double-Precision To Signed Word format X-form","fctiw FRT,FRB (Rc=0)|fctiw. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|14@21|Rc@31|","" -"Floating Convert with truncate Double-Precision To Signed Word fomat X-form","fctiwz FRT,FRB (Rc=0)|fctiwz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|15@21|Rc@31|","" -"Floating Square Root A-form","fsqrt FRT,FRB (Rc=0)|fsqrt. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","" -"Add XO-form","add RT,RA,RB (OE=0 Rc=0)|add. RT,RA,RB (OE=0 Rc=1)|addo RT,RA,RB (OE=1 Rc=0)|addo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|266@22|Rc@31|","" -"Add Carrying XO-form","addc RT,RA,RB (OE=0 Rc=0)|addc. RT,RA,RB (OE=0 Rc=1)|addco RT,RA,RB (OE=1 Rc=0)|addco. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|10@22|Rc@31|","" -"Add Extended XO-form","adde RT,RA,RB (OE=0 Rc=0)|adde. RT,RA,RB (OE=0 Rc=1)|addeo RT,RA,RB (OE=1 Rc=0)|addeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|138@22|Rc@31|","" -"Add Immediate D-form","addi RT,RA,SI|li RT,SI (RA=0)","14@0|RT@6|RA@11|SI@16|","" -"Add Immediate Carrying D-formy","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","" -"Add Immediate Carrying and Record D-form","addic. RT,RA,SI","13@0|RT@6|RA@11|SI@16|","" -"Add Immediate Shifted D-form","addis RT,RA,SI|lis RT,SI (RA=0)","15@0|RT@6|RA@11|SI@16|","" -"Add to Minus One Extended XO-form","addme RT,RA (OE=0 Rc=0)|addme. RT,RA (OE=0 Rc=1)|addmeo RT,RA (OE=1 Rc=0)|addmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|234@22|Rc@31|","" -"Add to Zero Extended XO-form","addze RT,RA (OE=0 Rc=0)|addze. RT,RA (OE=0 Rc=1)|addzeo RT,RA (OE=1 Rc=0)|addzeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|202@22|Rc@31|","" -"AND X-form","and RA,RS,RB (Rc=0)|and. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|28@21|Rc@31|","" -"AND with Complement X-form","andc RA,RS,RB (Rc=0)|andc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|60@21|Rc@31|","" -"AND Immediate D-form","andi. RA,RS,UI","28@0|RS@6|RA@11|UI@16|","" -"AND Immediate Shifted D-form","andis. 
RA,RS,UI","29@0|RS@6|RA@11|UI@16|","" -"Branch I-form","b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)","18@0|LI@6|AA@30|LK@31|","" -"Branch Conditional B-form","bc BO,BI,target_addr (AA=0 LK=0)|bca BO,BI,target_addr (AA=1 LK=0)|bcl BO,BI,target_addr (AA=0 LK=1)|bcla BO,BI,target_addr (AA=1 LK=1)","16@0|BO@6|BI@11|BD@16|AA@30|LK@31|","" -"Branch Conditional to Count Register XL-form","bcctr BO,BI,BH (LK=0)|bcctrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|528@21|LK@31|","" -"Branch Conditional to Link Register XL-form","bclr BO,BI,BH (LK=0)|bclrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|16@21|LK@31|","" -"Compare X-form","cmp BF,L,RA,RB|cmpw BF,RA,RB (L=0)|cmpd BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|0@21|/@31|","" -"Compare Immediate D-form","cmpi BF,L,RA,SI|cmpwi BF,RA,SI (L=0)|cmpdi BF,RA,SI (L=1)","11@0|BF@6|/@9|L@10|RA@11|SI@16|","" -"Compare Logical X-form","cmpl BF,L,RA,RB|cmplw BF,RA,RB (L=0)|cmpld BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|32@21|/@31|","" -"Compare Logical Immediate D-form","cmpli BF,L,RA,UI|cmplwi BF,RA,UI (L=0)|cmpldi BF,RA,UI (L=1)","10@0|BF@6|/@9|L@10|RA@11|UI@16|","" -"Count Leading Zeros Word X-form","cntlzw RA,RS (Rc=0)|cntlzw. 
RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|26@21|Rc@31|","" -"Condition Register AND XL-form","crand BT,BA,BB","19@0|BT@6|BA@11|BB@16|257@21|/@31|","" -"Condition Register AND with Complement XL-form","crandc BT,BA,BB","19@0|BT@6|BA@11|BB@16|129@21|/@31|","" -"Condition Register Equivalent XL-form","creqv BT,BA,BB","19@0|BT@6|BA@11|BB@16|289@21|/@31|","" -"Condition Register NAND XL-form","crnand BT,BA,BB","19@0|BT@6|BA@11|BB@16|225@21|/@31|","" -"Condition Register NOR XL-form","crnor BT,BA,BB","19@0|BT@6|BA@11|BB@16|33@21|/@31|","" -"Condition Register OR XL-form","cror BT,BA,BB","19@0|BT@6|BA@11|BB@16|449@21|/@31|","" -"Condition Register OR with Complement XL-form","crorc BT,BA,BB","19@0|BT@6|BA@11|BB@16|417@21|/@31|","" -"Condition Register XOR XL-form","crxor BT,BA,BB","19@0|BT@6|BA@11|BB@16|193@21|/@31|","" -"Data Cache Block set to Zero X-form","dcbz RA,RB","31@0|///@6|RA@11|RB@16|1014@21|/@31|","" -"Equivalent X-form","eqv RA,RS,RB (Rc=0)|eqv. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|284@21|Rc@31|","" -"Extend Sign Halfword X-form","extsh RA,RS (Rc=0)|extsh. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|922@21|Rc@31|","" -"Floating Absolute Value X-form","fabs FRT,FRB (Rc=0)|fabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|264@21|Rc@31|","" -"Floating Add A-form","fadd FRT,FRA,FRB (Rc=0)|fadd. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","" -"Floating Compare Ordered X-form","fcmpo BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|32@21|/@31|","" -"Floating Compare Unordered X-form","fcmpu BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|0@21|/@31|","" -"Floating Divide A-form","fdiv FRT,FRA,FRB (Rc=0)|fdiv. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","" -"Floating Multiply-Add A-form","fmadd FRT,FRA,FRC,FRB (Rc=0)|fmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","" -"Floating Move Register X-form","fmr FRT,FRB (Rc=0)|fmr. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|72@21|Rc@31|","" -"Floating Multiply-Subtract A-form","fmsub FRT,FRA,FRC,FRB (Rc=0)|fmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","" -"Floating Multiply A-form","fmul FRT,FRA,FRC (Rc=0)|fmul. FRT,FRA,FRC (Rc=1)","63@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","" -"Floating Negative Absolute Value X-form","fnabs FRT,FRB (Rc=0)|fnabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|136@21|Rc@31|","" -"Floating Negate X-form","fneg FRT,FRB (Rc=0)|fneg. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|40@21|Rc@31|","" -"Floating Negative Multiply-Add A-form","fnmadd FRT,FRA,FRC,FRB (Rc=0)|fnmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","" -"Floating Negative Multiply-Subtract A-form","fnmsub FRT,FRA,FRC,FRB (Rc=0)|fnmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","" -"Floating Round to Single-Precision X-form","frsp FRT,FRB (Rc=0)|frsp. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|12@21|Rc@31|","" -"Floating Subtract A-form","fsub FRT,FRA,FRB (Rc=0)|fsub. 
FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","" -"Instruction Synchronize XL-form","isync","19@0|///@6|///@11|///@16|150@21|/@31|","" -"Load Byte and Zero D-form","lbz RT,D(RA)","34@0|RT@6|RA@11|D@16|","" -"Load Byte and Zero with Update D-form","lbzu RT,D(RA)","35@0|RT@6|RA@11|D@16|","" -"Load Byte and Zero with Update Indexed X-form","lbzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|119@21|/@31|","" -"Load Byte and Zero Indexed X-form","lbzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|87@21|/@31|","" -"Load Floating-Point Double D-form","lfd FRT,D(RA)","50@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Double with Update D-form","lfdu FRT,D(RA)","51@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Double with Update Indexed X-form","lfdux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|631@21|/@31|","" -"Load Floating-Point Double Indexed X-form","lfdx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|599@21|/@31|","" -"Load Floating-Point Single D-form","lfs FRT,D(RA)","48@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Single with Update D-form","lfsu FRT,D(RA)","49@0|FRT@6|RA@11|D@16|","" -"Load Floating-Point Single with Update Indexed X-form","lfsux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|567@21|/@31|","" -"Load Floating-Point Single Indexed X-form","lfsx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|535@21|/@31|","" -"Load Halfword Algebraic D-form","lha RT,D(RA)","42@0|RT@6|RA@11|D@16|","" -"Load Halfword Algebraic with Update D-form","lhau RT,D(RA)","43@0|RT@6|RA@11|D@16|","" -"Load Halfword Algebraic with Update Indexed X-form","lhaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|375@21|/@31|","" -"Load Halfword Algebraic Indexed X-form","lhax RT,RA,RB","31@0|RT@6|RA@11|RB@16|343@21|/@31|","" -"Load Halfword Byte-Reverse Indexed X-form","lhbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|790@21|/@31|","" -"Load Halfword and Zero D-form","lhz RT,D(RA)","40@0|RT@6|RA@11|D@16|","" -"Load Halfword and Zero with Update D-form","lhzu RT,D(RA)","41@0|RT@6|RA@11|D@16|","" -"Load Halfword and Zero with Update Indexed X-form","lhzux 
RT,RA,RB","31@0|RT@6|RA@11|RB@16|311@21|/@31|","" -"Load Halfword and Zero Indexed X-form","lhzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|279@21|/@31|","" -"Load Multiple Word D-form","lmw RT,D(RA)","46@0|RT@6|RA@11|D@16|","" -"Load String Word Immediate X-form","lswi RT,RA,NB","31@0|RT@6|RA@11|NB@16|597@21|/@31|","" -"Load String Word Indexed X-form","lswx RT,RA,RB","31@0|RT@6|RA@11|RB@16|533@21|/@31|","" -"Load Word Byte-Reverse Indexed X-form","lwbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|534@21|/@31|","" -"Load Word and Zero D-form","lwz RT,D(RA)","32@0|RT@6|RA@11|D@16|","" -"Load Word and Zero with Update D-form","lwzu RT,D(RA)","33@0|RT@6|RA@11|D@16|","" -"Load Word and Zero with Update Indexed X-form","lwzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|55@21|/@31|","" -"Load Word and Zero Indexed X-form","lwzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|23@21|/@31|","" -"Move Condition Register Field XL-form","mcrf BF,BFA","19@0|BF@6|//@9|BFA@11|//@14|///@16|0@21|/@31|","" -"Move to Condition Register from FPSCR X-form","mcrfs BF,BFA","63@0|BF@6|//@9|BFA@11|//@14|///@16|64@21|/@31|","" -"Move From Condition Register XFX-form","mfcr RT","31@0|RT@6|0@11|///@12|/@20|19@21|/@31|","" -"Move From FPSCR X-form","mffs FRT (Rc=0)|mffs. FRT (Rc=1)","63@0|FRT@6|0@11|///@16|583@21|Rc@31|","" -"Move From MSR X-form","mfmsr RT","31@0|RT@6|///@11|///@16|83@21|/@31|","" -"Move From Special Purpose Register XFX-form","mfspr RT,SPR","31@0|RT@6|spr@11|339@21|/@31|","" -"Move To Condition Register Fields XFX-form","mtcrf FXM,RS","31@0|RS@6|0@11|FXM@12|/@20|144@21|/@31|","" -"Move To FPSCR Bit 0 X-form","mtfsb0 BT (Rc=0)|mtfsb0. BT (Rc=1)","63@0|BT@6|///@11|///@16|70@21|Rc@31|","" -"Move To FPSCR Bit 1 X-form","mtfsb1 BT (Rc=0)|mtfsb1. BT (Rc=1)","63@0|BT@6|///@11|///@16|38@21|Rc@31|","" -"Move To FPSCR Fields XFL-form","mtfsf FLM,FRB,L,W (Rc=0)|mtfsf. FLM,FRB,L,W (Rc=1)","63@0|L@6|FLM@7|W@15|FRB@16|711@21|Rc@31|","" -"Move To FPSCR Field Immediate X-form","mtfsfi BF,U,W (Rc=0)|mtfsfi. 
BF,U,W (Rc=1)","63@0|BF@6|//@9|///@11|W@15|U@16|/@20|134@21|Rc@31|","" -"Move To MSR X-form","mtmsr RS,L","31@0|RS@6|///@11|L@15|///@16|146@21|/@31|","" -"Move To Special Purpose Register XFX-form","mtspr SPR,RS","31@0|RS@6|spr@11|467@21|/@31|","" -"Multiply Low Immediate D-form","mulli RT,RA,SI","7@0|RT@6|RA@11|SI@16|","" -"Multiply Low Word XO-form","mullw RT,RA,RB (OE=0 Rc=0)|mullw. RT,RA,RB (OE=0 Rc=1)|mullwo RT,RA,RB (OE=1 Rc=0)|mullwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|235@22|Rc@31|","" -"NAND X-form","nand RA,RS,RB (Rc=0)|nand. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|476@21|Rc@31|","" -"Negate XO-form","neg RT,RA (OE=0 Rc=0)|neg. RT,RA (OE=0 Rc=1)|nego RT,RA (OE=1 Rc=0)|nego. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|104@22|Rc@31|","" -"NOR X-form","nor RA,RS,RB (Rc=0)|nor. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|124@21|Rc@31|","" -"OR X-form","or RA,RS,RB (Rc=0)|or. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|444@21|Rc@31|","" -"OR with Complement X-form","orc RA,RS,RB (Rc=0)|orc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|412@21|Rc@31|","" -"OR Immediate D-form","ori RA,RS,UI|nop (RA=0 RS=0 UI=0)","24@0|RS@6|RA@11|UI@16|","" -"OR Immediate Shifted D-form","oris RA,RS,UI","25@0|RS@6|RA@11|UI@16|","" -"Rotate Left Word Immediate then Mask Insert M-form","rlwimi RA,RS,SH,MB,ME (Rc=0)|rlwimi. RA,RS,SH,MB,ME (Rc=1)","20@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","" -"Rotate Left Word Immediate then AND with Mask M-form","rlwinm RA,RS,SH,MB,ME (Rc=0)|rlwinm. RA,RS,SH,MB,ME (Rc=1)","21@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","" -"Rotate Left Word then AND with Mask M-form","rlwnm RA,RS,RB,MB,ME (Rc=0)|rlwnm. RA,RS,RB,MB,ME (Rc=1)","23@0|RS@6|RA@11|RB@16|MB@21|ME@26|Rc@31|","" -"Shift Left Word X-form","slw RA,RS,RB (Rc=0)|slw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|24@21|Rc@31|","" -"Shift Right Algebraic Word X-form","sraw RA,RS,RB (Rc=0)|sraw. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|792@21|Rc@31|","" -"Shift Right Algebraic Word Immediate X-form","srawi RA,RS,SH (Rc=0)|srawi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|SH@16|824@21|Rc@31|","" -"Shift Right Word X-form","srw RA,RS,RB (Rc=0)|srw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|536@21|Rc@31|","" -"Store Byte D-form","stb RS,D(RA)","38@0|RS@6|RA@11|D@16|","" -"Store Byte with Update D-form","stbu RS,D(RA)","39@0|RS@6|RA@11|D@16|","" -"Store Byte with Update Indexed X-form","stbux RS,RA,RB","31@0|RS@6|RA@11|RB@16|247@21|/@31|","" -"Store Byte Indexed X-form","stbx RS,RA,RB","31@0|RS@6|RA@11|RB@16|215@21|/@31|","" -"Store Floating-Point Double D-form","stfd FRS,D(RA)","54@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Double with Update D-form","stfdu FRS,D(RA)","55@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Double with Update Indexed X-form","stfdux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|759@21|/@31|","" -"Store Floating-Point Double Indexed X-form","stfdx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|727@21|/@31|","" -"Store Floating-Point Single D-form","stfs FRS,D(RA)","52@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Single with Update D-form","stfsu FRS,D(RA)","53@0|FRS@6|RA@11|D@16|","" -"Store Floating-Point Single with Update Indexed X-form","stfsux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|695@21|/@31|","" -"Store Floating-Point Single Indexed X-form","stfsx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|663@21|/@31|","" -"Store Halfword D-form","sth RS,D(RA)","44@0|RS@6|RA@11|D@16|","" -"Store Halfword Byte-Reverse Indexed X-form","sthbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|918@21|/@31|","" -"Store Halfword with Update D-form","sthu RS,D(RA)","45@0|RS@6|RA@11|D@16|","" -"Store Halfword with Update Indexed X-form","sthux RS,RA,RB","31@0|RS@6|RA@11|RB@16|439@21|/@31|","" -"Store Halfword Indexed X-form","sthx RS,RA,RB","31@0|RS@6|RA@11|RB@16|407@21|/@31|","" -"Store Multiple Word D-form","stmw RS,D(RA)","47@0|RS@6|RA@11|D@16|","" -"Store String Word Immediate X-form","stswi 
RS,RA,NB","31@0|RS@6|RA@11|NB@16|725@21|/@31|","" -"Store String Word Indexed X-form","stswx RS,RA,RB","31@0|RS@6|RA@11|RB@16|661@21|/@31|","" -"Store Word D-form","stw RS,D(RA)","36@0|RS@6|RA@11|D@16|","" -"Store Word Byte-Reverse Indexed X-form","stwbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|662@21|/@31|","" -"Store Word with Update D-form","stwu RS,D(RA)","37@0|RS@6|RA@11|D@16|","" -"Store Word with Update Indexed X-form","stwux RS,RA,RB","31@0|RS@6|RA@11|RB@16|183@21|/@31|","" -"Store Word Indexed X-form","stwx RS,RA,RB","31@0|RS@6|RA@11|RB@16|151@21|/@31|","" -"Subtract From Carrying XO-form","subfc RT,RA,RB (OE=0 Rc=0)|subfc. RT,RA,RB (OE=0 Rc=1)|subfco RT,RA,RB (OE=1 Rc=0)|subfco. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|8@22|Rc@31|","" -"Subtract From Extended XO-form","subfe RT,RA,RB (OE=0 Rc=0)|subfe. RT,RA,RB (OE=0 Rc=1)|subfeo RT,RA,RB (OE=1 Rc=0)|subfeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|136@22|Rc@31|","" -"Subtract From Immediate Carrying D-form","subfic RT,RA,SI","8@0|RT@6|RA@11|SI@16|","" -"Subtract From Minus One Extended XO-form","subfme RT,RA (OE=0 Rc=0)|subfme. RT,RA (OE=0 Rc=1)|subfmeo RT,RA (OE=1 Rc=0)|subfmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|232@22|Rc@31|","" -"Subtract From Zero Extended XO-form","subfze RT,RA (OE=0 Rc=0)|subfze. RT,RA (OE=0 Rc=1)|subfzeo RT,RA (OE=1 Rc=0)|subfzeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|200@22|Rc@31|","" -"Synchronize X-form","sync L,SC","31@0|//@6|L@8|///@11|SC@14|///@16|598@21|/@31|","" -"TLB Invalidate Entry X-form","tlbie RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|306@21|/@31|","" -"Trap Word X-form","tw TO,RA,RB","31@0|TO@6|RA@11|RB@16|4@21|/@31|","" -"Trap Word Immediate D-form","twi TO,RA,SI","3@0|TO@6|RA@11|SI@16|","" -"XOR X-form","xor RA,RS,RB (Rc=0)|xor. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|316@21|Rc@31|","" -"XOR Immediate D-form","xori RA,RS,UI","26@0|RS@6|RA@11|UI@16|","" -"XOR Immediate Shifted D-form","xoris RA,RS,UI","27@0|RS@6|RA@11|UI@16|","" +"Byte-Reverse Doubleword X-form","brd RA,RS","31@0|RS@6|RA@11|///@16|187@21|/@31|","v3.1" +"Byte-Reverse Halfword X-form","brh RA,RS","31@0|RS@6|RA@11|///@16|219@21|/@31|","v3.1" +"Byte-Reverse Word X-form","brw RA,RS","31@0|RS@6|RA@11|///@16|155@21|/@31|","v3.1" +"Centrifuge Doubleword X-form","cfuged RA,RS,RB","31@0|RS@6|RA@11|RB@16|220@21|/@31|","v3.1" +"Count Leading Zeros Doubleword under bit Mask X-form","cntlzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|59@21|/@31|","v3.1" +"Count Trailing Zeros Doubleword under bit Mask X-form","cnttzdm RA,RS,RB","31@0|RS@6|RA@11|RB@16|571@21|/@31|","v3.1" +"DFP Convert From Fixed Quadword Quad X-form","dcffixqq FRTp,VRB","63@0|FRTp@6|0@11|VRB@16|994@21|/@31|","v3.1" +"DFP Convert To Fixed Quadword Quad X-form","dctfixqq VRT,FRBp","63@0|VRT@6|1@11|FRBp@16|994@21|/@31|","v3.1" +"Load VSX Vector Special Value Quadword X-form","lxvkq XT,UIM","60@0|T@6|31@11|UIM@16|360@21|TX@31|","v3.1" +"Load VSX Vector Paired DQ-form","lxvp XTp,DQ(RA)","6@0|Tp@6|TX@10|RA@11|DQ@16|0@28|","v3.1" +"Load VSX Vector Paired Indexed X-form","lxvpx XTp,RA,RB","31@0|Tp@6|TX@10|RA@11|RB@16|333@21|/@31|","v3.1" +"Load VSX Vector Rightmost Byte Indexed X-form","lxvrbx XT,RA,RB","31@0|T@6|RA@11|RB@16|13@21|TX@31|","v3.1" +"Load VSX Vector Rightmost Doubleword Indexed X-form","lxvrdx XT,RA,RB","31@0|T@6|RA@11|RB@16|109@21|TX@31|","v3.1" +"Load VSX Vector Rightmost Halfword Indexed X-form","lxvrhx XT,RA,RB","31@0|T@6|RA@11|RB@16|45@21|TX@31|","v3.1" +"Load VSX Vector Rightmost Word Indexed X-form","lxvrwx XT,RA,RB","31@0|T@6|RA@11|RB@16|77@21|TX@31|","v3.1" +"Move to VSR Byte Mask VX-form","mtvsrbm VRT,RB","4@0|VRT@6|16@11|RB@16|1602@21|","v3.1" +"Move To VSR Byte Mask Immediate DX-form","mtvsrbmi VRT,bm","4@0|VRT@6|b1@11|b0@16|10@26|b2@31|","v3.1" +"Move to VSR 
Doubleword Mask VX-form","mtvsrdm VRT,RB","4@0|VRT@6|19@11|RB@16|1602@21|","v3.1" +"Move to VSR Halfword Mask VX-form","mtvsrhm VRT,RB","4@0|VRT@6|17@11|RB@16|1602@21|","v3.1" +"Move to VSR Quadword Mask VX-form","mtvsrqm VRT,RB","4@0|VRT@6|20@11|RB@16|1602@21|","v3.1" +"Move to VSR Word Mask VX-form","mtvsrwm VRT,RB","4@0|VRT@6|18@11|RB@16|1602@21|","v3.1" +"Prefixed Add Immediate MLS:D-form","paddi RT,RA,SI,R",",1@0|2@6|0@8|//@9|R@11|//@12|si0@14|,14@0|RT@6|RA@11|si1@16|","v3.1" +"Parallel Bits Deposit Doubleword X-form","pdepd RA,RS,RB","31@0|RS@6|RA@11|RB@16|156@21|/@31|","v3.1" +"Parallel Bits Extract Doubleword X-form","pextd RA,RS,RB","31@0|RS@6|RA@11|RB@16|188@21|/@31|","v3.1" +"Prefixed Load Byte and Zero MLS:D-form","plbz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,34@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Doubleword 8LS:D-form","pld RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,57@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Floating-Point Double MLS:D-form","plfd FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,50@0|FRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Floating-Point Single MLS:D-form","plfs FRT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,48@0|FRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Halfword Algebraic MLS:D-form","plha RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,42@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Halfword and Zero MLS:D-form","plhz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,40@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Quadword 8LS:D-form","plq RTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,56@0|RTp@6|RA@11|d1@16|","v3.1" +"Prefixed Load Word Algebraic 8LS:D-form","plwa RT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,41@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load Word and Zero MLS:D-form","plwz RT,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,32@0|RT@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Scalar Doubleword 8LS:D-form","plxsd 
VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,42@0|VRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Scalar Single-Precision 8LS:D-form","plxssp VRT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,43@0|VRT@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Vector 8LS:D-form","plxv XT,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,25@0|TX@5|T@6|RA@11|d1@16|","v3.1" +"Prefixed Load VSX Vector Paired 8LS:D-form","plxvp XTp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,58@0|Tp@6|TX@10|RA@11|d1@16|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) MMIRR:XX3-form","pmxvbf16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvbf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvbf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) MMIRR:XX3-form","pmxvf16ger2 
AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2nn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2np AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf16ger2pn AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf32ger AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gernn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gernp 
AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf32gerpn AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf32gerpp AT,XA,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) MMIRR:XX3-form","pmxvf64ger AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gernn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gernp AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate MMIRR:XX3-form","pmxvf64gerpn AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvf64gerpp 
AT,XAp,XB,XMSK,YMSK",",1@0|3@6|9@8|//@12|/@14|/@15|///@16|XMSK@24|YMSK@28|//@30|,59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) MMIRR:XX3-form","pmxvi16ger2 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation MMIRR:XX3-form","pmxvi16ger2s AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi16ger2spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@18|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) MMIRR:XX3-form","pmxvi4ger8 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi4ger8pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) MMIRR:XX3-form","pmxvi8ger4 AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","v3.1" 
+"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4pp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate MMIRR:XX3-form","pmxvi8ger4spp AT,XA,XB,XMSK,YMSK,PMSK",",1@0|3@6|9@8|//@12|/@14|/@15|PMSK@16|///@20|XMSK@24|YMSK@28|,59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","v3.1" +"Prefixed Nop MRR:*-form","pnop",",1@0|3@6|0@8|///@12|0@14|//@31|,///@0|","v3.1" +"Prefixed Store Byte MLS:D-form","pstb RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,38@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Doubleword 8LS:D-form","pstd RS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,61@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Floating-Point Double MLS:D-form","pstfd FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,54@0|FRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Floating-Point Single MLS:D-form","pstfs FRS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,52@0|FRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Halfword MLS:D-form","psth RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,44@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store Quadword 8LS:D-form","pstq RSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,60@0|RSp@6|RA@11|d1@16|","v3.1" +"Prefixed Store Word MLS:D-form","pstw RS,D(RA),R",",1@0|2@6|0@8|//@9|R@11|//@12|d0@14|,36@0|RS@6|RA@11|d1@16|","v3.1" +"Prefixed Store VSX Scalar Doubleword 8LS:D-form","pstxsd VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,46@0|VRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store VSX Scalar Single-Precision 8LS:D-form","pstxssp VRS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,47@0|VRS@6|RA@11|d1@16|","v3.1" +"Prefixed Store VSX Vector 8LS:D-form","pstxv XS,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,27@0|SX@5|S@6|RA@11|d1@16|","v3.1" 
+"Prefixed Store VSX Vector Paired 8LS:D-form","pstxvp XSp,D(RA),R",",1@0|0@6|0@8|//@9|R@11|//@12|d0@14|,62@0|Sp@6|SX@10|RA@11|d1@16|","v3.1" +"Set Boolean Condition X-form","setbc RT,BI","31@0|RT@6|BI@11|///@16|384@21|/@31|","v3.1" +"Set Boolean Condition Reverse X-form","setbcr RT,BI","31@0|RT@6|BI@11|///@16|416@21|/@31|","v3.1" +"Set Negative Boolean Condition X-form","setnbc RT,BI","31@0|RT@6|BI@11|///@16|448@21|/@31|","v3.1" +"Set Negative Boolean Condition Reverse X-form","setnbcr RT,BI","31@0|RT@6|BI@11|///@16|480@21|/@31|","v3.1" +"Store VSX Vector Paired DQ-form","stxvp XSp,DQ(RA)","6@0|Sp@6|SX@10|RA@11|DQ@16|1@28|","v3.1" +"Store VSX Vector Paired Indexed X-form","stxvpx XSp,RA,RB","31@0|Sp@6|SX@10|RA@11|RB@16|461@21|/@31|","v3.1" +"Store VSX Vector Rightmost Byte Indexed X-form","stxvrbx XS,RA,RB","31@0|S@6|RA@11|RB@16|141@21|SX@31|","v3.1" +"Store VSX Vector Rightmost Doubleword Indexed X-form","stxvrdx XS,RA,RB","31@0|S@6|RA@11|RB@16|237@21|SX@31|","v3.1" +"Store VSX Vector Rightmost Halfword Indexed X-form","stxvrhx XS,RA,RB","31@0|S@6|RA@11|RB@16|173@21|SX@31|","v3.1" +"Store VSX Vector Rightmost Word Indexed X-form","stxvrwx XS,RA,RB","31@0|S@6|RA@11|RB@16|205@21|SX@31|","v3.1" +"Vector Centrifuge Doubleword VX-form","vcfuged VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1357@21|","v3.1" +"Vector Clear Leftmost Bytes VX-form","vclrlb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|397@21|","v3.1" +"Vector Clear Rightmost Bytes VX-form","vclrrb VRT,VRA,RB","4@0|VRT@6|VRA@11|RB@16|461@21|","v3.1" +"Vector Count Leading Zeros Doubleword under bit Mask VX-form","vclzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1924@21|","v3.1" +"Vector Compare Equal Quadword VC-form","vcmpequq VRT,VRA,VRB (Rc=0)|vcmpequq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|455@22|","v3.1" +"Vector Compare Greater Than Signed Quadword VC-form","vcmpgtsq VRT,VRA,VRB (Rc=0)|vcmpgtsq. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|903@22|","v3.1" +"Vector Compare Greater Than Unsigned Quadword VC-form","vcmpgtuq VRT,VRA,VRB (Rc=0)|vcmpgtuq. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|647@22|","v3.1" +"Vector Compare Signed Quadword VX-form","vcmpsq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|321@21|","v3.1" +"Vector Compare Unsigned Quadword VX-form","vcmpuq BF,VRA,VRB","4@0|BF@6|//@9|VRA@11|VRB@16|257@21|","v3.1" +"Vector Count Mask Bits Byte VX-form","vcntmbb RT,VRB,MP","4@0|RT@6|12@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Mask Bits Doubleword VX-form","vcntmbd RT,VRB,MP","4@0|RT@6|15@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Mask Bits Halfword VX-form","vcntmbh RT,VRB,MP","4@0|RT@6|13@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Mask Bits Word VX-form","vcntmbw RT,VRB,MP","4@0|RT@6|14@11|MP@15|VRB@16|1602@21|","v3.1" +"Vector Count Trailing Zeros Doubleword under bit Mask VX-form","vctzdm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1988@21|","v3.1" +"Vector Divide Extended Signed Doubleword VX-form","vdivesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|971@21|","v3.1" +"Vector Divide Extended Signed Quadword VX-form","vdivesq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|779@21|","v3.1" +"Vector Divide Extended Signed Word VX-form","vdivesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|907@21|","v3.1" +"Vector Divide Extended Unsigned Doubleword VX-form","vdiveud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|715@21|","v3.1" +"Vector Divide Extended Unsigned Quadword VX-form","vdiveuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|523@21|","v3.1" +"Vector Divide Extended Unsigned Word VX-form","vdiveuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|651@21|","v3.1" +"Vector Divide Signed Doubleword VX-form","vdivsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|459@21|","v3.1" +"Vector Divide Signed Quadword VX-form","vdivsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|267@21|","v3.1" +"Vector Divide Signed Word VX-form","vdivsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|395@21|","v3.1" +"Vector 
Divide Unsigned Doubleword VX-form","vdivud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|203@21|","v3.1" +"Vector Divide Unsigned Quadword VX-form","vdivuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|11@21|","v3.1" +"Vector Divide Unsigned Word VX-form","vdivuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|139@21|","v3.1" +"Vector Expand Byte Mask VX-form","vexpandbm VRT,VRB","4@0|VRT@6|0@11|VRB@16|1602@21|","v3.1" +"Vector Expand Doubleword Mask VX-form","vexpanddm VRT,VRB","4@0|VRT@6|3@11|VRB@16|1602@21|","v3.1" +"Vector Expand Halfword Mask VX-form","vexpandhm VRT,VRB","4@0|VRT@6|1@11|VRB@16|1602@21|","v3.1" +"Vector Expand Quadword Mask VX-form","vexpandqm VRT,VRB","4@0|VRT@6|4@11|VRB@16|1602@21|","v3.1" +"Vector Expand Word Mask VX-form","vexpandwm VRT,VRB","4@0|VRT@6|2@11|VRB@16|1602@21|","v3.1" +"Vector Extract Double Doubleword to VSR using GPR-specified Left-Index VA-form","vextddvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|30@26|","v3.1" +"Vector Extract Double Doubleword to VSR using GPR-specified Right-Index VA-form","vextddvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|31@26|","v3.1" +"Vector Extract Double Unsigned Byte to VSR using GPR-specified Left-Index VA-form","vextdubvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|24@26|","v3.1" +"Vector Extract Double Unsigned Byte to VSR using GPR-specified Right-Index VA-form","vextdubvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|25@26|","v3.1" +"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Left-Index VA-form","vextduhvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|26@26|","v3.1" +"Vector Extract Double Unsigned Halfword to VSR using GPR-specified Right-Index VA-form","vextduhvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|27@26|","v3.1" +"Vector Extract Double Unsigned Word to VSR using GPR-specified Left-Index VA-form","vextduwvlx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|28@26|","v3.1" +"Vector Extract Double Unsigned Word to VSR using GPR-specified Right-Index 
VA-form","vextduwvrx VRT,VRA,VRB,RC","4@0|VRT@6|VRA@11|VRB@16|RC@21|29@26|","v3.1" +"Vector Extract Byte Mask VX-form","vextractbm RT,VRB","4@0|RT@6|8@11|VRB@16|1602@21|","v3.1" +"Vector Extract Doubleword Mask VX-form","vextractdm RT,VRB","4@0|RT@6|11@11|VRB@16|1602@21|","v3.1" +"Vector Extract Halfword Mask VX-form","vextracthm RT,VRB","4@0|RT@6|9@11|VRB@16|1602@21|","v3.1" +"Vector Extract Quadword Mask VX-form","vextractqm RT,VRB","4@0|RT@6|12@11|VRB@16|1602@21|","v3.1" +"Vector Extract Word Mask VX-form","vextractwm RT,VRB","4@0|RT@6|10@11|VRB@16|1602@21|","v3.1" +"Vector Extend Sign Doubleword to Quadword VX-form","vextsd2q VRT,VRB","4@0|VRT@6|27@11|VRB@16|1538@21|","v3.1" +"Vector Gather every Nth Bit VX-form","vgnb RT,VRB,N","4@0|RT@6|//@11|N@13|VRB@16|1228@21|","v3.1" +"Vector Insert Byte from GPR using GPR-specified Left-Index VX-form","vinsblx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|527@21|","v3.1" +"Vector Insert Byte from GPR using GPR-specified Right-Index VX-form","vinsbrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|783@21|","v3.1" +"Vector Insert Byte from VSR using GPR-specified Left-Index VX-form","vinsbvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|15@21|","v3.1" +"Vector Insert Byte from VSR using GPR-specified Right-Index VX-form","vinsbvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|271@21|","v3.1" +"Vector Insert Doubleword from GPR using immediate-specified index VX-form","vinsd VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|463@21|","v3.1" +"Vector Insert Doubleword from GPR using GPR-specified Left-Index VX-form","vinsdlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|719@21|","v3.1" +"Vector Insert Doubleword from GPR using GPR-specified Right-Index VX-form","vinsdrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|975@21|","v3.1" +"Vector Insert Halfword from GPR using GPR-specified Left-Index VX-form","vinshlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|591@21|","v3.1" +"Vector Insert Halfword from GPR using GPR-specified Right-Index VX-form","vinshrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|847@21|","v3.1" +"Vector 
Insert Halfword from VSR using GPR-specified Left-Index VX-form","vinshvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|79@21|","v3.1" +"Vector Insert Halfword from VSR using GPR-specified Right-Index VX-form","vinshvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|335@21|","v3.1" +"Vector Insert Word from GPR using immediate-specified index VX-form","vinsw VRT,RB,UIM","4@0|VRT@6|/@11|UIM@12|RB@16|207@21|","v3.1" +"Vector Insert Word from GPR using GPR-specified Left-Index VX-form","vinswlx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|655@21|","v3.1" +"Vector Insert Word from GPR using GPR-specified Right-Index VX-form","vinswrx VRT,RA,RB","4@0|VRT@6|RA@11|RB@16|911@21|","v3.1" +"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvlx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|143@21|","v3.1" +"Vector Insert Word from VSR using GPR-specified Left-Index VX-form","vinswvrx VRT,RA,VRB","4@0|VRT@6|RA@11|VRB@16|399@21|","v3.1" +"Vector Modulo Signed Doubleword VX-form","vmodsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1995@21|","v3.1" +"Vector Modulo Signed Quadword VX-form","vmodsq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1803@21|","v3.1" +"Vector Modulo Signed Word VX-form","vmodsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1931@21|","v3.1" +"Vector Modulo Unsigned Doubleword VX-form","vmodud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1739@21|","v3.1" +"Vector Modulo Unsigned Quadword VX-form","vmoduq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1547@21|","v3.1" +"Vector Modulo Unsigned Word VX-form","vmoduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1675@21|","v3.1" +"Vector Multiply-Sum & write Carry-out Unsigned Doubleword VA-form","vmsumcud VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|23@26|","v3.1" +"Vector Multiply Even Signed Doubleword VX-form","vmulesd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|968@21|","v3.1" +"Vector Multiply Even Unsigned Doubleword VX-form","vmuleud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|712@21|","v3.1" +"Vector Multiply High Signed Doubleword VX-form","vmulhsd 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|969@21|","v3.1" +"Vector Multiply High Signed Word VX-form","vmulhsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|905@21|","v3.1" +"Vector Multiply High Unsigned Doubleword VX-form","vmulhud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|713@21|","v3.1" +"Vector Multiply High Unsigned Word VX-form","vmulhuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|649@21|","v3.1" +"Vector Multiply Low Doubleword VX-form","vmulld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|457@21|","v3.1" +"Vector Multiply Odd Signed Doubleword VX-form","vmulosd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|456@21|","v3.1" +"Vector Multiply Odd Unsigned Doubleword VX-form","vmuloud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|200@21|","v3.1" +"Vector Parallel Bits Deposit Doubleword VX-form","vpdepd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1485@21|","v3.1" +"Vector Parallel Bits Extract Doubleword VX-form","vpextd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1421@21|","v3.1" +"Vector Rotate Left Quadword VX-form","vrlq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|5@21|","v3.1" +"Vector Rotate Left Quadword then Mask Insert VX-form","vrlqmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|69@21|","v3.1" +"Vector Rotate Left Quadword then AND with Mask VX-form","vrlqnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|325@21|","v3.1" +"Vector Shift Left Double by Bit Immediate VN-form","vsldbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|0@21|SH@23|22@26|","v3.1" +"Vector Shift Left Quadword VX-form","vslq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|261@21|","v3.1" +"Vector Shift Right Algebraic Quadword VX-form","vsraq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|773@21|","v3.1" +"Vector Shift Right Double by Bit Immediate VN-form","vsrdbi VRT,VRA,VRB,SH","4@0|VRT@6|VRA@11|VRB@16|1@21|SH@23|22@26|","v3.1" +"Vector Shift Right Quadword VX-form","vsrq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|517@21|","v3.1" +"Vector String Isolate Byte Left-justified VX-form","vstribl VRT,VRB (Rc=0)|vstribl. 
VRT,VRB (Rc=1)","4@0|VRT@6|0@11|VRB@16|Rc@21|13@22|","v3.1" +"Vector String Isolate Byte Right-justified VX-form","vstribr VRT,VRB (Rc=0)|vstribr. VRT,VRB (Rc=1)","4@0|VRT@6|1@11|VRB@16|Rc@21|13@22|","v3.1" +"Vector String Isolate Halfword Left-justified VX-form","vstrihl VRT,VRB (Rc=0)|vstrihl. VRT,VRB (Rc=1)","4@0|VRT@6|2@11|VRB@16|Rc@21|13@22|","v3.1" +"Vector String Isolate Halfword Right-justified VX-form","vstrihr VRT,VRB (Rc=0)|vstrihr. VRT,VRB (Rc=1)","4@0|VRT@6|3@11|VRB@16|Rc@21|13@22|","v3.1" +"VSX Scalar Compare Equal Quad-Precision X-form","xscmpeqqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|68@21|/@31|","v3.1" +"VSX Scalar Compare Greater Than or Equal Quad-Precision X-form","xscmpgeqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|196@21|/@31|","v3.1" +"VSX Scalar Compare Greater Than Quad-Precision X-form","xscmpgtqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|228@21|/@31|","v3.1" +"VSX Scalar Convert with round to zero Quad-Precision to Signed Quadword X-form","xscvqpsqz VRT,VRB","63@0|VRT@6|8@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Quadword X-form","xscvqpuqz VRT,VRB","63@0|VRT@6|0@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Convert with round Signed Quadword to Quad-Precision X-form","xscvsqqp VRT,VRB","63@0|VRT@6|11@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Convert with round Unsigned Quadword to Quad-Precision X-form","xscvuqqp VRT,VRB","63@0|VRT@6|3@11|VRB@16|836@21|/@31|","v3.1" +"VSX Scalar Maximum Type-C Quad-Precision X-form","xsmaxcqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|676@21|/@31|","v3.1" +"VSX Scalar Minimum Type-C Quad-Precision X-form","xsmincqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|740@21|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) XX3-form","xvbf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|51@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative accumulate XX3-form","xvbf16ger2nn 
AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|242@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate XX3-form","xvbf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate XX3-form","xvbf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form","xvbf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector Convert bfloat16 to Single-Precision format XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","v3.1" +"VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form","xvcvspbf16 XT,XB","60@0|T@6|17@11|B@16|475@21|BX@30|TX@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) XX3-form","xvf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate XX3-form","xvf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Positive accumulate XX3-form","xvf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|82@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Negative accumulate XX3-form","xvf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|146@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Floating-Point GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|18@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) XX3-form","xvf32ger AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|27@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate 
XX3-form","xvf32gernn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|218@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf32gernp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|90@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf32gerpn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|154@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 32-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate XX3-form","xvf32gerpp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|26@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) XX3-form","xvf64ger AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|59@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Negative accumulate XX3-form","xvf64gernn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|250@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Negative multiply, Positive accumulate XX3-form","xvf64gernp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|122@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Negative accumulate XX3-form","xvf64gerpn AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|186@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 64-bit Floating-Point GER (rank-1 update) Positive multiply, Positive accumulate XX3-form","xvf64gerpp AT,XAp,XB","59@0|AT@6|//@9|Ap@11|B@16|58@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) XX3-form","xvi16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|75@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate XX3-form","xvi16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|107@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation XX3-form","xvi16ger2s 
AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate XX3-form","xvi16ger2spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|42@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 4-bit Signed Integer GER (rank-8 update) XX3-form","xvi4ger8 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate XX3-form","xvi4ger8pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|34@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) XX3-form","xvi8ger4 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|3@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate XX3-form","xvi8ger4pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|2@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate XX3-form","xvi8ger4spp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|99@21|AX@29|BX@30|/@31|","v3.1" +"VSX Vector Test Least-Significant Bit by Byte XX2-form","xvtlsbb BF,XB","60@0|BF@6|//@9|2@11|B@16|475@21|BX@30|/@31|","v3.1" +"VSX Vector Blend Variable Byte 8RR:XX4-form","xxblendvb XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Blend Variable Doubleword 8RR:XX4-form","xxblendvd XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Blend Variable Halfword 8RR:XX4-form","xxblendvh XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Blend Variable Word 8RR:XX4-form","xxblendvw XT,XA,XB,XC",",1@0|1@6|0@8|//@12|///@14|,33@0|T@6|A@11|B@16|C@21|2@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Evaluate 8RR-XX4-form","xxeval 
XT,XA,XB,XC,IMM",",1@0|1@6|0@8|//@12|///@14|IMM@24|,34@0|T@6|A@11|B@16|C@21|1@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Vector Generate PCV from Byte Mask X-form","xxgenpcvbm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|916@21|TX@31|","v3.1" +"VSX Vector Generate PCV from Doubleword Mask X-form","xxgenpcvdm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|949@21|TX@31|","v3.1" +"VSX Vector Generate PCV from Halfword Mask X-form","xxgenpcvhm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|917@21|TX@31|","v3.1" +"VSX Vector Generate PCV from Word Mask X-form","xxgenpcvwm XT,VRB,IMM","60@0|T@6|IMM@11|VRB@16|948@21|TX@31|","v3.1" +"VSX Move From Accumulator X-form","xxmfacc AS","31@0|AS@6|//@9|0@11|///@16|177@21|/@31|","v3.1" +"VSX Move To Accumulator X-form","xxmtacc AT","31@0|AT@6|//@9|1@11|///@16|177@21|/@31|","v3.1" +"VSX Vector Permute Extended 8RR:XX4-form","xxpermx XT,XA,XB,XC,UIM",",1@0|1@6|0@8|//@12|///@14|UIM@29|,34@0|T@6|A@11|B@16|C@21|0@26|CX@28|AX@29|BX@30|TX@31|","v3.1" +"VSX Set Accumulator to Zero X-form","xxsetaccz AT","31@0|AT@6|//@9|3@11|///@16|177@21|/@31|","v3.1" +"VSX Vector Splat Immediate32 Doubleword Indexed 8RR:D-form","xxsplti32dx XT,IX,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|0@11|IX@14|TX@15|imm1@16|","v3.1" +"VSX Vector Splat Immediate Double-Precision 8RR:D-form","xxspltidp XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|2@11|TX@15|imm1@16|","v3.1" +"VSX Vector Splat Immediate Word 8RR:D-form","xxspltiw XT,IMM32",",1@0|1@6|0@8|//@12|//@14|imm0@16|,32@0|T@6|3@11|TX@15|imm1@16|","v3.1" +"Ultravisor Message Clear X-form","msgclru RB","31@0|///@6|///@11|RB@16|110@21|/@31|","v3.0C" +"Ultravisor Message SendX-form","msgsndu RB","31@0|///@6|///@11|RB@16|78@21|/@31|","v3.0C" +"Ultravisor Return From Interrupt Doubleword XL-form","urfid","19@0|///@6|///@11|///@16|306@21|/@31|","v3.0C" +"Add Extended using alternate carry bit Z23-form","addex RT,RA,RB,CY","31@0|RT@6|RA@11|RB@16|CY@21|170@23|/@31|","v3.0B" +"Move From FPSCR Control & Set DRN X-form","mffscdrn 
FRT,FRB","63@0|FRT@6|20@11|FRB@16|583@21|/@31|","v3.0B" +"Move From FPSCR Control & Set DRN Immediate X-form","mffscdrni FRT,DRM","63@0|FRT@6|21@11|//@16|DRM@18|583@21|/@31|","v3.0B" +"Move From FPSCR & Clear Enables X-form","mffsce FRT","63@0|FRT@6|1@11|///@16|583@21|/@31|","v3.0B" +"Move From FPSCR Control & Set RN X-form","mffscrn FRT,FRB","63@0|FRT@6|22@11|FRB@16|583@21|/@31|","v3.0B" +"Move From FPSCR Control & Set RN Immediate X-form","mffscrni FRT,RM","63@0|FRT@6|23@11|///@16|RM@19|583@21|/@31|","v3.0B" +"Move From FPSCR Lightweight X-form","mffsl FRT","63@0|FRT@6|24@11|///@16|583@21|/@31|","v3.0B" +"SLB Invalidate All Global X-form","slbiag RS, L","31@0|RS@6|///@11|L@15|///@16|850@21|/@31|","v3.0B" +"Vector Multiply-Sum Unsigned Doubleword Modulo VA-form","vmsumudm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|35@26|","v3.0B" +"Add PC Immediate Shifted DX-form","addpcis RT,D","19@0|RT@6|d1@11|d0@16|2@26|d2@31|","v3.0" +"Decimal Convert From National VX-form","bcdcfn. VRT,VRB,PS","4@0|VRT@6|7@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Convert From Signed Quadword VX-form","bcdcfsq. VRT,VRB,PS","4@0|VRT@6|2@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Convert From Zoned VX-form","bcdcfz. VRT,VRB,PS","4@0|VRT@6|6@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Copy Sign VX-form","bcdcpsgn. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|833@21|","v3.0" +"Decimal Convert To National VX-form","bcdctn. VRT,VRB","4@0|VRT@6|5@11|VRB@16|1@21|/@22|385@23|","v3.0" +"Decimal Convert To Signed Quadword VX-form","bcdctsq. VRT,VRB","4@0|VRT@6|0@11|VRB@16|1@21|/@22|385@23|","v3.0" +"Decimal Convert To Zoned VX-form","bcdctz. VRT,VRB,PS","4@0|VRT@6|4@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Shift VX-form","bcds. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|193@23|","v3.0" +"Decimal Set Sign VX-form","bcdsetsgn. VRT,VRB,PS","4@0|VRT@6|31@11|VRB@16|1@21|PS@22|385@23|","v3.0" +"Decimal Shift and Round VX-form","bcdsr. 
VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|449@23|","v3.0" +"Decimal Truncate VX-form","bcdtrunc. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|257@23|","v3.0" +"Decimal Unsigned Shift VX-form","bcdus. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|129@23|","v3.0" +"Decimal Unsigned Truncate VX-form","bcdutrunc. VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1@21|/@22|321@23|","v3.0" +"Compare Equal Byte X-form","cmpeqb BF,RA,RB","31@0|BF@6|//@9|RA@11|RB@16|224@21|/@31|","v3.0" +"Compare Ranged Byte X-form","cmprb BF,L,RA,RB","31@0|BF@6|/@9|L@10|RA@11|RB@16|192@21|/@31|","v3.0" +"Count Trailing Zeros Doubleword X-form","cnttzd RA,RS (Rc=0)|cnttzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|570@21|Rc@31|","v3.0" +"Count Trailing Zeros Word X-form","cnttzw RA,RS (Rc=0)|cnttzw. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|538@21|Rc@31|","v3.0" +"Copy X-form","copy RA,RB","31@0|///@6|1@10|RA@11|RB@16|774@21|/@31|","v3.0" +"Copy-Paste Abort X-form","cpabort","31@0|///@6|///@11|///@16|838@21|/@31|","v3.0" +"Deliver A Random Number X-form","darn RT,L","31@0|RT@6|///@11|L@14|///@16|755@21|/@31|","v3.0" +"DFP Test Significance Immediate X-form","dtstsfi BF,UIM,FRB","59@0|BF@6|/@9|UIM@10|FRB@16|675@21|/@31|","v3.0" +"DFP Test Significance Immediate Quad X-form","dtstsfiq BF,UIM,FRBp","63@0|BF@6|/@9|UIM@10|FRBp@16|675@21|/@31|","v3.0" +"Extend Sign Word and Shift Left Immediate XS-form","extswsli RA,RS,SH (Rc=0)|extswsli. 
RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|445@21|sh@30|Rc@31|","v3.0" +"Load Doubleword ATomic X-form","ldat RT,RA,FC","31@0|RT@6|RA@11|FC@16|614@21|/@31|","v3.0" +"Load Word ATomic X-form","lwat RT,RA,FC","31@0|RT@6|RA@11|FC@16|582@21|/@31|","v3.0" +"Load VSX Scalar Doubleword DS-form","lxsd VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|2@30|","v3.0" +"Load VSX Scalar as Integer Byte & Zero Indexed X-form","lxsibzx XT,RA,RB","31@0|T@6|RA@11|RB@16|781@21|TX@31|","v3.0" +"Load VSX Scalar as Integer Halfword & Zero Indexed X-form","lxsihzx XT,RA,RB","31@0|T@6|RA@11|RB@16|813@21|TX@31|","v3.0" +"Load VSX Scalar Single-Precision DS-form","lxssp VRT,DS(RA)","57@0|VRT@6|RA@11|DS@16|3@30|","v3.0" +"Load VSX Vector DQ-form","lxv XT,DQ(RA)","61@0|T@6|RA@11|DQ@16|TX@28|1@29|","v3.0" +"Load VSX Vector Byte*16 Indexed X-form","lxvb16x XT,RA,RB","31@0|T@6|RA@11|RB@16|876@21|TX@31|","v3.0" +"Load VSX Vector Halfword*8 Indexed X-form","lxvh8x XT,RA,RB","31@0|T@6|RA@11|RB@16|812@21|TX@31|","v3.0" +"Load VSX Vector with Length X-form","lxvl XT,RA,RB","31@0|T@6|RA@11|RB@16|269@21|TX@31|","v3.0" +"Load VSX Vector with Length Left-justified X-form","lxvll XT,RA,RB","31@0|T@6|RA@11|RB@16|301@21|TX@31|","v3.0" +"Load VSX Vector Word & Splat Indexed X-form","lxvwsx XT,RA,RB","31@0|T@6|RA@11|RB@16|364@21|TX@31|","v3.0" +"Load VSX Vector Indexed X-form","lxvx XT,RA,RB","31@0|T@6|RA@11|RB@16|4@21|/@25|12@26|TX@31|","v3.0" +"Multiply-Add High Doubleword VA-form","maddhd RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|48@26|","v3.0" +"Multiply-Add High Doubleword Unsigned VA-form","maddhdu RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|49@26|","v3.0" +"Multiply-Add Low Doubleword VA-form","maddld RT,RA,RB,RC","4@0|RT@6|RA@11|RB@16|RC@21|51@26|","v3.0" +"Move to CR from XER Extended X-form","mcrxrx BF","31@0|BF@6|//@9|///@11|///@16|576@21|/@31|","v3.0" +"Move From VSR Lower Doubleword X-form","mfvsrld RA,XS","31@0|S@6|RA@11|///@16|307@21|SX@31|","v3.0" +"Modulo Signed Doubleword X-form","modsd 
RT,RA,RB","31@0|RT@6|RA@11|RB@16|777@21|/@31|","v3.0" +"Modulo Signed Word X-form","modsw RT,RA,RB","31@0|RT@6|RA@11|RB@16|779@21|/@31|","v3.0" +"Modulo Unsigned Doubleword X-form","modud RT,RA,RB","31@0|RT@6|RA@11|RB@16|265@21|/@31|","v3.0" +"Modulo Unsigned Word X-form","moduw RT,RA,RB","31@0|RT@6|RA@11|RB@16|267@21|/@31|","v3.0" +"Message Synchronize X-form","msgsync","31@0|///@6|///@11|///@16|886@21|/@31|","v3.0" +"Move To VSR Double Doubleword X-form","mtvsrdd XT,RA,RB","31@0|T@6|RA@11|RB@16|435@21|TX@31|","v3.0" +"Move To VSR Word & Splat X-form","mtvsrws XT,RA","31@0|T@6|RA@11|///@16|403@21|TX@31|","v3.0" +"Paste X-form","paste. RA,RB,L","31@0|///@6|L@10|RA@11|RB@16|902@21|1@31|","v3.0" +"Set Boolean X-form","setb RT,BFA","31@0|RT@6|BFA@11|//@14|///@16|128@21|/@31|","v3.0" +"SLB Invalidate Entry Global X-form","slbieg RS,RB","31@0|RS@6|///@11|RB@16|466@21|/@31|","v3.0" +"SLB Synchronize X-form","slbsync","31@0|///@6|///@11|///@16|338@21|/@31|","v3.0" +"Store Doubleword ATomic X-form","stdat RS,RA,FC","31@0|RS@6|RA@11|FC@16|742@21|/@31|","v3.0" +"Stop XL-form","stop","19@0|///@6|///@11|///@16|370@21|/@31|","v3.0" +"Store Word ATomic X-form","stwat RS,RA,FC","31@0|RS@6|RA@11|FC@16|710@21|/@31|","v3.0" +"Store VSX Scalar Doubleword DS-form","stxsd VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|2@30|","v3.0" +"Store VSX Scalar as Integer Byte Indexed X-form","stxsibx XS,RA,RB","31@0|S@6|RA@11|RB@16|909@21|SX@31|","v3.0" +"Store VSX Scalar as Integer Halfword Indexed X-form","stxsihx XS,RA,RB","31@0|S@6|RA@11|RB@16|941@21|SX@31|","v3.0" +"Store VSX Scalar Single DS-form","stxssp VRS,DS(RA)","61@0|VRS@6|RA@11|DS@16|3@30|","v3.0" +"Store VSX Vector DQ-form","stxv XS,DQ(RA)","61@0|S@6|RA@11|DQ@16|SX@28|5@29|","v3.0" +"Store VSX Vector Byte*16 Indexed X-form","stxvb16x XS,RA,RB","31@0|S@6|RA@11|RB@16|1004@21|SX@31|","v3.0" +"Store VSX Vector Halfword*8 Indexed X-form","stxvh8x XS,RA,RB","31@0|S@6|RA@11|RB@16|940@21|SX@31|","v3.0" +"Store VSX Vector with Length X-form","stxvl 
XS,RA,RB","31@0|S@6|RA@11|RB@16|397@21|SX@31|","v3.0" +"Store VSX Vector with Length Left-justified X-form","stxvll XS,RA,RB","31@0|S@6|RA@11|RB@16|429@21|SX@31|","v3.0" +"Store VSX Vector Indexed X-form","stxvx XS,RA,RB","31@0|S@6|RA@11|RB@16|396@21|SX@31|","v3.0" +"Vector Absolute Difference Unsigned Byte VX-form","vabsdub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1027@21|","v3.0" +"Vector Absolute Difference Unsigned Halfword VX-form","vabsduh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1091@21|","v3.0" +"Vector Absolute Difference Unsigned Word VX-form","vabsduw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1155@21|","v3.0" +"Vector Bit Permute Doubleword VX-form","vbpermd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1484@21|","v3.0" +"Vector Count Leading Zero Least-Significant Bits Byte VX-form","vclzlsbb RT,VRB","4@0|RT@6|0@11|VRB@16|1538@21|","v3.0" +"Vector Compare Not Equal Byte VC-form","vcmpneb VRT,VRA,VRB (Rc=0)|vcmpneb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|7@22|","v3.0" +"Vector Compare Not Equal Halfword VC-form","vcmpneh VRT,VRA,VRB (Rc=0)|vcmpneh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|71@22|","v3.0" +"Vector Compare Not Equal Word VC-form","vcmpnew VRT,VRA,VRB (Rc=0)|vcmpnew. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|135@22|","v3.0" +"Vector Compare Not Equal or Zero Byte VC-form","vcmpnezb VRT,VRA,VRB (Rc=0)|vcmpnezb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|263@22|","v3.0" +"Vector Compare Not Equal or Zero Halfword VC-form","vcmpnezh VRT,VRA,VRB (Rc=0)|vcmpnezh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|327@22|","v3.0" +"Vector Compare Not Equal or Zero Word VC-form","vcmpnezw VRT,VRA,VRB (Rc=0)|vcmpnezw. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|391@22|","v3.0" +"Vector Count Trailing Zeros Byte VX-form","vctzb VRT,VRB","4@0|VRT@6|28@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zeros Doubleword VX-form","vctzd VRT,VRB","4@0|VRT@6|31@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zeros Halfword VX-form","vctzh VRT,VRB","4@0|VRT@6|29@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zero Least-Significant Bits Byte VX-form","vctzlsbb RT,VRB","4@0|RT@6|1@11|VRB@16|1538@21|","v3.0" +"Vector Count Trailing Zeros Word VX-form","vctzw VRT,VRB","4@0|VRT@6|30@11|VRB@16|1538@21|","v3.0" +"Vector Extract Doubleword to VSR using immediate-specified index VX-form","vextractd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|717@21|","v3.0" +"Vector Extract Unsigned Byte to VSR using immediate-specified index VX-form","vextractub VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|525@21|","v3.0" +"Vector Extract Unsigned Halfword to VSR using immediate-specified index VX-form","vextractuh VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|589@21|","v3.0" +"Vector Extract Unsigned Word to VSR using immediate-specified index VX-form","vextractuw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|653@21|","v3.0" +"Vector Extend Sign Byte To Doubleword VX-form","vextsb2d VRT,VRB","4@0|VRT@6|24@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Byte To Word VX-form","vextsb2w VRT,VRB","4@0|VRT@6|16@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Halfword To Doubleword VX-form","vextsh2d VRT,VRB","4@0|VRT@6|25@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Halfword To Word VX-form","vextsh2w VRT,VRB","4@0|VRT@6|17@11|VRB@16|1538@21|","v3.0" +"Vector Extend Sign Word To Doubleword VX-form","vextsw2d VRT,VRB","4@0|VRT@6|26@11|VRB@16|1538@21|","v3.0" +"Vector Extract Unsigned Byte to GPR using GPR-specified Left-Index VX-form","vextublx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1549@21|","v3.0" +"Vector Extract Unsigned Byte to GPR using GPR-specified Right-Index VX-form","vextubrx 
RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1805@21|","v3.0" +"Vector Extract Unsigned Halfword to GPR using GPR-specified Left-Index VX-form","vextuhlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1613@21|","v3.0" +"Vector Extract Unsigned Halfword to GPR using GPR-specified Right-Index VX-form","vextuhrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1869@21|","v3.0" +"Vector Extract Unsigned Word to GPR using GPR-specified Left-Index VX-form","vextuwlx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1677@21|","v3.0" +"Vector Extract Unsigned Word to GPR using GPR-specified Right-Index VX-form","vextuwrx RT,RA,VRB","4@0|RT@6|RA@11|VRB@16|1933@21|","v3.0" +"Vector Insert Byte from VSR using immediate-specified index VX-form","vinsertb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|781@21|","v3.0" +"Vector Insert Doubleword from VSR using immediate-specified index VX-form","vinsertd VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|973@21|","v3.0" +"Vector Insert Halfword from VSR using immediate-specified index VX-form","vinserth VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|845@21|","v3.0" +"Vector Insert Word from VSR using immediate-specified index VX-form","vinsertw VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|909@21|","v3.0" +"Vector Multiply-by-10 & write Carry-out Unsigned Quadword VX-form","vmul10cuq VRT,VRA","4@0|VRT@6|VRA@11|///@16|1@21|","v3.0" +"Vector Multiply-by-10 Extended & write Carry-out Unsigned Quadword VX-form","vmul10ecuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|65@21|","v3.0" +"Vector Multiply-by-10 Extended Unsigned Quadword VX-form","vmul10euq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|577@21|","v3.0" +"Vector Multiply-by-10 Unsigned Quadword VX-form","vmul10uq VRT,VRA","4@0|VRT@6|VRA@11|///@16|513@21|","v3.0" +"Vector Negate Doubleword VX-form","vnegd VRT,VRB","4@0|VRT@6|7@11|VRB@16|1538@21|","v3.0" +"Vector Negate Word VX-form","vnegw VRT,VRB","4@0|VRT@6|6@11|VRB@16|1538@21|","v3.0" +"Vector Permute Right-indexed VA-form","vpermr VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|59@26|","v3.0" +"Vector Parity 
Byte Doubleword VX-form","vprtybd VRT,VRB","4@0|VRT@6|9@11|VRB@16|1538@21|","v3.0" +"Vector Parity Byte Quadword VX-form","vprtybq VRT,VRB","4@0|VRT@6|10@11|VRB@16|1538@21|","v3.0" +"Vector Parity Byte Word VX-form","vprtybw VRT,VRB","4@0|VRT@6|8@11|VRB@16|1538@21|","v3.0" +"Vector Rotate Left Doubleword then Mask Insert VX-form","vrldmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|197@21|","v3.0" +"Vector Rotate Left Doubleword then AND with Mask VX-form","vrldnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|453@21|","v3.0" +"Vector Rotate Left Word then Mask Insert VX-form","vrlwmi VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|133@21|","v3.0" +"Vector Rotate Left Word then AND with Mask VX-form","vrlwnm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|389@21|","v3.0" +"Vector Shift Left Variable VX-form","vslv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1860@21|","v3.0" +"Vector Shift Right Variable VX-form","vsrv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1796@21|","v3.0" +"Wait X-form","wait WC,PL","31@0|??@6|/@8|WC@9|///@11|PL@14|///@16|30@21|/@31|","v3.0" +"VSX Scalar Absolute Quad-Precision X-form","xsabsqp VRT,VRB","63@0|VRT@6|0@11|VRB@16|804@21|/@31|","v3.0" +"VSX Scalar Add Quad-Precision [using round to Odd] X-form","xsaddqp VRT,VRA,VRB (RO=0)|xsaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|4@21|RO@31|","v3.0" +"VSX Scalar Compare Equal Double-Precision XX3-form","xscmpeqdp XT,XA,XB","60@0|T@6|A@11|B@16|3@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Compare Exponents Double-Precision XX3-form","xscmpexpdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|59@21|AX@29|BX@30|/@31|","v3.0" +"VSX Scalar Compare Exponents Quad-Precision X-form","xscmpexpqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|164@21|/@31|","v3.0" +"VSX Scalar Compare Greater Than or Equal Double-Precision XX3-form","xscmpgedp XT,XA,XB","60@0|T@6|A@11|B@16|19@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Compare Greater Than Double-Precision XX3-form","xscmpgtdp XT,XA,XB","60@0|T@6|A@11|B@16|11@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Compare 
Ordered Quad-Precision X-form","xscmpoqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|132@21|/@31|","v3.0" +"VSX Scalar Compare Unordered Quad-Precision X-form","xscmpuqp BF,VRA,VRB","63@0|BF@6|//@9|VRA@11|VRB@16|644@21|/@31|","v3.0" +"VSX Scalar Copy Sign Quad-Precision X-form","xscpsgnqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|100@21|/@31|","v3.0" +"VSX Scalar Convert with round Double-Precision to Half-Precision format XX2-form","xscvdphp XT,XB","60@0|T@6|17@11|B@16|347@21|BX@30|TX@31|","v3.0" +"VSX Scalar Convert Double-Precision to Quad-Precision format X-form","xscvdpqp VRT,VRB","63@0|VRT@6|22@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert Half-Precision to Double-Precision format XX2-form","xscvhpdp XT,XB","60@0|T@6|16@11|B@16|347@21|BX@30|TX@31|","v3.0" +"VSX Scalar Convert with round Quad-Precision to Double-Precision format [using round to Odd] X-form","xscvqpdp VRT,VRB (RO=0)|xscvqpdpo VRT,VRB (RO=1)","63@0|VRT@6|20@11|VRB@16|836@21|RO@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Signed Doubleword format X-form","xscvqpsdz VRT,VRB","63@0|VRT@6|25@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Signed Word format X-form","xscvqpswz VRT,VRB","63@0|VRT@6|9@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Doubleword format X-form","xscvqpudz VRT,VRB","63@0|VRT@6|17@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert with round to zero Quad-Precision to Unsigned Word format X-form","xscvqpuwz VRT,VRB","63@0|VRT@6|1@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert Signed Doubleword to Quad-Precision format X-form","xscvsdqp VRT,VRB","63@0|VRT@6|10@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Convert Unsigned Doubleword to Quad-Precision format X-form","xscvudqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|836@21|/@31|","v3.0" +"VSX Scalar Divide Quad-Precision [using round to Odd] X-form","xsdivqp VRT,VRA,VRB (RO=0)|xsdivqpo VRT,VRA,VRB 
(RO=1)","63@0|VRT@6|VRA@11|VRB@16|548@21|RO@31|","v3.0" +"VSX Scalar Insert Exponent Double-Precision X-form","xsiexpdp XT,RA,RB","60@0|T@6|RA@11|RB@16|918@21|TX@31|","v3.0" +"VSX Scalar Insert Exponent Quad-Precision X-form","xsiexpqp VRT,VRA,VRB","63@0|VRT@6|VRA@11|VRB@16|868@21|/@31|","v3.0" +"VSX Scalar Multiply-Add Quad-Precision [using round to Odd] X-form","xsmaddqp VRT,VRA,VRB (RO=0)|xsmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|388@21|RO@31|","v3.0" +"VSX Scalar Maximum Type-C Double-Precision XX3-form","xsmaxcdp XT,XA,XB","60@0|T@6|A@11|B@16|128@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Maximum Type-J Double-Precision XX3-form","xsmaxjdp XT,XA,XB","60@0|T@6|A@11|B@16|144@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Minimum Type-C Double-Precision XX3-form","xsmincdp XT,XA,XB","60@0|T@6|A@11|B@16|136@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Minimum Type-J Double-Precision XX3-form","xsminjdp XT,XA,XB","60@0|T@6|A@11|B@16|152@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Scalar Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsmsubqp VRT,VRA,VRB (RO=0)|xsmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|420@21|RO@31|","v3.0" +"VSX Scalar Multiply Quad-Precision [using round to Odd] X-form","xsmulqp VRT,VRA,VRB (RO=0)|xsmulqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|36@21|RO@31|","v3.0" +"VSX Scalar Negative Absolute Quad-Precision X-form","xsnabsqp VRT,VRB","63@0|VRT@6|8@11|VRB@16|804@21|TX@31|","v3.0" +"VSX Scalar Negate Quad-Precision X-form","xsnegqp VRT,VRB","63@0|VRT@6|16@11|VRB@16|804@21|/@31|","v3.0" +"VSX Scalar Negative Multiply-Add Quad-Precision [using round to Odd] X-form","xsnmaddqp VRT,VRA,VRB (RO=0)|xsnmaddqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|452@21|RO@31|","v3.0" +"VSX Scalar Negative Multiply-Subtract Quad-Precision [using round to Odd] X-form","xsnmsubqp VRT,VRA,VRB (RO=0)|xsnmsubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|484@21|RO@31|","v3.0" +"VSX Scalar Round to Quad-Precision Integer [with 
Inexact] Z23-form","xsrqpi R,VRT,VRB,RMC (EX=0)|xsrqpix R,VRT,VRB,RMC (EX=1)","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|5@23|EX@31|","v3.0" +"VSX Scalar Round Quad-Precision to Double-Extended Precision Z23-form","xsrqpxp R,VRT,VRB,RMC","63@0|VRT@6|///@11|R@15|VRB@16|RMC@21|37@23|/@31|","v3.0" +"VSX Scalar Square Root Quad-Precision [using round to Odd] X-form","xssqrtqp VRT,VRB (RO=0)|xssqrtqpo VRT,VRB (RO=1)","63@0|VRT@6|27@11|VRB@16|804@21|RO@31|","v3.0" +"VSX Scalar Subtract Quad-Precision [using round to Odd] X-form","xssubqp VRT,VRA,VRB (RO=0)|xssubqpo VRT,VRA,VRB (RO=1)","63@0|VRT@6|VRA@11|VRB@16|516@21|RO@31|","v3.0" +"VSX Scalar Test Data Class Double-Precision XX2-form","xststdcdp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|362@21|BX@30|/@31|","v3.0" +"VSX Scalar Test Data Class Quad-Precision X-form","xststdcqp BF,VRB,DCMX","63@0|BF@6|DCMX@9|VRB@16|708@21|/@31|","v3.0" +"VSX Scalar Test Data Class Single-Precision XX2-form","xststdcsp BF,XB,DCMX","60@0|BF@6|DCMX@9|B@16|298@21|BX@30|/@31|","v3.0" +"VSX Scalar Extract Exponent Double-Precision XX2-form","xsxexpdp RT,XB","60@0|RT@6|0@11|B@16|347@21|BX@30|/@31|","v3.0" +"VSX Scalar Extract Exponent Quad-Precision X-form","xsxexpqp VRT,VRB","63@0|VRT@6|2@11|VRB@16|804@21|/@31|","v3.0" +"VSX Scalar Extract Significand Double-Precision XX2-form","xsxsigdp RT,XB","60@0|RT@6|1@11|B@16|347@21|BX@30|/@31|","v3.0" +"VSX Scalar Extract Significand Quad-Precision X-form","xsxsigqp VRT,VRB","63@0|VRT@6|18@11|VRB@16|804@21|/@31|","v3.0" +"VSX Vector Convert Half-Precision to Single-Precision format XX2-form","xvcvhpsp XT,XB","60@0|T@6|24@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Convert with round Single-Precision to Half-Precision format XX2-form","xvcvsphp XT,XB","60@0|T@6|25@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Insert Exponent Double-Precision XX3-form","xviexpdp XT,XA,XB","60@0|T@6|A@11|B@16|248@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Insert Exponent Single-Precision XX3-form","xviexpsp 
XT,XA,XB","60@0|T@6|A@11|B@16|216@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Test Data Class Double-Precision XX2-form","xvtstdcdp XT,XB,DCMX","60@0|T@6|dx@11|B@16|15@21|dc@25|5@26|dm@29|BX@30|TX@31|","v3.0" +"VSX Vector Test Data Class Single-Precision XX2-form","xvtstdcsp XT,XB,DCMX","60@0|T@6|dx@11|B@16|13@21|dc@25|5@26|dm@29|BX@30|TX@31|","v3.0" +"VSX Vector Extract Exponent Double-Precision XX2-form","xvxexpdp XT,XB","60@0|T@6|0@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Exponent Single-Precision XX2-form","xvxexpsp XT,XB","60@0|T@6|8@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Significand Double-Precision XX2-form","xvxsigdp XT,XB","60@0|T@6|1@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Significand Single-Precision XX2-form","xvxsigsp XT,XB","60@0|T@6|9@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Doubleword XX2-form","xxbrd XT,XB","60@0|T@6|23@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Halfword XX2-form","xxbrh XT,XB","60@0|T@6|7@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Quadword XX2-form","xxbrq XT,XB","60@0|T@6|31@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Byte-Reverse Word XX2-form","xxbrw XT,XB","60@0|T@6|15@11|B@16|475@21|BX@30|TX@31|","v3.0" +"VSX Vector Extract Unsigned Word XX2-form","xxextractuw XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|165@21|BX@30|TX@31|","v3.0" +"VSX Vector Insert Word XX2-form","xxinsertw XT,XB,UIM","60@0|T@6|/@11|UIM@12|B@16|181@21|BX@30|TX@31|","v3.0" +"VSX Vector Permute XX3-form","xxperm XT,XA,XB","60@0|T@6|A@11|B@16|26@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Permute Right-indexed XX3-form","xxpermr XT,XA,XB","60@0|T@6|A@11|B@16|58@21|AX@29|BX@30|TX@31|","v3.0" +"VSX Vector Splat Immediate Byte X-form","xxspltib XT,IMM8","60@0|T@6|0@11|IMM8@13|360@21|TX@31|","v3.0" +"Decimal Add Modulo VX-form","bcdadd. VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|1@23|","v2.07" +"Decimal Subtract Modulo VX-form","bcdsub. 
VRT,VRA,VRB,PS","4@0|VRT@6|VRA@11|VRB@16|1@21|PS@22|65@23|","v2.07" +"Branch Conditional to Branch Target Address Register XL-form","bctar BO,BI,BH (LK=0)|bctarl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|560@21|LK@31|","v2.07" +"Clear BHRB X-form","clrbhrb","31@0|///@6|///@11|///@16|430@21|/@31|","v2.07" +"Floating Merge Even Word X-form","fmrgew FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|966@21|/@31|","v2.07" +"Floating Merge Odd Word X-form","fmrgow FRT,FRA,FRB","63@0|FRT@6|FRA@11|FRB@16|838@21|/@31|","v2.07" +"Instruction Cache Block Touch X-form","icbt CT, RA, RB","31@0|/@6|CT@7|RA@11|RB@16|22@21|/@31|","v2.07" +"Load Quadword And Reserve Indexed X-form","lqarx RTp,RA,RB,EH","31@0|RTp@6|RA@11|RB@16|276@21|EH@31|","v2.07" +"Load VSX Scalar as Integer Word Algebraic Indexed X-form","lxsiwax XT,RA,RB","31@0|T@6|RA@11|RB@16|76@21|TX@31|","v2.07" +"Load VSX Scalar as Integer Word & Zero Indexed X-form","lxsiwzx XT,RA,RB","31@0|T@6|RA@11|RB@16|12@21|TX@31|","v2.07" +"Load VSX Scalar Single-Precision Indexed X-form","lxsspx XT,RA,RB","31@0|T@6|RA@11|RB@16|524@21|TX@31|","v2.07" +"Move From BHRB XFX-form","mfbhrbe RT,BHRBE","31@0|RT@6|BHRBE@11|302@21|/@31|","v2.07" +"Move From VSR Doubleword X-form","mfvsrd RA,XS","31@0|S@6|RA@11|///@16|51@21|SX@31|","v2.07" +"Move From VSR Word and Zero X-form","mfvsrwz RA,XS","31@0|S@6|RA@11|///@16|115@21|SX@31|","v2.07" +"Message Clear X-form","msgclr RB","31@0|///@6|///@11|RB@16|238@21|/@31|","v2.07" +"Message Clear Privileged X-form","msgclrp RB","31@0|///@6|///@11|RB@16|174@21|/@31|","v2.07" +"Message Send X-form","msgsnd RB","31@0|///@6|///@11|RB@16|206@21|/@31|","v2.07" +"Message Send Privileged X-form","msgsndp RB","31@0|///@6|///@11|RB@16|142@21|/@31|","v2.07" +"Move To VSR Doubleword X-form","mtvsrd XT,RA","31@0|T@6|RA@11|///@16|179@21|TX@31|","v2.07" +"Move To VSR Word Algebraic X-form","mtvsrwa XT,RA","31@0|T@6|RA@11|///@16|211@21|TX@31|","v2.07" +"Move To VSR Word and Zero X-form","mtvsrwz 
XT,RA","31@0|T@6|RA@11|///@16|243@21|TX@31|","v2.07" +"Return from Event Based Branch XL-form","rfebb S","19@0|///@6|///@11|///@16|S@20|146@21|/@31|","v2.07" +"Store Quadword Conditional Indexed X-form","stqcx. RSp,RA,RB","31@0|RSp@6|RA@11|RB@16|182@21|1@31|","v2.07" +"Store VSX Scalar as Integer Word Indexed X-form","stxsiwx XS,RA,RB","31@0|S@6|RA@11|RB@16|140@21|SX@31|","v2.07" +"Store VSX Scalar Single-Precision Indexed X-form","stxsspx XS,RA,RB","31@0|S@6|RA@11|RB@16|652@21|SX@31|","v2.07" +"Vector Add & write Carry Unsigned Quadword VX-form","vaddcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|320@21|","v2.07" +"Vector Add Extended & write Carry Unsigned Quadword VA-form","vaddecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|61@26|","v2.07" +"Vector Add Extended Unsigned Quadword Modulo VA-form","vaddeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|60@26|","v2.07" +"Vector Add Unsigned Doubleword Modulo VX-form","vaddudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|192@21|","v2.07" +"Vector Add Unsigned Quadword Modulo VX-form","vadduqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|256@21|","v2.07" +"Vector Bit Permute Quadword VX-form","vbpermq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1356@21|","v2.07" +"Vector AES Cipher VX-form","vcipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1288@21|","v2.07" +"Vector AES Cipher Last VX-form","vcipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1289@21|","v2.07" +"Vector Count Leading Zeros Byte VX-form","vclzb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1794@21|","v2.07" +"Vector Count Leading Zeros Doubleword VX-form","vclzd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1986@21|","v2.07" +"Vector Count Leading Zeros Halfword VX-form","vclzh VRT,VRB","4@0|VRT@6|///@11|VRB@16|1858@21|","v2.07" +"Vector Count Leading Zeros Word VX-form","vclzw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1922@21|","v2.07" +"Vector Compare Equal Unsigned Doubleword VC-form","vcmpequd VRT,VRA,VRB (Rc=0)|vcmpequd. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|199@22|","v2.07" +"Vector Compare Greater Than Signed Doubleword VC-form","vcmpgtsd VRT,VRA,VRB (Rc=0)|vcmpgtsd. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|967@22|","v2.07" +"Vector Compare Greater Than Unsigned Doubleword VC-form","vcmpgtud VRT,VRA,VRB (Rc=0)|vcmpgtud. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|711@22|","v2.07" +"Vector Logical Equivalence VX-form","veqv VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1668@21|","v2.07" +"Vector Gather Bits by Bytes by Doubleword VX-form","vgbbd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1292@21|","v2.07" +"Vector Maximum Signed Doubleword VX-form","vmaxsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|450@21|","v2.07" +"Vector Maximum Unsigned Doubleword VX-form","vmaxud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|194@21|","v2.07" +"Vector Minimum Signed Doubleword VX-form","vminsd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|962@21|","v2.07" +"Vector Minimum Unsigned Doubleword VX-form","vminud VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|706@21|","v2.07" +"Vector Merge Even Word VX-form","vmrgew VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1932@21|","v2.07" +"Vector Merge Odd Word VX-form","vmrgow VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1676@21|","v2.07" +"Vector Multiply Even Signed Word VX-form","vmulesw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|904@21|","v2.07" +"Vector Multiply Even Unsigned Word VX-form","vmuleuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|648@21|","v2.07" +"Vector Multiply Odd Signed Word VX-form","vmulosw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|392@21|","v2.07" +"Vector Multiply Odd Unsigned Word VX-form","vmulouw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|136@21|","v2.07" +"Vector Multiply Unsigned Word Modulo VX-form","vmuluwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|137@21|","v2.07" +"Vector Logical NAND VX-form","vnand VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1412@21|","v2.07" +"Vector AES Inverse Cipher VX-form","vncipher VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1352@21|","v2.07" +"Vector AES 
Inverse Cipher Last VX-form","vncipherlast VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1353@21|","v2.07" +"Vector Logical OR with Complement VX-form","vorc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1348@21|","v2.07" +"Vector Permute & Exclusive-OR VA-form","vpermxor VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|45@26|","v2.07" +"Vector Pack Signed Doubleword Signed Saturate VX-form","vpksdss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1486@21|","v2.07" +"Vector Pack Signed Doubleword Unsigned Saturate VX-form","vpksdus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1358@21|","v2.07" +"Vector Pack Unsigned Doubleword Unsigned Modulo VX-form","vpkudum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1102@21|","v2.07" +"Vector Pack Unsigned Doubleword Unsigned Saturate VX-form","vpkudus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1230@21|","v2.07" +"Vector Polynomial Multiply-Sum Byte VX-form","vpmsumb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1032@21|","v2.07" +"Vector Polynomial Multiply-Sum Doubleword VX-form","vpmsumd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1224@21|","v2.07" +"Vector Polynomial Multiply-Sum Halfword VX-form","vpmsumh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1096@21|","v2.07" +"Vector Polynomial Multiply-Sum Word VX-form","vpmsumw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1160@21|","v2.07" +"Vector Population Count Byte VX-form","vpopcntb VRT,VRB","4@0|VRT@6|///@11|VRB@16|1795@21|","v2.07" +"Vector Population Count Doubleword VX-form","vpopcntd VRT,VRB","4@0|VRT@6|///@11|VRB@16|1987@21|","v2.07" +"Vector Population Count Halfword VX-form","vpopcnth VRT,VRB","4@0|VRT@6|///@11|VRB@16|1859@21|","v2.07" +"Vector Population Count Word VX-form","vpopcntw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1923@21|","v2.07" +"Vector Rotate Left Doubleword VX-form","vrld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|196@21|","v2.07" +"Vector AES SubBytes VX-form","vsbox VRT,VRA","4@0|VRT@6|VRA@11|///@16|1480@21|","v2.07" +"Vector SHA-512 Sigma Doubleword VX-form","vshasigmad 
VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1730@21|","v2.07" +"Vector SHA-256 Sigma Word VX-form","vshasigmaw VRT,VRA,ST,SIX","4@0|VRT@6|VRA@11|ST@16|SIX@17|1666@21|","v2.07" +"Vector Shift Left Doubleword VX-form","vsld VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1476@21|","v2.07" +"Vector Shift Right Algebraic Doubleword VX-form","vsrad VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|964@21|","v2.07" +"Vector Shift Right Doubleword VX-form","vsrd VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1732@21|","v2.07" +"Vector Subtract & write Carry-out Unsigned Quadword VX-form","vsubcuq VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1344@21|","v2.07" +"Vector Subtract Extended & write Carry-out Unsigned Quadword VA-form","vsubecuq VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|63@26|","v2.07" +"Vector Subtract Extended Unsigned Quadword Modulo VA-form","vsubeuqm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|62@26|","v2.07" +"Vector Subtract Unsigned Doubleword Modulo VX-form","vsubudm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1216@21|","v2.07" +"Vector Subtract Unsigned Quadword Modulo VX-form","vsubuqm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1280@21|","v2.07" +"Vector Unpack High Signed Word VX-form","vupkhsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1614@21|","v2.07" +"Vector Unpack Low Signed Word VX-form","vupklsw VRT,VRB","4@0|VRT@6|///@11|VRB@16|1742@21|","v2.07" +"VSX Scalar Add Single-Precision XX3-form","xsaddsp XT,XA,XB","60@0|T@6|A@11|B@16|0@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Convert Scalar Single-Precision to Vector Single-Precision format Non-signalling XX2-form","xscvdpspn XT,XB","60@0|T@6|///@11|B@16|267@21|BX@30|TX@31|","v2.07" +"VSX Scalar Convert Single-Precision to Double-Precision format Non-signalling XX2-form","xscvspdpn XT,XB","60@0|T@6|///@11|B@16|331@21|BX@30|TX@31|","v2.07" +"VSX Scalar Convert with round Signed Doubleword to Single-Precision format XX2-form","xscvsxdsp XT,XB","60@0|T@6|///@11|B@16|312@21|BX@30|TX@31|","v2.07" +"VSX Scalar Convert with round Unsigned 
Doubleword to Single-Precision XX2-form","xscvuxdsp XT,XB","60@0|T@6|///@11|B@16|296@21|BX@30|TX@31|","v2.07" +"VSX Scalar Divide Single-Precision XX3-form","xsdivsp XT,XA,XB","60@0|T@6|A@11|B@16|24@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Add Type-A Single-Precision XX3-form","xsmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|1@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Add Type-M Single-Precision XX3-form","xsmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|9@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Subtract Type-A Single-Precision XX3-form","xsmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|17@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply-Subtract Type-M Single-Precision XX3-form","xsmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|25@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Multiply Single-Precision XX3-form","xsmulsp XT,XA,XB","60@0|T@6|A@11|B@16|16@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Add Type-A Single-Precision XX3-form","xsnmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|129@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Add Type-M Single-Precision XX3-form","xsnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|137@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Subtract Type-A Single-Precision XX3-form","xsnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|145@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Negative Multiply-Subtract Type-M Single-Precision XX3-form","xsnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|153@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Scalar Reciprocal Estimate Single-Precision XX2-form","xsresp XT,XB","60@0|T@6|///@11|B@16|26@21|BX@30|TX@31|","v2.07" +"VSX Scalar Round to Single-Precision XX2-form","xsrsp XT,XB","60@0|T@6|///@11|B@16|281@21|BX@30|TX@31|","v2.07" +"VSX Scalar Reciprocal Square Root Estimate Single-Precision XX2-form","xsrsqrtesp XT,XB","60@0|T@6|///@11|B@16|10@21|BX@30|TX@31|","v2.07" +"VSX Scalar Square Root Single-Precision XX2-form","xssqrtsp XT,XB","60@0|T@6|///@11|B@16|11@21|BX@30|TX@31|","v2.07" +"VSX 
Scalar Subtract Single-Precision XX3-form","xssubsp XT,XA,XB","60@0|T@6|A@11|B@16|8@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Vector Logical Equivalence XX3-form","xxleqv XT,XA,XB","60@0|T@6|A@11|B@16|186@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Vector Logical NAND XX3-form","xxlnand XT,XA,XB","60@0|T@6|A@11|B@16|178@21|AX@29|BX@30|TX@31|","v2.07" +"VSX Vector Logical OR with Complement XX3-form","xxlorc XT,XA,XB","60@0|T@6|A@11|B@16|170@21|AX@29|BX@30|TX@31|","v2.07" +"Add and Generate Sixes XO-form","addg6s RT,RA,RB","31@0|RT@6|RA@11|RB@16|/@21|74@22|/@31|","v2.06" +"Bit Permute Doubleword X-form","bpermd RA,RS,RB","31@0|RS@6|RA@11|RB@16|252@21|/@31|","v2.06" +"Convert Binary Coded Decimal To Declets X-form","cbcdtd RA, RS","31@0|RS@6|RA@11|///@16|314@21|/@31|","v2.06" +"Convert Declets To Binary Coded Decimal X-form","cdtbcd RA, RS","31@0|RS@6|RA@11|///@16|282@21|/@31|","v2.06" +"DFP Convert From Fixed X-form","dcffix FRT,FRB (Rc=0)|dcffix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|802@21|Rc@31|","v2.06" +"Divide Doubleword Extended XO-form","divde RT,RA,RB (OE=0 Rc=0)|divde. RT,RA,RB (OE=0 Rc=1)|divdeo RT,RA,RB (OE=1 Rc=0)|divdeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|425@22|Rc@31|","v2.06" +"Divide Doubleword Extended Unsigned XO-form","divdeu RT,RA,RB (OE=0 Rc=0)|divdeu. RT,RA,RB (OE=0 Rc=1)|divdeuo RT,RA,RB (OE=1 Rc=0)|divdeuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|393@22|Rc@31|","v2.06" +"Divide Word Extended XO-form","divwe RT,RA,RB (OE=0 Rc=0)|divwe. RT,RA,RB (OE=0 Rc=1)|divweo RT,RA,RB (OE=1 Rc=0)|divweo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|427@22|Rc@31|","v2.06" +"Divide Word Extended Unsigned XO-form","divweu RT,RA,RB (OE=0 Rc=0)|divweu. RT,RA,RB (OE=0 Rc=1)|divweuo RT,RA,RB (OE=1 Rc=0)|divweuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|395@22|Rc@31|","v2.06" +"Floating Convert with round Signed Doubleword to Single-Precision format X-form","fcfids FRT,FRB (Rc=0)|fcfids. 
FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","v2.06" +"Floating Convert with round Unsigned Doubleword to Double-Precision format X-form","fcfidu FRT,FRB (Rc=0)|fcfidu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","v2.06" +"Floating Convert with round Unsigned Doubleword to Single-Precision format X-form","fcfidus FRT,FRB (Rc=0)|fcfidus. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|974@21|Rc@31|","v2.06" +"Floating Convert with round Double-Precision To Unsigned Doubleword format X-form","fctidu FRT,FRB (Rc=0)|fctidu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|942@21|Rc@31|","v2.06" +"Floating Convert with truncate Double-Precision To Unsigned Doubleword format X-form","fctiduz FRT,FRB (Rc=0)|fctiduz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|943@21|Rc@31|","v2.06" +"Floating Convert with round Double-Precision To Unsigned Word format X-form","fctiwu FRT,FRB (Rc=0)|fctiwu. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|142@21|Rc@31|","v2.06" +"Floating Convert with truncate Double-Precision To Unsigned Word format X-form","fctiwuz FRT,FRB (Rc=0)|fctiwuz. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|143@21|Rc@31|","v2.06" +"Floating Test for software Divide X-form","ftdiv BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|128@21|/@31|","v2.06" +"Floating Test for software Square Root X-form","ftsqrt BF,FRB","63@0|BF@6|//@9|///@11|FRB@16|160@21|/@31|","v2.06" +"Load Byte And Reserve Indexed X-form","lbarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|52@21|EH@31|","v2.06" +"Load Doubleword Byte-Reverse Indexed X-form","ldbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|532@21|/@31|","v2.06" +"Load Floating-Point as Integer Word & Zero Indexed X-form","lfiwzx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|887@21|/@31|","v2.06" +"Load Halfword And Reserve Indexed Xform","lharx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|116@21|EH@31|","v2.06" +"Load VSX Scalar Doubleword Indexed X-form","lxsdx XT,RA,RB","31@0|T@6|RA@11|RB@16|588@21|TX@31|","v2.06" +"Load VSX Vector Doubleword*2 Indexed X-form","lxvd2x XT,RA,RB","31@0|T@6|RA@11|RB@16|844@21|TX@31|","v2.06" +"Load VSX Vector Doubleword & Splat Indexed X-form","lxvdsx XT,RA,RB","31@0|T@6|RA@11|RB@16|332@21|TX@31|","v2.06" +"Load VSX Vector Word*4 Indexed X-form","lxvw4x XT,RA,RB","31@0|T@6|RA@11|RB@16|780@21|TX@31|","v2.06" +"Population Count Doubleword X-form","popcntd RA, RS","31@0|RS@6|RA@11|///@16|506@21|/@31|","v2.06" +"Population Count Words X-form","popcntw RA, RS","31@0|RS@6|RA@11|///@16|378@21|/@31|","v2.06" +"Store Byte Conditional Indexed X-form","stbcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|694@21|1@31|","v2.06" +"Store Doubleword Byte-Reverse Indexed X-form","stdbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|660@21|/@31|","v2.06" +"Store Halfword Conditional Indexed X-form","sthcx. 
RS,RA,RB","31@0|RS@6|RA@11|RB@16|726@21|1@31|","v2.06" +"Store VSX Scalar Doubleword Indexed X-form","stxsdx XS,RA,RB","31@0|S@6|RA@11|RB@16|716@21|SX@31|","v2.06" +"Store VSX Vector Doubleword*2 Indexed X-form","stxvd2x XS,RA,RB","31@0|S@6|RA@11|RB@16|972@21|SX@31|","v2.06" +"Store VSX Vector Word*4 Indexed X-form","stxvw4x XS,RA,RB","31@0|S@6|RA@11|RB@16|908@21|SX@31|","v2.06" +"VSX Scalar Absolute Double-Precision XX2-form","xsabsdp XT,XB","60@0|T@6|///@11|B@16|345@21|BX@30|TX@31|","v2.06" +"VSX Scalar Add Double-Precision XX3-form","xsadddp XT,XA,XB","60@0|T@6|A@11|B@16|32@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Compare Ordered Double-Precision XX3-form","xscmpodp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|43@21|AX@29|BX@30|/@31|","v2.06" +"VSX Scalar Compare Unordered Double-Precision XX3-form","xscmpudp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|35@21|AX@29|BX@30|/@31|","v2.06" +"VSX Scalar Copy Sign Double-Precision XX3-form","xscpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|176@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round Double-Precision to Single-Precision format XX2-form","xscvdpsp XT,XB","60@0|T@6|///@11|B@16|265@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xscvdpsxds XT,XB","60@0|T@6|///@11|B@16|344@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Signed Word format XX2-form","xscvdpsxws XT,XB","60@0|T@6|///@11|B@16|88@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Unsigned Doubleword format XX2-form","xscvdpuxds XT,XB","60@0|T@6|///@11|B@16|328@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xscvdpuxws XT,XB","60@0|T@6|///@11|B@16|72@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert Single-Precision to Double-Precision format XX2-form","xscvspdp XT,XB","60@0|T@6|///@11|B@16|329@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round Signed 
Doubleword to Double-Precision format XX2-form","xscvsxddp XT,XB","60@0|T@6|///@11|B@16|376@21|BX@30|TX@31|","v2.06" +"VSX Scalar Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xscvuxddp XT,XB","60@0|T@6|///@11|B@16|360@21|BX@30|TX@31|","v2.06" +"VSX Scalar Divide Double-Precision XX3-form","xsdivdp XT,XA,XB","60@0|T@6|A@11|B@16|56@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Add Type-A Double-Precision XX3-form","xsmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|33@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Add Type-M Double-Precision XX3-form","xsmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|41@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Maximum Double-Precision XX3-form","xsmaxdp XT,XA,XB","60@0|T@6|A@11|B@16|160@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Minimum Double-Precision XX3-form","xsmindp XT,XA,XB","60@0|T@6|A@11|B@16|168@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Subtract Type-A Double-Precision XX3-form","xsmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|49@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply-Subtract Type-M Double-Precision XX3-form","xsmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|57@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Multiply Double-Precision XX3-form","xsmuldp XT,XA,XB","60@0|T@6|A@11|B@16|48@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Absolute Double-Precision XX2-form","xsnabsdp XT,XB","60@0|T@6|///@11|B@16|361@21|BX@30|TX@31|","v2.06" +"VSX Scalar Negate Double-Precision XX2-form","xsnegdp XT,XB","60@0|T@6|///@11|B@16|377@21|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Multiply-Add Type-A Double-Precision XX3-form","xsnmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|161@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Multiply-Add Type-M Double-Precision XX3-form","xsnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|169@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Negative Multiply-Subtract Type-A Double-Precision XX3-form","xsnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|177@21|AX@29|BX@30|TX@31|","v2.06" +"VSX 
Scalar Negative Multiply-Subtract Type-M Double-Precision XX3-form","xsnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|185@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round to Nearest Away XX2-form","xsrdpi XT,XB","60@0|T@6|///@11|B@16|73@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer exact using Current rounding mode XX2-form","xsrdpic XT,XB","60@0|T@6|///@11|B@16|107@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round toward -Infinity XX2-form","xsrdpim XT,XB","60@0|T@6|///@11|B@16|121@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round toward +Infinity XX2-form","xsrdpip XT,XB","60@0|T@6|///@11|B@16|105@21|BX@30|TX@31|","v2.06" +"VSX Scalar Round to Double-Precision Integer using round toward Zero XX2-form","xsrdpiz XT,XB","60@0|T@6|///@11|B@16|89@21|BX@30|TX@31|","v2.06" +"VSX Scalar Reciprocal Estimate Double-Precision XX2-form","xsredp XT,XB","60@0|T@6|///@11|B@16|90@21|BX@30|TX@31|","v2.06" +"VSX Scalar Reciprocal Square Root Estimate Double-Precision XX2-form","xsrsqrtedp XT,XB","60@0|T@6|///@11|B@16|74@21|BX@30|TX@31|","v2.06" +"VSX Scalar Square Root Double-Precision XX2-form","xssqrtdp XT,XB","60@0|T@6|///@11|B@16|75@21|BX@30|TX@31|","v2.06" +"VSX Scalar Subtract Double-Precision XX3-form","xssubdp XT,XA,XB","60@0|T@6|A@11|B@16|40@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Scalar Test for software Divide Double-Precision XX3-form","xstdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|61@21|AX@29|BX@30|/@31|","v2.06" +"VSX Scalar Test for software Square Root Double-Precision XX2-form","xstsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|106@21|BX@30|/@31|","v2.06" +"VSX Vector Absolute Value Double-Precision XX2-form","xvabsdp XT,XB","60@0|T@6|///@11|B@16|473@21|BX@30|TX@31|","v2.06" +"VSX Vector Absolute Value Single-Precision XX2-form","xvabssp XT,XB","60@0|T@6|///@11|B@16|409@21|BX@30|TX@31|","v2.06" +"VSX Vector Add Double-Precision XX3-form","xvadddp 
XT,XA,XB","60@0|T@6|A@11|B@16|96@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Add Single-Precision XX3-form","xvaddsp XT,XA,XB","60@0|T@6|A@11|B@16|64@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Equal To Double-Precision XX3-form","xvcmpeqdp XT,XA,XB (Rc=0)|xvcmpeqdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|99@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Equal To Single-Precision XX3-form","xvcmpeqsp XT,XA,XB (Rc=0)|xvcmpeqsp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|67@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than or Equal To Double-Precision XX3-form","xvcmpgedp XT,XA,XB (Rc=0)|xvcmpgedp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|115@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than or Equal To Single-Precision XX3-form","xvcmpgesp XT,XA,XB (Rc=0)|xvcmpgesp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|83@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than Double-Precision XX3-form","xvcmpgtdp XT,XA,XB (Rc=0)|xvcmpgtdp. XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|107@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Compare Greater Than Single-Precision XX3-form","xvcmpgtsp XT,XA,XB (Rc=0)|xvcmpgtsp. 
XT,XA,XB (Rc=1)","60@0|T@6|A@11|B@16|Rc@21|75@22|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Copy Sign Double-Precision XX3-form","xvcpsgndp XT,XA,XB","60@0|T@6|A@11|B@16|240@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Copy Sign Single-Precision XX3-form","xvcpsgnsp XT,XA,XB","60@0|T@6|A@11|B@16|208@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Double-Precision to Single-Precision format XX2-form","xvcvdpsp XT,XB","60@0|T@6|///@11|B@16|393@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Signed Doubleword format XX2-form","xvcvdpsxds XT,XB","60@0|T@6|///@11|B@16|472@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Signed Word format XX2-form","xvcvdpsxws XT,XB","60@0|T@6|///@11|B@16|216@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Unsigned Doubleword format XX2-form","xvcvdpuxds XT,XB","60@0|T@6|///@11|B@16|456@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Double-Precision to Unsigned Word format XX2-form","xvcvdpuxws XT,XB","60@0|T@6|///@11|B@16|200@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert Single-Precision to Double-Precision format XX2-form","xvcvspdp XT,XB","60@0|T@6|///@11|B@16|457@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Signed Doubleword format XX2-form","xvcvspsxds XT,XB","60@0|T@6|///@11|B@16|408@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Signed Word format XX2-form","xvcvspsxws XT,XB","60@0|T@6|///@11|B@16|152@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Unsigned Doubleword format XX2-form","xvcvspuxds XT,XB","60@0|T@6|///@11|B@16|392@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round to zero Single-Precision to Unsigned Word format XX2-form","xvcvspuxws XT,XB","60@0|T@6|///@11|B@16|136@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Signed Doubleword to Double-Precision 
format XX2-form","xvcvsxddp XT,XB","60@0|T@6|///@11|B@16|504@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Signed Doubleword to Single-Precision format XX2-form","xvcvsxdsp XT,XB","60@0|T@6|///@11|B@16|440@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert Signed Word to Double-Precision format XX2-form","xvcvsxwdp XT,XB","60@0|T@6|///@11|B@16|248@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Signed Word to Single-Precision format XX2-form","xvcvsxwsp XT,XB","60@0|T@6|///@11|B@16|184@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Unsigned Doubleword to Double-Precision format XX2-form","xvcvuxddp XT,XB","60@0|T@6|///@11|B@16|488@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Unsigned Doubleword to Single-Precision format XX2-form","xvcvuxdsp XT,XB","60@0|T@6|///@11|B@16|424@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert Unsigned Word to Double-Precision format XX2-form","xvcvuxwdp XT,XB","60@0|T@6|///@11|B@16|232@21|BX@30|TX@31|","v2.06" +"VSX Vector Convert with round Unsigned Word to Single-Precision format XX2-form","xvcvuxwsp XT,XB","60@0|T@6|///@11|B@16|168@21|BX@30|TX@31|","v2.06" +"VSX Vector Divide Double-Precision XX3-form","xvdivdp XT,XA,XB","60@0|T@6|A@11|B@16|120@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Divide Single-Precision XX3-form","xvdivsp XT,XA,XB","60@0|T@6|A@11|B@16|88@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-A Double-Precision XX3-form","xvmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|97@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-A Single-Precision XX3-form","xvmaddasp XT,XA,XB","60@0|T@6|A@11|B@16|65@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-M Double-Precision XX3-form","xvmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|105@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Add Type-M Single-Precision XX3-form","xvmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|73@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Maximum Double-Precision XX3-form","xvmaxdp 
XT,XA,XB","60@0|T@6|A@11|B@16|224@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Maximum Single-Precision XX3-form","xvmaxsp XT,XA,XB","60@0|T@6|A@11|B@16|192@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Minimum Double-Precision XX3-form","xvmindp XT,XA,XB","60@0|T@6|A@11|B@16|232@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Minimum Single-Precision XX3-form","xvminsp XT,XA,XB","60@0|T@6|A@11|B@16|200@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-A Double-Precision XX3-form","xvmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|113@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-A Single-Precision XX3-form","xvmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|81@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-M Double-Precision XX3-form","xvmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|121@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply-Subtract Type-M Single-Precision XX3-form","xvmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|89@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply Double-Precision XX3-form","xvmuldp XT,XA,XB","60@0|T@6|A@11|B@16|112@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Multiply Single-Precision XX3-form","xvmulsp XT,XA,XB","60@0|T@6|A@11|B@16|80@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Absolute Double-Precision XX2-form","xvnabsdp XT,XB","60@0|T@6|///@11|B@16|489@21|BX@30|TX@31|","v2.06" +"VSX Vector Negative Absolute Single-Precision XX2-form","xvnabssp XT,XB","60@0|T@6|///@11|B@16|425@21|BX@30|TX@31|","v2.06" +"VSX Vector Negate Double-Precision XX2-form","xvnegdp XT,XB","60@0|T@6|///@11|B@16|505@21|BX@30|TX@31|","v2.06" +"VSX Vector Negate Single-Precision XX2-form","xvnegsp XT,XB","60@0|T@6|///@11|B@16|441@21|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-A Double-Precision XX3-form","xvnmaddadp XT,XA,XB","60@0|T@6|A@11|B@16|225@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-A Single-Precision XX3-form","xvnmaddasp 
XT,XA,XB","60@0|T@6|A@11|B@16|193@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-M Double-Precision XX3-form","xvnmaddmdp XT,XA,XB","60@0|T@6|A@11|B@16|233@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Add Type-M Single-Precision XX3-form","xvnmaddmsp XT,XA,XB","60@0|T@6|A@11|B@16|201@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-A Double-Precision XX3-form","xvnmsubadp XT,XA,XB","60@0|T@6|A@11|B@16|241@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-A Single-Precision XX3-form","xvnmsubasp XT,XA,XB","60@0|T@6|A@11|B@16|209@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-M Double-Precision XX3-form","xvnmsubmdp XT,XA,XB","60@0|T@6|A@11|B@16|249@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Negative Multiply-Subtract Type-M Single-Precision XX3-form","xvnmsubmsp XT,XA,XB","60@0|T@6|A@11|B@16|217@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round to Nearest Away XX2-form","xvrdpi XT,XB","60@0|T@6|///@11|B@16|201@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer Exact using Current rounding mode XX2-form","xvrdpic XT,XB","60@0|T@6|///@11|B@16|235@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round toward -Infinity XX2-form","xvrdpim XT,XB","60@0|T@6|///@11|B@16|249@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round toward +Infinity XX2-form","xvrdpip XT,XB","60@0|T@6|///@11|B@16|233@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Double-Precision Integer using round toward Zero XX2-form","xvrdpiz XT,XB","60@0|T@6|///@11|B@16|217@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Estimate Double-Precision XX2-form","xvredp XT,XB","60@0|T@6|///@11|B@16|218@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Estimate Single-Precision XX2-form","xvresp XT,XB","60@0|T@6|///@11|B@16|154@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to 
Single-Precision Integer using round to Nearest Away XX2-form","xvrspi XT,XB","60@0|T@6|///@11|B@16|137@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer Exact using Current rounding mode XX2-form","xvrspic XT,XB","60@0|T@6|///@11|B@16|171@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer using round toward -Infinity XX2-form","xvrspim XT,XB","60@0|T@6|///@11|B@16|185@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer using round toward +Infinity XX2-form","xvrspip XT,XB","60@0|T@6|///@11|B@16|169@21|BX@30|TX@31|","v2.06" +"VSX Vector Round to Single-Precision Integer using round toward Zero XX2-form","xvrspiz XT,XB","60@0|T@6|///@11|B@16|153@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Square Root Estimate Double-Precision XX2-form","xvrsqrtedp XT,XB","60@0|T@6|///@11|B@16|202@21|BX@30|TX@31|","v2.06" +"VSX Vector Reciprocal Square Root Estimate Single-Precision XX2-form","xvrsqrtesp XT,XB","60@0|T@6|///@11|B@16|138@21|BX@30|TX@31|","v2.06" +"VSX Vector Square Root Double-Precision XX2-form","xvsqrtdp XT,XB","60@0|T@6|///@11|B@16|203@21|BX@30|TX@31|","v2.06" +"VSX Vector Square Root Single-Precision XX2-form","xvsqrtsp XT,XB","60@0|T@6|///@11|B@16|139@21|BX@30|TX@31|","v2.06" +"VSX Vector Subtract Double-Precision XX3-form","xvsubdp XT,XA,XB","60@0|T@6|A@11|B@16|104@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Subtract Single-Precision XX3-form","xvsubsp XT,XA,XB","60@0|T@6|A@11|B@16|72@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Test for software Divide Double-Precision XX3-form","xvtdivdp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|125@21|AX@29|BX@30|/@31|","v2.06" +"VSX Vector Test for software Divide Single-Precision XX3-form","xvtdivsp BF,XA,XB","60@0|BF@6|//@9|A@11|B@16|93@21|AX@29|BX@30|/@31|","v2.06" +"VSX Vector Test for software Square Root Double-Precision XX2-form","xvtsqrtdp BF,XB","60@0|BF@6|//@9|///@11|B@16|234@21|BX@30|/@31|","v2.06" +"VSX Vector Test for software Square Root 
Single-Precision XX2-form","xvtsqrtsp BF,XB","60@0|BF@6|//@9|///@11|B@16|170@21|BX@30|/@31|","v2.06" +"VSX Vector Logical AND XX3-form","xxland XT,XA,XB","60@0|T@6|A@11|B@16|130@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical AND with Complement XX3-form","xxlandc XT,XA,XB","60@0|T@6|A@11|B@16|138@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical NOR XX3-form","xxlnor XT,XA,XB","60@0|T@6|A@11|B@16|162@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical OR XX3-form","xxlor XT,XA,XB","60@0|T@6|A@11|B@16|146@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Logical XOR XX3-form","xxlxor XT,XA,XB","60@0|T@6|A@11|B@16|154@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Merge High Word XX3-form","xxmrghw XT,XA,XB","60@0|T@6|A@11|B@16|18@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Merge Low Word XX3-form","xxmrglw XT,XA,XB","60@0|T@6|A@11|B@16|50@21|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Permute Doubleword Immediate XX3-form","xxpermdi XT,XA,XB,DM","60@0|T@6|A@11|B@16|0@21|DM@22|10@24|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Select XX4-form","xxsel XT,XA,XB,XC","60@0|T@6|A@11|B@16|C@21|3@26|CX@28|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Shift Left Double by Word Immediate XX3-form","xxsldwi XT,XA,XB,SHW","60@0|T@6|A@11|B@16|0@21|SHW@22|2@24|AX@29|BX@30|TX@31|","v2.06" +"VSX Vector Splat Word XX2-form","xxspltw XT,XB,UIM","60@0|T@6|///@11|UIM@14|B@16|164@21|BX@30|TX@31|","v2.06" +"Compare Bytes X-form","cmpb RA,RS,RB","31@0|RS@6|RA@11|RB@16|508@21|/@31|","v2.05" +"DFP Add X-form","dadd FRT,FRA,FRB (Rc=0)|dadd. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|2@21|Rc@31|","v2.05" +"DFP Add Quad X-form","daddq FRTp,FRAp,FRBp (Rc=0)|daddq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|2@21|Rc@31|","v2.05" +"DFP Convert From Fixed Quad X-form","dcffixq FRTp,FRB (Rc=0)|dcffixq. 
FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|802@21|Rc@31|","v2.05" +"DFP Compare Ordered X-form","dcmpo BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|130@21|/@31|","v2.05" +"DFP Compare Ordered Quad X-form","dcmpoq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|130@21|/@31|","v2.05" +"DFP Compare Unordered X-form","dcmpu BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|642@21|/@31|","v2.05" +"DFP Compare Unordered Quad X-form","dcmpuq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|642@21|/@31|","v2.05" +"DFP Convert To DFP Long X-form","dctdp FRT,FRB (Rc=0)|dctdp. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|258@21|Rc@31|","v2.05" +"DFP Convert To Fixed X-form","dctfix FRT,FRB (Rc=0)|dctfix. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|290@21|Rc@31|","v2.05" +"DFP Convert To Fixed Quad X-form","dctfixq FRT,FRBp (Rc=0)|dctfixq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|290@21|Rc@31|","v2.05" +"DFP Convert To DFP Extended X-form","dctqpq FRTp,FRB (Rc=0)|dctqpq. FRTp,FRB (Rc=1)","63@0|FRTp@6|///@11|FRB@16|258@21|Rc@31|","v2.05" +"DFP Decode DPD To BCD X-form","ddedpd SP,FRT,FRB (Rc=0)|ddedpd. SP,FRT,FRB (Rc=1)","59@0|FRT@6|SP@11|///@13|FRB@16|322@21|Rc@31|","v2.05" +"DFP Decode DPD To BCD Quad X-form","ddedpdq SP,FRTp,FRBp (Rc=0)|ddedpdq. SP,FRTp,FRBp (Rc=1)","63@0|FRTp@6|SP@11|///@13|FRBp@16|322@21|Rc@31|","v2.05" +"DFP Divide X-form","ddiv FRT,FRA,FRB (Rc=0)|ddiv. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|546@21|Rc@31|","v2.05" +"DFP Divide Quad X-form","ddivq FRTp,FRAp,FRBp (Rc=0)|ddivq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|546@21|Rc@31|","v2.05" +"DFP Encode BCD To DPD X-form","denbcd S,FRT,FRB (Rc=0)|denbcd. S,FRT,FRB (Rc=1)","59@0|FRT@6|S@11|///@12|FRB@16|834@21|Rc@31|","v2.05" +"DFP Encode BCD To DPD Quad X-form","denbcdq S,FRTp,FRBp (Rc=0)|denbcdq. S,FRTp,FRBp (Rc=1)","63@0|FRTp@6|S@11|///@12|FRBp@16|834@21|Rc@31|","v2.05" +"DFP Insert Biased Exponent X-form","diex FRT,FRA,FRB (Rc=0)|diex. 
FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|866@21|Rc@31|","v2.05" +"DFP Insert Biased Exponent Quad X-form","diexq FRTp,FRA,FRBp|diexq. FRTp,FRA,FRBp (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|866@21|Rc@31|","v2.05" +"DFP Multiply X-form","dmul FRT,FRA,FRB (Rc=0)|dmul. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|34@21|Rc@31|","v2.05" +"DFP Multiply Quad X-form","dmulq FRTp,FRAp,FRBp (Rc=0)|dmulq. FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|34@21|Rc@31|","v2.05" +"DFP Quantize Z23-form","dqua FRT,FRA,FRB,RMC (Rc=0)|dqua. FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|3@23|Rc@31|","v2.05" +"DFP Quantize Immediate Z23-form","dquai TE,FRT,FRB,RMC (Rc=0)|dquai. TE,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|TE@11|FRB@16|RMC@21|67@23|Rc@31|","v2.05" +"DFP Quantize Immediate Quad Z23-form","dquaiq TE,FRTp,FRBp,RMC (Rc=0)|dquaiq. TE,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|TE@11|FRBp@16|RMC@21|67@23|Rc@31|","v2.05" +"DFP Quantize Quad Z23-form","dquaq FRTp,FRAp,FRBp,RMC (Rc=0)|dquaq. FRTp,FRAp,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|RMC@21|3@23|Rc@31|","v2.05" +"DFP Round To DFP Long X-form","drdpq FRTp,FRBp (Rc=0)|drdpq. FRTp,FRBp (Rc=1)","63@0|FRTp@6|///@11|FRBp@16|770@21|Rc@31|","v2.05" +"DFP Round To FP Integer Without Inexact Z23-form","drintn R,FRT,FRB,RMC (Rc=0)|drintn. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|227@23|Rc@31|","v2.05" +"DFP Round To FP Integer Without Inexact Quad Z23-form","drintnq R,FRTp,FRBp,RMC (Rc=0)|drintnq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|227@23|Rc@31|","v2.05" +"DFP Round To FP Integer With Inexact Z23-form","drintx R,FRT,FRB,RMC (Rc=0)|drintx. R,FRT,FRB,RMC (Rc=1)","59@0|FRT@6|///@11|R@15|FRB@16|RMC@21|99@23|Rc@31|","v2.05" +"DFP Round To FP Integer With Inexact Quad Z23-form","drintxq R,FRTp,FRBp,RMC (Rc=0)|drintxq. R,FRTp,FRBp,RMC (Rc=1)","63@0|FRTp@6|///@11|R@15|FRBp@16|RMC@21|99@23|Rc@31|","v2.05" +"DFP Reround Z23-form","drrnd FRT,FRA,FRB,RMC (Rc=0)|drrnd. 
FRT,FRA,FRB,RMC (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|RMC@21|35@23|Rc@31|","v2.05" +"DFP Reround Quad Z23-form","drrndq FRTp,FRA,FRBp,RMC (Rc=0)|drrndq. FRTp,FRA,FRBp,RMC (Rc=1)","63@0|FRTp@6|FRA@11|FRBp@16|RMC@21|35@23|Rc@31|","v2.05" +"DFP Round To DFP Short X-form","drsp FRT,FRB (Rc=0)|drsp. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|770@21|Rc@31|","v2.05" +"DFP Shift Significand Left Immediate Z22-form","dscli FRT,FRA,SH (Rc=0)|dscli. FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|66@22|Rc@31|","v2.05" +"DFP Shift Significand Left Immediate Quad Z22-form","dscliq FRTp,FRAp,SH (Rc=0)|dscliq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|66@22|Rc@31|","v2.05" +"DFP Shift Significand Right Immediate Z22-form","dscri FRT,FRA,SH (Rc=0)|dscri. FRT,FRA,SH (Rc=1)","59@0|FRT@6|FRA@11|SH@16|98@22|Rc@31|","v2.05" +"DFP Shift Significand Right Immediate Quad Z22-form","dscriq FRTp,FRAp,SH (Rc=0)|dscriq. FRTp,FRAp,SH (Rc=1)","63@0|FRTp@6|FRAp@11|SH@16|98@22|Rc@31|","v2.05" +"DFP Subtract X-form","dsub FRT,FRA,FRB (Rc=0)|dsub. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|514@21|Rc@31|","v2.05" +"DFP Subtract Quad X-form","dsubq FRTp,FRAp,FRBp (Rc=0)|dsubq. 
FRTp,FRAp,FRBp (Rc=1)","63@0|FRTp@6|FRAp@11|FRBp@16|514@21|Rc@31|","v2.05" +"DFP Test Data Class Z22-form","dtstdc BF,FRA,DCM","59@0|BF@6|//@9|FRA@11|DCM@16|194@22|/@31|","v2.05" +"DFP Test Data Class Quad Z22-form","dtstdcq BF,FRAp,DCM","63@0|BF@6|//@9|FRAp@11|DCM@16|194@22|/@31|","v2.05" +"DFP Test Data Group Z22-form","dtstdg BF,FRA,DGM","59@0|BF@6|//@9|FRA@11|DGM@16|226@22|/@31|","v2.05" +"DFP Test Data Group Quad Z22-form","dtstdgq BF,FRAp,DGM","63@0|BF@6|//@9|FRAp@11|DGM@16|226@22|/@31|","v2.05" +"DFP Test Exponent X-form","dtstex BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|162@21|/@31|","v2.05" +"DFP Test Exponent Quad X-form","dtstexq BF,FRAp,FRBp","63@0|BF@6|//@9|FRAp@11|FRBp@16|162@21|/@31|","v2.05" +"DFP Test Significance X-form","dtstsf BF,FRA,FRB","59@0|BF@6|//@9|FRA@11|FRB@16|674@21|/@31|","v2.05" +"DFP Test Significance Quad X-form","dtstsfq BF,FRA,FRBp","63@0|BF@6|//@9|FRA@11|FRBp@16|674@21|/@31|","v2.05" +"DFP Extract Biased Exponent X-form","dxex FRT,FRB (Rc=0)|dxex. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|354@21|Rc@31|","v2.05" +"DFP Extract Biased Exponent Quad X-form","dxexq FRT,FRBp (Rc=0)|dxexq. FRT,FRBp (Rc=1)","63@0|FRT@6|///@11|FRBp@16|354@21|Rc@31|","v2.05" +"Floating Copy Sign X-form","fcpsgn FRT, FRA, FRB (Rc=0)|fcpsgn. 
FRT, FRA, FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|8@21|Rc@31|","v2.05" +"Load Byte & Zero Caching Inhibited Indexed X-form","lbzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|853@21|/@31|","v2.05" +"Load Doubleword Caching Inhibited Indexed X-form","ldcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|885@21|/@31|","v2.05" +"Load Floating-Point Double Pair DS-form","lfdp FRTp,DS(RA)","57@0|FRTp@6|RA@11|DS@16|0@30|","v2.05" +"Load Floating-Point Double Pair Indexed X-form","lfdpx FRTp,RA,RB","31@0|FRTp@6|RA@11|RB@16|791@21|/@31|","v2.05" +"Load Floating-Point as Integer Word Algebraic Indexed X-form","lfiwax FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|855@21|/@31|","v2.05" +"Load Halfword & Zero Caching Inhibited Indexed X-form","lhzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|821@21|/@31|","v2.05" +"Load Word & Zero Caching Inhibited Indexed X-form","lwzcix RT,RA,RB","31@0|RT@6|RA@11|RB@16|789@21|/@31|","v2.05" +"Parity Doubleword X-form","prtyd RA,RS","31@0|RS@6|RA@11|///@16|186@21|/@31|","v2.05" +"Parity Word X-form","prtyw RA,RS","31@0|RS@6|RA@11|///@16|154@21|/@31|","v2.05" +"SLB Find Entry ESID X-form","slbfee. 
RT,RB","31@0|RT@6|///@11|RB@16|979@21|1@31|","v2.05" +"Store Byte Caching Inhibited Indexed X-form","stbcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|981@21|/@31|","v2.05" +"Store Doubleword Caching Inhibited Indexed X-form","stdcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|1013@21|/@31|","v2.05" +"Store Floating-Point Double Pair DS-form","stfdp FRSp,DS(RA)","61@0|FRSp@6|RA@11|DS@16|0@30|","v2.05" +"Store Floating-Point Double Pair Indexed X-form","stfdpx FRSp,RA,RB","31@0|FRSp@6|RA@11|RB@16|919@21|/@31|","v2.05" +"Store Halfword Caching Inhibited Indexed X-form","sthcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|949@21|/@31|","v2.05" +"Store Word Caching Inhibited Indexed X-form","stwcix RS,RA,RB","31@0|RS@6|RA@11|RB@16|917@21|/@31|","v2.05" +"Integer Select A-form","isel RT,RA,RB,BC","31@0|RT@6|RA@11|RB@16|BC@21|15@26|/@31|","v2.03" +"Load Vector Element Byte Indexed X-form","lvebx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|7@21|/@31|","v2.03" +"Load Vector Element Halfword Indexed X-form","lvehx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|39@21|/@31|","v2.03" +"Load Vector Element Word Indexed X-form","lvewx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|71@21|/@31|","v2.03" +"Load Vector for Shift Left Indexed X-form","lvsl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|6@21|/@31|","v2.03" +"Load Vector for Shift Right Indexed X-form","lvsr VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|38@21|/@31|","v2.03" +"Load Vector Indexed X-form","lvx VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|103@21|/@31|","v2.03" +"Load Vector Indexed Last X-form","lvxl VRT,RA,RB","31@0|VRT@6|RA@11|RB@16|359@21|/@31|","v2.03" +"Move From Vector Status and Control Register VX-form","mfvscr VRT","4@0|VRT@6|///@11|///@16|1540@21|","v2.03" +"Move To Vector Status and Control Register VX-form","mtvscr VRB","4@0|///@6|///@11|VRB@16|1604@21|","v2.03" +"Store Vector Element Byte Indexed X-form","stvebx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|135@21|/@31|","v2.03" +"Store Vector Element Halfword Indexed X-form","stvehx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|167@21|/@31|","v2.03" +"Store 
Vector Element Word Indexed X-form","stvewx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|199@21|/@31|","v2.03" +"Store Vector Indexed X-form","stvx VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|231@21|/@31|","v2.03" +"Store Vector Indexed Last X-form","stvxl VRS,RA,RB","31@0|VRS@6|RA@11|RB@16|487@21|/@31|","v2.03" +"TLB Invalidate Entry Local X-form","tlbiel RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|274@21|/@31|","v2.03" +"Vector Add & write Carry Unsigned Word VX-form","vaddcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|384@21|","v2.03" +"Vector Add Floating-Point VX-form","vaddfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|10@21|","v2.03" +"Vector Add Signed Byte Saturate VX-form","vaddsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|768@21|","v2.03" +"Vector Add Signed Halfword Saturate VX-form","vaddshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|832@21|","v2.03" +"Vector Add Signed Word Saturate VX-form","vaddsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|896@21|","v2.03" +"Vector Add Unsigned Byte Modulo VX-form","vaddubm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|0@21|","v2.03" +"Vector Add Unsigned Byte Saturate VX-form","vaddubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|512@21|","v2.03" +"Vector Add Unsigned Halfword Modulo VX-form","vadduhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|64@21|","v2.03" +"Vector Add Unsigned Halfword Saturate VX-form","vadduhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|576@21|","v2.03" +"Vector Add Unsigned Word Modulo VX-form","vadduwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|128@21|","v2.03" +"Vector Add Unsigned Word Saturate VX-form","vadduws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|640@21|","v2.03" +"Vector Logical AND VX-form","vand VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1028@21|","v2.03" +"Vector Logical AND with Complement VX-form","vandc VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1092@21|","v2.03" +"Vector Average Signed Byte VX-form","vavgsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1282@21|","v2.03" +"Vector Average Signed Halfword VX-form","vavgsh 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1346@21|","v2.03" +"Vector Average Signed Word VX-form","vavgsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1410@21|","v2.03" +"Vector Average Unsigned Byte VX-form","vavgub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1026@21|","v2.03" +"Vector Average Unsigned Halfword VX-form","vavguh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1090@21|","v2.03" +"Vector Average Unsigned Word VX-form","vavguw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1154@21|","v2.03" +"Vector Convert with round to nearest From Signed Word to floating-point format VX-form","vcfsx VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|842@21|","v2.03" +"Vector Convert with round to nearest From Unsigned Word to floating-point format VX-form","vcfux VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|778@21|","v2.03" +"Vector Compare Bounds Floating-Point VC-form","vcmpbfp VRT,VRA,VRB (Rc=0)|vcmpbfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|966@22|","v2.03" +"Vector Compare Equal Floating-Point VC-form","vcmpeqfp VRT,VRA,VRB (Rc=0)|vcmpeqfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|198@22|","v2.03" +"Vector Compare Equal Unsigned Byte VC-form","vcmpequb VRT,VRA,VRB (Rc=0)|vcmpequb. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|6@22|","v2.03" +"Vector Compare Equal Unsigned Halfword VC-form","vcmpequh VRT,VRA,VRB (Rc=0)|vcmpequh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|70@22|","v2.03" +"Vector Compare Equal Unsigned Word VC-form","vcmpequw VRT,VRA,VRB (Rc=0)|vcmpequw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|134@22|","v2.03" +"Vector Compare Greater Than or Equal Floating-Point VC-form","vcmpgefp VRT,VRA,VRB (Rc=0)|vcmpgefp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|454@22|","v2.03" +"Vector Compare Greater Than Floating-Point VC-form","vcmpgtfp VRT,VRA,VRB (Rc=0)|vcmpgtfp. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|710@22|","v2.03" +"Vector Compare Greater Than Signed Byte VC-form","vcmpgtsb VRT,VRA,VRB (Rc=0)|vcmpgtsb. 
VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|774@22|","v2.03" +"Vector Compare Greater Than Signed Halfword VC-form","vcmpgtsh VRT,VRA,VRB (Rc=0)|vcmpgtsh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|838@22|","v2.03" +"Vector Compare Greater Than Signed Word VC-form","vcmpgtsw VRT,VRA,VRB (Rc=0)|vcmpgtsw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|902@22|","v2.03" +"Vector Compare Greater Than Unsigned Byte VC-form","vcmpgtub VRT,VRA,VRB (Rc=0)|vcmpgtub. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|518@22|","v2.03" +"Vector Compare Greater Than Unsigned Halfword VC-form","vcmpgtuh VRT,VRA,VRB (Rc=0)|vcmpgtuh. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|582@22|","v2.03" +"Vector Compare Greater Than Unsigned Word VC-form","vcmpgtuw VRT,VRA,VRB (Rc=0)|vcmpgtuw. VRT,VRA,VRB (Rc=1)","4@0|VRT@6|VRA@11|VRB@16|Rc@21|646@22|","v2.03" +"Vector Convert with round to zero from floating-point To Signed Word format Saturate VX-form","vctsxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|970@21|","v2.03" +"Vector Convert with round to zero from floating-point To Unsigned Word format Saturate VX-form","vctuxs VRT,VRB,UIM","4@0|VRT@6|UIM@11|VRB@16|906@21|","v2.03" +"Vector 2 Raised to the Exponent Estimate Floating-Point VX-form","vexptefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|394@21|","v2.03" +"Vector Log Base 2 Estimate Floating-Point VX-form","vlogefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|458@21|","v2.03" +"Vector Multiply-Add Floating-Point VA-form","vmaddfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|46@26|","v2.03" +"Vector Maximum Floating-Point VX-form","vmaxfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1034@21|","v2.03" +"Vector Maximum Signed Byte VX-form","vmaxsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|258@21|","v2.03" +"Vector Maximum Signed Halfword VX-form","vmaxsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|322@21|","v2.03" +"Vector Maximum Signed Word VX-form","vmaxsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|386@21|","v2.03" +"Vector Maximum Unsigned Byte 
VX-form","vmaxub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|2@21|","v2.03" +"Vector Maximum Unsigned Halfword VX-form","vmaxuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|66@21|","v2.03" +"Vector Maximum Unsigned Word VX-form","vmaxuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|130@21|","v2.03" +"Vector Multiply-High-Add Signed Halfword Saturate VA-form","vmhaddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|32@26|","v2.03" +"Vector Multiply-High-Round-Add Signed Halfword Saturate VA-form","vmhraddshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|33@26|","v2.03" +"Vector Minimum Floating-Point VX-form","vminfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1098@21|","v2.03" +"Vector Minimum Signed Byte VX-form","vminsb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|770@21|","v2.03" +"Vector Minimum Signed Halfword VX-form","vminsh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|834@21|","v2.03" +"Vector Minimum Signed Word VX-form","vminsw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|898@21|","v2.03" +"Vector Minimum Unsigned Byte VX-form","vminub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|514@21|","v2.03" +"Vector Minimum Unsigned Halfword VX-form","vminuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|578@21|","v2.03" +"Vector Minimum Unsigned Word VX-form","vminuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|642@21|","v2.03" +"Vector Multiply-Low-Add Unsigned Halfword Modulo VA-form","vmladduhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|34@26|","v2.03" +"Vector Merge High Byte VX-form","vmrghb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|12@21|","v2.03" +"Vector Merge High Halfword VX-form","vmrghh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|76@21|","v2.03" +"Vector Merge High Word VX-form","vmrghw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|140@21|","v2.03" +"Vector Merge Low Byte VX-form","vmrglb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|268@21|","v2.03" +"Vector Merge Low Halfword VX-form","vmrglh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|332@21|","v2.03" +"Vector Merge Low Word VX-form","vmrglw 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|396@21|","v2.03" +"Vector Multiply-Sum Mixed Byte Modulo VA-form","vmsummbm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|37@26|","v2.03" +"Vector Multiply-Sum Signed Halfword Modulo VA-form","vmsumshm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|40@26|","v2.03" +"Vector Multiply-Sum Signed Halfword Saturate VA-form","vmsumshs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|41@26|","v2.03" +"Vector Multiply-Sum Unsigned Byte Modulo VA-form","vmsumubm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|36@26|","v2.03" +"Vector Multiply-Sum Unsigned Halfword Modulo VA-form","vmsumuhm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|38@26|","v2.03" +"Vector Multiply-Sum Unsigned Halfword Saturate VA-form","vmsumuhs VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|39@26|","v2.03" +"Vector Multiply Even Signed Byte VX-form","vmulesb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|776@21|","v2.03" +"Vector Multiply Even Signed Halfword VX-form","vmulesh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|840@21|","v2.03" +"Vector Multiply Even Unsigned Byte VX-form","vmuleub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|520@21|","v2.03" +"Vector Multiply Even Unsigned Halfword VX-form","vmuleuh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|584@21|","v2.03" +"Vector Multiply Odd Signed Byte VX-form","vmulosb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|264@21|","v2.03" +"Vector Multiply Odd Signed Halfword VX-form","vmulosh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|328@21|","v2.03" +"Vector Multiply Odd Unsigned Byte VX-form","vmuloub VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|8@21|","v2.03" +"Vector Multiply Odd Unsigned Halfword VX-form","vmulouh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|72@21|","v2.03" +"Vector Negative Multiply-Subtract Floating-Point VA-form","vnmsubfp VRT,VRA,VRC,VRB","4@0|VRT@6|VRA@11|VRB@16|VRC@21|47@26|","v2.03" +"Vector Logical NOR VX-form","vnor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1284@21|","v2.03" +"Vector Logical OR VX-form","vor 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1156@21|","v2.03" +"Vector Permute VA-form","vperm VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|43@26|","v2.03" +"Vector Pack Pixel VX-form","vpkpx VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|782@21|","v2.03" +"Vector Pack Signed Halfword Signed Saturate VX-form","vpkshss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|398@21|","v2.03" +"Vector Pack Signed Halfword Unsigned Saturate VX-form","vpkshus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|270@21|","v2.03" +"Vector Pack Signed Word Signed Saturate VX-form","vpkswss VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|462@21|","v2.03" +"Vector Pack Signed Word Unsigned Saturate VX-form","vpkswus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|334@21|","v2.03" +"Vector Pack Unsigned Halfword Unsigned Modulo VX-form","vpkuhum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|14@21|","v2.03" +"Vector Pack Unsigned Halfword Unsigned Saturate VX-form","vpkuhus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|142@21|","v2.03" +"Vector Pack Unsigned Word Unsigned Modulo VX-form","vpkuwum VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|78@21|","v2.03" +"Vector Pack Unsigned Word Unsigned Saturate VX-form","vpkuwus VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|206@21|","v2.03" +"Vector Reciprocal Estimate Floating-Point VX-form","vrefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|266@21|","v2.03" +"Vector Round to Floating-Point Integer toward -Infinity VX-form","vrfim VRT,VRB","4@0|VRT@6|///@11|VRB@16|714@21|","v2.03" +"Vector Round to Floating-Point Integer Nearest VX-form","vrfin VRT,VRB","4@0|VRT@6|///@11|VRB@16|522@21|","v2.03" +"Vector Round to Floating-Point Integer toward +Infinity VX-form","vrfip VRT,VRB","4@0|VRT@6|///@11|VRB@16|650@21|","v2.03" +"Vector Round to Floating-Point Integer toward Zero VX-form","vrfiz VRT,VRB","4@0|VRT@6|///@11|VRB@16|586@21|","v2.03" +"Vector Rotate Left Byte VX-form","vrlb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|4@21|","v2.03" +"Vector Rotate Left Halfword VX-form","vrlh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|68@21|","v2.03" +"Vector 
Rotate Left Word VX-form","vrlw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|132@21|","v2.03" +"Vector Reciprocal Square Root Estimate Floating-Point VX-form","vrsqrtefp VRT,VRB","4@0|VRT@6|///@11|VRB@16|330@21|","v2.03" +"Vector Select VA-form","vsel VRT,VRA,VRB,VRC","4@0|VRT@6|VRA@11|VRB@16|VRC@21|42@26|","v2.03" +"Vector Shift Left VX-form","vsl VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|452@21|","v2.03" +"Vector Shift Left Byte VX-form","vslb VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|260@21|","v2.03" +"Vector Shift Left Double by Octet Immediate VA-form","vsldoi VRT,VRA,VRB,SHB","4@0|VRT@6|VRA@11|VRB@16|/@21|SHB@22|44@26|","v2.03" +"Vector Shift Left Halfword VX-form","vslh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|324@21|","v2.03" +"Vector Shift Left by Octet VX-form","vslo VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1036@21|","v2.03" +"Vector Shift Left Word VX-form","vslw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|388@21|","v2.03" +"Vector Splat Byte VX-form","vspltb VRT,VRB,UIM","4@0|VRT@6|/@11|UIM@12|VRB@16|524@21|","v2.03" +"Vector Splat Halfword VX-form","vsplth VRT,VRB,UIM","4@0|VRT@6|//@11|UIM@13|VRB@16|588@21|","v2.03" +"Vector Splat Immediate Signed Byte VX-form","vspltisb VRT,SIM","4@0|VRT@6|SIM@11|///@16|780@21|","v2.03" +"Vector Splat Immediate Signed Halfword VX-form","vspltish VRT,SIM","4@0|VRT@6|SIM@11|///@16|844@21|","v2.03" +"Vector Splat Immediate Signed Word VX-form","vspltisw VRT,SIM","4@0|VRT@6|SIM@11|///@16|908@21|","v2.03" +"Vector Splat Word VX-form","vspltw VRT,VRB,UIM","4@0|VRT@6|///@11|UIM@14|VRB@16|652@21|","v2.03" +"Vector Shift Right VX-form","vsr VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|708@21|","v2.03" +"Vector Shift Right Algebraic Byte VX-form","vsrab VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|772@21|","v2.03" +"Vector Shift Right Algebraic Halfword VX-form","vsrah VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|836@21|","v2.03" +"Vector Shift Right Algebraic Word VX-form","vsraw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|900@21|","v2.03" +"Vector Shift Right Byte VX-form","vsrb 
VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|516@21|","v2.03" +"Vector Shift Right Halfword VX-form","vsrh VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|580@21|","v2.03" +"Vector Shift Right by Octet VX-form","vsro VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1100@21|","v2.03" +"Vector Shift Right Word VX-form","vsrw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|644@21|","v2.03" +"Vector Subtract & Write Carry-out Unsigned Word VX-form","vsubcuw VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1408@21|","v2.03" +"Vector Subtract Floating-Point VX-form","vsubfp VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|74@21|","v2.03" +"Vector Subtract Signed Byte Saturate VX-form","vsubsbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1792@21|","v2.03" +"Vector Subtract Signed Halfword Saturate VX-form","vsubshs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1856@21|","v2.03" +"Vector Subtract Signed Word Saturate VX-form","vsubsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1920@21|","v2.03" +"Vector Subtract Unsigned Byte Modulo VX-form","vsububm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1024@21|","v2.03" +"Vector Subtract Unsigned Byte Saturate VX-form","vsububs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1536@21|","v2.03" +"Vector Subtract Unsigned Halfword Modulo VX-form","vsubuhm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1088@21|","v2.03" +"Vector Subtract Unsigned Halfword Saturate VX-form","vsubuhs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1600@21|","v2.03" +"Vector Subtract Unsigned Word Modulo VX-form","vsubuwm VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1152@21|","v2.03" +"Vector Subtract Unsigned Word Saturate VX-form","vsubuws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1664@21|","v2.03" +"Vector Sum across Half Signed Word Saturate VX-form","vsum2sws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1672@21|","v2.03" +"Vector Sum across Quarter Signed Byte Saturate VX-form","vsum4sbs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1800@21|","v2.03" +"Vector Sum across Quarter Signed Halfword Saturate VX-form","vsum4shs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1608@21|","v2.03" +"Vector 
Sum across Quarter Unsigned Byte Saturate VX-form","vsum4ubs VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1544@21|","v2.03" +"Vector Sum across Signed Word Saturate VX-form","vsumsws VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1928@21|","v2.03" +"Vector Unpack High Pixel VX-form","vupkhpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|846@21|","v2.03" +"Vector Unpack High Signed Byte VX-form","vupkhsb VRT,VRB","4@0|VRT@6|///@11|VRB@16|526@21|","v2.03" +"Vector Unpack High Signed Halfword VX-form","vupkhsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|590@21|","v2.03" +"Vector Unpack Low Pixel VX-form","vupklpx VRT,VRB","4@0|VRT@6|///@11|VRB@16|974@21|","v2.03" +"Vector Unpack Low Signed Byte VX-form","vupklsb VRT,VRB","4@0|VRT@6|///@11|VRB@16|654@21|","v2.03" +"Vector Unpack Low Signed Halfword VX-form","vupklsh VRT,VRB","4@0|VRT@6|///@11|VRB@16|718@21|","v2.03" +"Vector Logical XOR VX-form","vxor VRT,VRA,VRB","4@0|VRT@6|VRA@11|VRB@16|1220@21|","v2.03" +"Floating Reciprocal Estimate A-form","fre FRT,FRB (Rc=0)|fre. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","v2.02" +"Floating Round to Integer Minus X-form","frim FRT,FRB (Rc=0)|frim. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|488@21|Rc@31|","v2.02" +"Floating Round to Integer Nearest X-form","frin FRT,FRB (Rc=0)|frin. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|392@21|Rc@31|","v2.02" +"Floating Round to Integer Plus X-form","frip FRT,FRB (Rc=0)|frip. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|456@21|Rc@31|","v2.02" +"Floating Round to Integer Toward Zero X-form","friz FRT,FRB (Rc=0)|friz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|424@21|Rc@31|","v2.02" +"Floating Reciprocal Square Root Estimate Single A-form","frsqrtes FRT,FRB (Rc=0)|frsqrtes. 
FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","v2.02" +"Return From Interrupt Doubleword Hypervisor XL-form","hrfid","19@0|///@6|///@11|///@16|274@21|/@31|","v2.02" +"Population Count Bytes X-form","popcntb RA, RS","31@0|RS@6|RA@11|///@16|122@21|/@31|","v2.02" +"Move From One Condition Register Field XFX-form","mfocrf RT,FXM","31@0|RT@6|1@11|FXM@12|/@20|19@21|/@31|","v2.01" +"Move To One Condition Register Field XFX-form","mtocrf FXM,RS","31@0|RS@6|1@11|FXM@12|/@20|144@21|/@31|","v2.01" +"SLB Move From Entry ESID X-form","slbmfee RT,RB","31@0|RT@6|///@11|L@15|RB@16|915@21|/@31|","v2.00" +"SLB Move From Entry VSID X-form","slbmfev RT,RB","31@0|RT@6|///@11|L@15|RB@16|851@21|/@31|","v2.00" +"SLB Move To Entry X-form","slbmte RS,RB","31@0|RS@6|///@11|RB@16|402@21|/@31|","v2.00" +"Return From System Call Vectored XL-form","rfscv","19@0|///@6|///@11|///@16|82@21|/@31|","v3.0" +"System Call Vectored SC-form","scv LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|0@30|1@31|","v3.0" +"Load Quadword DQ-form","lq RTp,DQ(RA)","56@0|RTp@6|RA@11|DQ@16|///@28|","v2.03" +"Store Quadword DS-form","stq RSp,DS(RA)","62@0|RSp@6|RA@11|DS@16|2@30|","v2.03" +"Count Leading Zeros Doubleword X-form","cntlzd RA,RS (Rc=0)|cntlzd. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|58@21|Rc@31|","PPC" +"Data Cache Block Flush X-form","dcbf RA,RB,L","31@0|//@6|L@8|RA@11|RB@16|86@21|/@31|","PPC" +"Data Cache Block Store X-form","dcbst RA,RB","31@0|///@6|RA@11|RB@16|54@21|/@31|","PPC" +"Data Cache Block Touch X-form","dcbt RA,RB,TH","31@0|TH@6|RA@11|RB@16|278@21|/@31|","PPC" +"Data Cache Block Touch for Store X-form","dcbtst RA,RB,TH","31@0|TH@6|RA@11|RB@16|246@21|/@31|","PPC" +"Divide Doubleword XO-form","divd RT,RA,RB (OE=0 Rc=0)|divd. RT,RA,RB (OE=0 Rc=1)|divdo RT,RA,RB (OE=1 Rc=0)|divdo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|489@22|Rc@31|","PPC" +"Divide Doubleword Unsigned XO-form","divdu RT,RA,RB (OE=0 Rc=0)|divdu. RT,RA,RB (OE=0 Rc=1)|divduo RT,RA,RB (OE=1 Rc=0)|divduo. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|457@22|Rc@31|","PPC" +"Divide Word XO-form","divw RT,RA,RB (OE=0 Rc=0)|divw. RT,RA,RB (OE=0 Rc=1)|divwo RT,RA,RB (OE=1 Rc=0)|divwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|491@22|Rc@31|","PPC" +"Divide Word Unsigned XO-form","divwu RT,RA,RB (OE=0 Rc=0)|divwu. RT,RA,RB (OE=0 Rc=1)|divwuo RT,RA,RB (OE=1 Rc=0)|divwuo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|459@22|Rc@31|","PPC" +"Enforce In-order Execution of I/O X-form","eieio","31@0|///@6|///@11|///@16|854@21|/@31|","PPC" +"Extend Sign Byte X-form","extsb RA,RS (Rc=0)|extsb. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|954@21|Rc@31|","PPC" +"Extend Sign Word X-form","extsw RA,RS (Rc=0)|extsw. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|986@21|Rc@31|","PPC" +"Floating Add Single A-form","fadds FRT,FRA,FRB (Rc=0)|fadds. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","PPC" +"Floating Convert with round Signed Doubleword to Double-Precision format X-form","fcfid FRT,FRB (Rc=0)|fcfid. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|846@21|Rc@31|","PPC" +"Floating Convert with round Double-Precision To Signed Doubleword format X-form","fctid FRT,FRB (Rc=0)|fctid. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|814@21|Rc@31|","PPC" +"Floating Convert with truncate Double-Precision To Signed Doubleword format X-form","fctidz FRT,FRB (Rc=0)|fctidz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|815@21|Rc@31|","PPC" +"Floating Divide Single A-form","fdivs FRT,FRA,FRB (Rc=0)|fdivs. FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","PPC" +"Floating Multiply-Add Single A-form","fmadds FRT,FRA,FRC,FRB (Rc=0)|fmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","PPC" +"Floating Multiply-Subtract Single A-form","fmsubs FRT,FRA,FRC,FRB (Rc=0)|fmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","PPC" +"Floating Multiply Single A-form","fmuls FRT,FRA,FRC (Rc=0)|fmuls. 
FRT,FRA,FRC (Rc=1)","59@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","PPC" +"Floating Negative Multiply-Add Single A-form","fnmadds FRT,FRA,FRC,FRB (Rc=0)|fnmadds. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","PPC" +"Floating Negative Multiply-Subtract Single A-form","fnmsubs FRT,FRA,FRC,FRB (Rc=0)|fnmsubs. FRT,FRA,FRC,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","PPC" +"Floating Reciprocal Estimate Single A-form","fres FRT,FRB (Rc=0)|fres. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|24@26|Rc@31|","PPC" +"Floating Reciprocal Square Root Estimate A-form","frsqrte FRT,FRB (Rc=0)|frsqrte. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|26@26|Rc@31|","PPC" +"Floating Select A-form","fsel FRT,FRA,FRC,FRB (Rc=0)|fsel. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|23@26|Rc@31|","PPC" +"Floating Square Root Single A-form","fsqrts FRT,FRB (Rc=0)|fsqrts. FRT,FRB (Rc=1)","59@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","PPC" +"Floating Subtract Single A-form","fsubs FRT,FRA,FRB (Rc=0)|fsubs. 
FRT,FRA,FRB (Rc=1)","59@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","PPC" +"Instruction Cache Block Invalidate X-form","icbi RA,RB","31@0|///@6|RA@11|RB@16|982@21|/@31|","PPC" +"Load Doubleword DS-form","ld RT,DS(RA)","58@0|RT@6|RA@11|DS@16|0@30|","PPC" +"Load Doubleword And Reserve Indexed X-form","ldarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|84@21|EH@31|","PPC" +"Load Doubleword with Update DS-form","ldu RT,DS(RA)","58@0|RT@6|RA@11|DS@16|1@30|","PPC" +"Load Doubleword with Update Indexed X-form","ldux RT,RA,RB","31@0|RT@6|RA@11|RB@16|53@21|/@31|","PPC" +"Load Doubleword Indexed X-form","ldx RT,RA,RB","31@0|RT@6|RA@11|RB@16|21@21|/@31|","PPC" +"Load Word Algebraic DS-form","lwa RT,DS(RA)","58@0|RT@6|RA@11|DS@16|2@30|","PPC" +"Load Word & Reserve Indexed X-form","lwarx RT,RA,RB,EH","31@0|RT@6|RA@11|RB@16|20@21|EH@31|","PPC" +"Load Word Algebraic with Update Indexed X-form","lwaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|373@21|/@31|","PPC" +"Load Word Algebraic Indexed X-form","lwax RT,RA,RB","31@0|RT@6|RA@11|RB@16|341@21|/@31|","PPC" +"Move From Time Base XFX-form","mftb RT,TBR","31@0|RT@6|tbr@11|371@21|/@31|","PPC" +"Move To MSR Doubleword X-form","mtmsrd RS,L","31@0|RS@6|///@11|L@15|///@16|178@21|/@31|","PPC" +"Multiply High Doubleword XO-form","mulhd RT,RA,RB (Rc=0)|mulhd. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|73@22|Rc@31|","PPC" +"Multiply High Doubleword Unsigned XO-form","mulhdu RT,RA,RB (Rc=0)|mulhdu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|9@22|Rc@31|","PPC" +"Multiply High Word XO-form","mulhw RT,RA,RB (Rc=0)|mulhw. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|75@22|Rc@31|","PPC" +"Multiply High Word Unsigned XO-form","mulhwu RT,RA,RB (Rc=0)|mulhwu. RT,RA,RB (Rc=1)","31@0|RT@6|RA@11|RB@16|/@21|11@22|Rc@31|","PPC" +"Multiply Low Doubleword XO-form","mulld RT,RA,RB (OE=0 Rc=0)|mulld. RT,RA,RB (OE=0 Rc=1)|mulldo RT,RA,RB (OE=1 Rc=0)|mulldo. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|233@22|Rc@31|","PPC" +"Return from Interrupt Doubleword XL-form","rfid","19@0|///@6|///@11|///@16|18@21|/@31|","PPC" +"Rotate Left Doubleword then Clear Left MDS-form","rldcl RA,RS,RB,MB (Rc=0)|rldcl. RA,RS,RB,MB (Rc=1)","30@0|RS@6|RA@11|RB@16|mb@21|8@27|Rc@31|","PPC" +"Rotate Left Doubleword then Clear Right MDS-form","rldcr RA,RS,RB,ME (Rc=0)|rldcr. RA,RS,RB,ME (Rc=1)","30@0|RS@6|RA@11|RB@16|me@21|9@27|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Clear MD-form","rldic RA,RS,SH,MB (Rc=0)|rldic. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|2@27|sh@30|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Clear Left MD-form","rldicl RA,RS,SH,MB (Rc=0)|rldicl. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|0@27|sh@30|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Clear Right MD-form","rldicr RA,RS,SH,ME (Rc=0)|rldicr. RA,RS,SH,ME (Rc=1)","30@0|RS@6|RA@11|sh@16|me@21|1@27|sh@30|Rc@31|","PPC" +"Rotate Left Doubleword Immediate then Mask Insert MD-form","rldimi RA,RS,SH,MB (Rc=0)|rldimi. RA,RS,SH,MB (Rc=1)","30@0|RS@6|RA@11|sh@16|mb@21|3@27|sh@30|Rc@31|","PPC" +"System Call SC-form","sc LEV","17@0|///@6|///@11|///@16|LEV@20|///@27|1@30|/@31|","PPC" +"SLB Invalidate All X-form","slbia IH","31@0|//@6|IH@8|///@11|///@16|498@21|/@31|","PPC" +"SLB Invalidate Entry X-form","slbie RB","31@0|///@6|///@11|RB@16|434@21|/@31|","PPC" +"Shift Left Doubleword X-form","sld RA,RS,RB (Rc=0)|sld. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|27@21|Rc@31|","PPC" +"Shift Right Algebraic Doubleword X-form","srad RA,RS,RB (Rc=0)|srad. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|794@21|Rc@31|","PPC" +"Shift Right Algebraic Doubleword Immediate XS-form","sradi RA,RS,SH (Rc=0)|sradi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|sh@16|413@21|sh@30|Rc@31|","PPC" +"Shift Right Doubleword X-form","srd RA,RS,RB (Rc=0)|srd. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|539@21|Rc@31|","PPC" +"Store Doubleword DS-form","std RS,DS(RA)","62@0|RS@6|RA@11|DS@16|0@30|","PPC" +"Store Doubleword Conditional Indexed X-form","stdcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|214@21|1@31|","PPC" +"Store Doubleword with Update DS-form","stdu RS,DS(RA)","62@0|RS@6|RA@11|DS@16|1@30|","PPC" +"Store Doubleword with Update Indexed X-form","stdux RS,RA,RB","31@0|RS@6|RA@11|RB@16|181@21|/@31|","PPC" +"Store Doubleword Indexed X-form","stdx RS,RA,RB","31@0|RS@6|RA@11|RB@16|149@21|/@31|","PPC" +"Store Floating-Point as Integer Word Indexed X-form","stfiwx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|983@21|/@31|","PPC" +"Store Word Conditional Indexed X-form","stwcx. RS,RA,RB","31@0|RS@6|RA@11|RB@16|150@21|1@31|","PPC" +"Subtract From XO-form","subf RT,RA,RB (OE=0 Rc=0)|subf. RT,RA,RB (OE=0 Rc=1)|subfo RT,RA,RB (OE=1 Rc=0)|subfo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|40@22|Rc@31|","PPC" +"Trap Doubleword X-form","td TO,RA,RB","31@0|TO@6|RA@11|RB@16|68@21|/@31|","PPC" +"Trap Doubleword Immediate D-form","tdi TO,RA,SI","2@0|TO@6|RA@11|SI@16|","PPC" +"TLB Synchronize X-form","tlbsync","31@0|///@6|///@11|///@16|566@21|/@31|","PPC" +"Floating Convert with round Double-Precision To Signed Word format X-form","fctiw FRT,FRB (Rc=0)|fctiw. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|14@21|Rc@31|","P2" +"Floating Convert with truncate Double-Precision To Signed Word format X-form","fctiwz FRT,FRB (Rc=0)|fctiwz. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|15@21|Rc@31|","P2" +"Floating Square Root A-form","fsqrt FRT,FRB (Rc=0)|fsqrt. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|///@21|22@26|Rc@31|","P2" +"Add XO-form","add RT,RA,RB (OE=0 Rc=0)|add. RT,RA,RB (OE=0 Rc=1)|addo RT,RA,RB (OE=1 Rc=0)|addo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|266@22|Rc@31|","P1" +"Add Carrying XO-form","addc RT,RA,RB (OE=0 Rc=0)|addc. RT,RA,RB (OE=0 Rc=1)|addco RT,RA,RB (OE=1 Rc=0)|addco. 
RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|10@22|Rc@31|","P1" +"Add Extended XO-form","adde RT,RA,RB (OE=0 Rc=0)|adde. RT,RA,RB (OE=0 Rc=1)|addeo RT,RA,RB (OE=1 Rc=0)|addeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|138@22|Rc@31|","P1" +"Add Immediate D-form","addi RT,RA,SI|li RT,SI (RA=0)","14@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Carrying D-form","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Carrying and Record D-form","addic. RT,RA,SI","13@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Shifted D-form","addis RT,RA,SI|lis RT,SI (RA=0)","15@0|RT@6|RA@11|SI@16|","P1" +"Add to Minus One Extended XO-form","addme RT,RA (OE=0 Rc=0)|addme. RT,RA (OE=0 Rc=1)|addmeo RT,RA (OE=1 Rc=0)|addmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|234@22|Rc@31|","P1" +"Add to Zero Extended XO-form","addze RT,RA (OE=0 Rc=0)|addze. RT,RA (OE=0 Rc=1)|addzeo RT,RA (OE=1 Rc=0)|addzeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|202@22|Rc@31|","P1" +"AND X-form","and RA,RS,RB (Rc=0)|and. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|28@21|Rc@31|","P1" +"AND with Complement X-form","andc RA,RS,RB (Rc=0)|andc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|60@21|Rc@31|","P1" +"AND Immediate D-form","andi. RA,RS,UI","28@0|RS@6|RA@11|UI@16|","P1" +"AND Immediate Shifted D-form","andis. 
RA,RS,UI","29@0|RS@6|RA@11|UI@16|","P1" +"Branch I-form","b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)","18@0|LI@6|AA@30|LK@31|","P1" +"Branch Conditional B-form","bc BO,BI,target_addr (AA=0 LK=0)|bca BO,BI,target_addr (AA=1 LK=0)|bcl BO,BI,target_addr (AA=0 LK=1)|bcla BO,BI,target_addr (AA=1 LK=1)","16@0|BO@6|BI@11|BD@16|AA@30|LK@31|","P1" +"Branch Conditional to Count Register XL-form","bcctr BO,BI,BH (LK=0)|bcctrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|528@21|LK@31|","P1" +"Branch Conditional to Link Register XL-form","bclr BO,BI,BH (LK=0)|bclrl BO,BI,BH (LK=1)","19@0|BO@6|BI@11|///@16|BH@19|16@21|LK@31|","P1" +"Compare X-form","cmp BF,L,RA,RB|cmpw BF,RA,RB (L=0)|cmpd BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|0@21|/@31|","P1" +"Compare Immediate D-form","cmpi BF,L,RA,SI|cmpwi BF,RA,SI (L=0)|cmpdi BF,RA,SI (L=1)","11@0|BF@6|/@9|L@10|RA@11|SI@16|","P1" +"Compare Logical X-form","cmpl BF,L,RA,RB|cmplw BF,RA,RB (L=0)|cmpld BF,RA,RB (L=1)","31@0|BF@6|/@9|L@10|RA@11|RB@16|32@21|/@31|","P1" +"Compare Logical Immediate D-form","cmpli BF,L,RA,UI|cmplwi BF,RA,UI (L=0)|cmpldi BF,RA,UI (L=1)","10@0|BF@6|/@9|L@10|RA@11|UI@16|","P1" +"Count Leading Zeros Word X-form","cntlzw RA,RS (Rc=0)|cntlzw. 
RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|26@21|Rc@31|","P1" +"Condition Register AND XL-form","crand BT,BA,BB","19@0|BT@6|BA@11|BB@16|257@21|/@31|","P1" +"Condition Register AND with Complement XL-form","crandc BT,BA,BB","19@0|BT@6|BA@11|BB@16|129@21|/@31|","P1" +"Condition Register Equivalent XL-form","creqv BT,BA,BB","19@0|BT@6|BA@11|BB@16|289@21|/@31|","P1" +"Condition Register NAND XL-form","crnand BT,BA,BB","19@0|BT@6|BA@11|BB@16|225@21|/@31|","P1" +"Condition Register NOR XL-form","crnor BT,BA,BB","19@0|BT@6|BA@11|BB@16|33@21|/@31|","P1" +"Condition Register OR XL-form","cror BT,BA,BB","19@0|BT@6|BA@11|BB@16|449@21|/@31|","P1" +"Condition Register OR with Complement XL-form","crorc BT,BA,BB","19@0|BT@6|BA@11|BB@16|417@21|/@31|","P1" +"Condition Register XOR XL-form","crxor BT,BA,BB","19@0|BT@6|BA@11|BB@16|193@21|/@31|","P1" +"Data Cache Block set to Zero X-form","dcbz RA,RB","31@0|///@6|RA@11|RB@16|1014@21|/@31|","P1" +"Equivalent X-form","eqv RA,RS,RB (Rc=0)|eqv. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|284@21|Rc@31|","P1" +"Extend Sign Halfword X-form","extsh RA,RS (Rc=0)|extsh. RA,RS (Rc=1)","31@0|RS@6|RA@11|///@16|922@21|Rc@31|","P1" +"Floating Absolute Value X-form","fabs FRT,FRB (Rc=0)|fabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|264@21|Rc@31|","P1" +"Floating Add A-form","fadd FRT,FRA,FRB (Rc=0)|fadd. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|21@26|Rc@31|","P1" +"Floating Compare Ordered X-form","fcmpo BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|32@21|/@31|","P1" +"Floating Compare Unordered X-form","fcmpu BF,FRA,FRB","63@0|BF@6|//@9|FRA@11|FRB@16|0@21|/@31|","P1" +"Floating Divide A-form","fdiv FRT,FRA,FRB (Rc=0)|fdiv. FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|18@26|Rc@31|","P1" +"Floating Multiply-Add A-form","fmadd FRT,FRA,FRC,FRB (Rc=0)|fmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|29@26|Rc@31|","P1" +"Floating Move Register X-form","fmr FRT,FRB (Rc=0)|fmr. 
FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|72@21|Rc@31|","P1" +"Floating Multiply-Subtract A-form","fmsub FRT,FRA,FRC,FRB (Rc=0)|fmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|28@26|Rc@31|","P1" +"Floating Multiply A-form","fmul FRT,FRA,FRC (Rc=0)|fmul. FRT,FRA,FRC (Rc=1)","63@0|FRT@6|FRA@11|///@16|FRC@21|25@26|Rc@31|","P1" +"Floating Negative Absolute Value X-form","fnabs FRT,FRB (Rc=0)|fnabs. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|136@21|Rc@31|","P1" +"Floating Negate X-form","fneg FRT,FRB (Rc=0)|fneg. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|40@21|Rc@31|","P1" +"Floating Negative Multiply-Add A-form","fnmadd FRT,FRA,FRC,FRB (Rc=0)|fnmadd. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|31@26|Rc@31|","P1" +"Floating Negative Multiply-Subtract A-form","fnmsub FRT,FRA,FRC,FRB (Rc=0)|fnmsub. FRT,FRA,FRC,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|FRC@21|30@26|Rc@31|","P1" +"Floating Round to Single-Precision X-form","frsp FRT,FRB (Rc=0)|frsp. FRT,FRB (Rc=1)","63@0|FRT@6|///@11|FRB@16|12@21|Rc@31|","P1" +"Floating Subtract A-form","fsub FRT,FRA,FRB (Rc=0)|fsub. 
FRT,FRA,FRB (Rc=1)","63@0|FRT@6|FRA@11|FRB@16|///@21|20@26|Rc@31|","P1" +"Instruction Synchronize XL-form","isync","19@0|///@6|///@11|///@16|150@21|/@31|","P1" +"Load Byte and Zero D-form","lbz RT,D(RA)","34@0|RT@6|RA@11|D@16|","P1" +"Load Byte and Zero with Update D-form","lbzu RT,D(RA)","35@0|RT@6|RA@11|D@16|","P1" +"Load Byte and Zero with Update Indexed X-form","lbzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|119@21|/@31|","P1" +"Load Byte and Zero Indexed X-form","lbzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|87@21|/@31|","P1" +"Load Floating-Point Double D-form","lfd FRT,D(RA)","50@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Double with Update D-form","lfdu FRT,D(RA)","51@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Double with Update Indexed X-form","lfdux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|631@21|/@31|","P1" +"Load Floating-Point Double Indexed X-form","lfdx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|599@21|/@31|","P1" +"Load Floating-Point Single D-form","lfs FRT,D(RA)","48@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Single with Update D-form","lfsu FRT,D(RA)","49@0|FRT@6|RA@11|D@16|","P1" +"Load Floating-Point Single with Update Indexed X-form","lfsux FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|567@21|/@31|","P1" +"Load Floating-Point Single Indexed X-form","lfsx FRT,RA,RB","31@0|FRT@6|RA@11|RB@16|535@21|/@31|","P1" +"Load Halfword Algebraic D-form","lha RT,D(RA)","42@0|RT@6|RA@11|D@16|","P1" +"Load Halfword Algebraic with Update D-form","lhau RT,D(RA)","43@0|RT@6|RA@11|D@16|","P1" +"Load Halfword Algebraic with Update Indexed X-form","lhaux RT,RA,RB","31@0|RT@6|RA@11|RB@16|375@21|/@31|","P1" +"Load Halfword Algebraic Indexed X-form","lhax RT,RA,RB","31@0|RT@6|RA@11|RB@16|343@21|/@31|","P1" +"Load Halfword Byte-Reverse Indexed X-form","lhbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|790@21|/@31|","P1" +"Load Halfword and Zero D-form","lhz RT,D(RA)","40@0|RT@6|RA@11|D@16|","P1" +"Load Halfword and Zero with Update D-form","lhzu RT,D(RA)","41@0|RT@6|RA@11|D@16|","P1" +"Load Halfword and 
Zero with Update Indexed X-form","lhzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|311@21|/@31|","P1" +"Load Halfword and Zero Indexed X-form","lhzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|279@21|/@31|","P1" +"Load Multiple Word D-form","lmw RT,D(RA)","46@0|RT@6|RA@11|D@16|","P1" +"Load String Word Immediate X-form","lswi RT,RA,NB","31@0|RT@6|RA@11|NB@16|597@21|/@31|","P1" +"Load String Word Indexed X-form","lswx RT,RA,RB","31@0|RT@6|RA@11|RB@16|533@21|/@31|","P1" +"Load Word Byte-Reverse Indexed X-form","lwbrx RT,RA,RB","31@0|RT@6|RA@11|RB@16|534@21|/@31|","P1" +"Load Word and Zero D-form","lwz RT,D(RA)","32@0|RT@6|RA@11|D@16|","P1" +"Load Word and Zero with Update D-form","lwzu RT,D(RA)","33@0|RT@6|RA@11|D@16|","P1" +"Load Word and Zero with Update Indexed X-form","lwzux RT,RA,RB","31@0|RT@6|RA@11|RB@16|55@21|/@31|","P1" +"Load Word and Zero Indexed X-form","lwzx RT,RA,RB","31@0|RT@6|RA@11|RB@16|23@21|/@31|","P1" +"Move Condition Register Field XL-form","mcrf BF,BFA","19@0|BF@6|//@9|BFA@11|//@14|///@16|0@21|/@31|","P1" +"Move to Condition Register from FPSCR X-form","mcrfs BF,BFA","63@0|BF@6|//@9|BFA@11|//@14|///@16|64@21|/@31|","P1" +"Move From Condition Register XFX-form","mfcr RT","31@0|RT@6|0@11|///@12|/@20|19@21|/@31|","P1" +"Move From FPSCR X-form","mffs FRT (Rc=0)|mffs. FRT (Rc=1)","63@0|FRT@6|0@11|///@16|583@21|Rc@31|","P1" +"Move From MSR X-form","mfmsr RT","31@0|RT@6|///@11|///@16|83@21|/@31|","P1" +"Move From Special Purpose Register XFX-form","mfspr RT,SPR","31@0|RT@6|spr@11|339@21|/@31|","P1" +"Move To Condition Register Fields XFX-form","mtcrf FXM,RS","31@0|RS@6|0@11|FXM@12|/@20|144@21|/@31|","P1" +"Move To FPSCR Bit 0 X-form","mtfsb0 BT (Rc=0)|mtfsb0. BT (Rc=1)","63@0|BT@6|///@11|///@16|70@21|Rc@31|","P1" +"Move To FPSCR Bit 1 X-form","mtfsb1 BT (Rc=0)|mtfsb1. BT (Rc=1)","63@0|BT@6|///@11|///@16|38@21|Rc@31|","P1" +"Move To FPSCR Fields XFL-form","mtfsf FLM,FRB,L,W (Rc=0)|mtfsf. 
FLM,FRB,L,W (Rc=1)","63@0|L@6|FLM@7|W@15|FRB@16|711@21|Rc@31|","P1" +"Move To FPSCR Field Immediate X-form","mtfsfi BF,U,W (Rc=0)|mtfsfi. BF,U,W (Rc=1)","63@0|BF@6|//@9|///@11|W@15|U@16|/@20|134@21|Rc@31|","P1" +"Move To MSR X-form","mtmsr RS,L","31@0|RS@6|///@11|L@15|///@16|146@21|/@31|","P1" +"Move To Special Purpose Register XFX-form","mtspr SPR,RS","31@0|RS@6|spr@11|467@21|/@31|","P1" +"Multiply Low Immediate D-form","mulli RT,RA,SI","7@0|RT@6|RA@11|SI@16|","P1" +"Multiply Low Word XO-form","mullw RT,RA,RB (OE=0 Rc=0)|mullw. RT,RA,RB (OE=0 Rc=1)|mullwo RT,RA,RB (OE=1 Rc=0)|mullwo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|235@22|Rc@31|","P1" +"NAND X-form","nand RA,RS,RB (Rc=0)|nand. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|476@21|Rc@31|","P1" +"Negate XO-form","neg RT,RA (OE=0 Rc=0)|neg. RT,RA (OE=0 Rc=1)|nego RT,RA (OE=1 Rc=0)|nego. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|104@22|Rc@31|","P1" +"NOR X-form","nor RA,RS,RB (Rc=0)|nor. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|124@21|Rc@31|","P1" +"OR X-form","or RA,RS,RB (Rc=0)|or. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|444@21|Rc@31|","P1" +"OR with Complement X-form","orc RA,RS,RB (Rc=0)|orc. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|412@21|Rc@31|","P1" +"OR Immediate D-form","ori RA,RS,UI|nop (RA=0 RS=0 UI=0)","24@0|RS@6|RA@11|UI@16|","P1" +"OR Immediate Shifted D-form","oris RA,RS,UI","25@0|RS@6|RA@11|UI@16|","P1" +"Rotate Left Word Immediate then Mask Insert M-form","rlwimi RA,RS,SH,MB,ME (Rc=0)|rlwimi. RA,RS,SH,MB,ME (Rc=1)","20@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","P1" +"Rotate Left Word Immediate then AND with Mask M-form","rlwinm RA,RS,SH,MB,ME (Rc=0)|rlwinm. RA,RS,SH,MB,ME (Rc=1)","21@0|RS@6|RA@11|SH@16|MB@21|ME@26|Rc@31|","P1" +"Rotate Left Word then AND with Mask M-form","rlwnm RA,RS,RB,MB,ME (Rc=0)|rlwnm. RA,RS,RB,MB,ME (Rc=1)","23@0|RS@6|RA@11|RB@16|MB@21|ME@26|Rc@31|","P1" +"Shift Left Word X-form","slw RA,RS,RB (Rc=0)|slw. 
RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|24@21|Rc@31|","P1" +"Shift Right Algebraic Word X-form","sraw RA,RS,RB (Rc=0)|sraw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|792@21|Rc@31|","P1" +"Shift Right Algebraic Word Immediate X-form","srawi RA,RS,SH (Rc=0)|srawi. RA,RS,SH (Rc=1)","31@0|RS@6|RA@11|SH@16|824@21|Rc@31|","P1" +"Shift Right Word X-form","srw RA,RS,RB (Rc=0)|srw. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|536@21|Rc@31|","P1" +"Store Byte D-form","stb RS,D(RA)","38@0|RS@6|RA@11|D@16|","P1" +"Store Byte with Update D-form","stbu RS,D(RA)","39@0|RS@6|RA@11|D@16|","P1" +"Store Byte with Update Indexed X-form","stbux RS,RA,RB","31@0|RS@6|RA@11|RB@16|247@21|/@31|","P1" +"Store Byte Indexed X-form","stbx RS,RA,RB","31@0|RS@6|RA@11|RB@16|215@21|/@31|","P1" +"Store Floating-Point Double D-form","stfd FRS,D(RA)","54@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Double with Update D-form","stfdu FRS,D(RA)","55@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Double with Update Indexed X-form","stfdux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|759@21|/@31|","P1" +"Store Floating-Point Double Indexed X-form","stfdx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|727@21|/@31|","P1" +"Store Floating-Point Single D-form","stfs FRS,D(RA)","52@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Single with Update D-form","stfsu FRS,D(RA)","53@0|FRS@6|RA@11|D@16|","P1" +"Store Floating-Point Single with Update Indexed X-form","stfsux FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|695@21|/@31|","P1" +"Store Floating-Point Single Indexed X-form","stfsx FRS,RA,RB","31@0|FRS@6|RA@11|RB@16|663@21|/@31|","P1" +"Store Halfword D-form","sth RS,D(RA)","44@0|RS@6|RA@11|D@16|","P1" +"Store Halfword Byte-Reverse Indexed X-form","sthbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|918@21|/@31|","P1" +"Store Halfword with Update D-form","sthu RS,D(RA)","45@0|RS@6|RA@11|D@16|","P1" +"Store Halfword with Update Indexed X-form","sthux RS,RA,RB","31@0|RS@6|RA@11|RB@16|439@21|/@31|","P1" +"Store Halfword Indexed X-form","sthx 
RS,RA,RB","31@0|RS@6|RA@11|RB@16|407@21|/@31|","P1" +"Store Multiple Word D-form","stmw RS,D(RA)","47@0|RS@6|RA@11|D@16|","P1" +"Store String Word Immediate X-form","stswi RS,RA,NB","31@0|RS@6|RA@11|NB@16|725@21|/@31|","P1" +"Store String Word Indexed X-form","stswx RS,RA,RB","31@0|RS@6|RA@11|RB@16|661@21|/@31|","P1" +"Store Word D-form","stw RS,D(RA)","36@0|RS@6|RA@11|D@16|","P1" +"Store Word Byte-Reverse Indexed X-form","stwbrx RS,RA,RB","31@0|RS@6|RA@11|RB@16|662@21|/@31|","P1" +"Store Word with Update D-form","stwu RS,D(RA)","37@0|RS@6|RA@11|D@16|","P1" +"Store Word with Update Indexed X-form","stwux RS,RA,RB","31@0|RS@6|RA@11|RB@16|183@21|/@31|","P1" +"Store Word Indexed X-form","stwx RS,RA,RB","31@0|RS@6|RA@11|RB@16|151@21|/@31|","P1" +"Subtract From Carrying XO-form","subfc RT,RA,RB (OE=0 Rc=0)|subfc. RT,RA,RB (OE=0 Rc=1)|subfco RT,RA,RB (OE=1 Rc=0)|subfco. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|8@22|Rc@31|","P1" +"Subtract From Extended XO-form","subfe RT,RA,RB (OE=0 Rc=0)|subfe. RT,RA,RB (OE=0 Rc=1)|subfeo RT,RA,RB (OE=1 Rc=0)|subfeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|136@22|Rc@31|","P1" +"Subtract From Immediate Carrying D-form","subfic RT,RA,SI","8@0|RT@6|RA@11|SI@16|","P1" +"Subtract From Minus One Extended XO-form","subfme RT,RA (OE=0 Rc=0)|subfme. RT,RA (OE=0 Rc=1)|subfmeo RT,RA (OE=1 Rc=0)|subfmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|232@22|Rc@31|","P1" +"Subtract From Zero Extended XO-form","subfze RT,RA (OE=0 Rc=0)|subfze. RT,RA (OE=0 Rc=1)|subfzeo RT,RA (OE=1 Rc=0)|subfzeo. 
RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|200@22|Rc@31|","P1" +"Synchronize X-form","sync L,SC","31@0|//@6|L@8|///@11|SC@14|///@16|598@21|/@31|","P1" +"TLB Invalidate Entry X-form","tlbie RB,RS,RIC,PRS,R","31@0|RS@6|/@11|RIC@12|PRS@14|R@15|RB@16|306@21|/@31|","P1" +"Trap Word X-form","tw TO,RA,RB","31@0|TO@6|RA@11|RB@16|4@21|/@31|","P1" +"Trap Word Immediate D-form","twi TO,RA,SI","3@0|TO@6|RA@11|SI@16|","P1" +"XOR X-form","xor RA,RS,RB (Rc=0)|xor. RA,RS,RB (Rc=1)","31@0|RS@6|RA@11|RB@16|316@21|Rc@31|","P1" +"XOR Immediate D-form","xori RA,RS,UI","26@0|RS@6|RA@11|UI@16|","P1" +"XOR Immediate Shifted D-form","xoris RA,RS,UI","27@0|RS@6|RA@11|UI@16|","P1" From 5e4c51d3ff484467c6ff1b51b6bacc8b70221058 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 17 Aug 2021 14:11:25 -0500 Subject: [PATCH 003/200] ppc64asm: fix plan9 style decoding issues This reworks the decoding of CR bit fields to correctly decode the fcmp/cmp/setbc families of instructions. Comparison instructions always produce a result in a CR field, thus it should be listed last if not implied to be CR0. Furthermore, remove the context sensitive decoding of CR field and CR bit type arguments from plan9Arg. These edge cases are better handled during the per-instruction combining of decoded arguments. This allows setbc like instructions to decode correctly without special handling. 
Change-Id: I264a600034b5abb8901b0c2e6bffe2887200ac27 Reviewed-on: https://go-review.googlesource.com/c/arch/+/347569 Run-TryBot: Paul Murphy TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger Trust: Cherry Mui --- ppc64/ppc64asm/plan9.go | 71 ++++++++++++++---------------- ppc64/ppc64asm/testdata/decode.txt | 8 +++- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 89b91732..88e8e1c7 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -30,18 +30,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin break } if s := plan9Arg(&inst, i, pc, a, symname); s != "" { - // In the case for some BC instructions, a CondReg arg has - // both the CR and the branch condition encoded in its value. - // plan9Arg will return a string with the string representation - // of these values separated by a blank that will be treated - // as 2 args from this point on. - if strings.IndexByte(s, ' ') > 0 { - t := strings.Split(s, " ") - args = append(args, t[0]) - args = append(args, t[1]) - } else { - args = append(args, s) - } + args = append(args, s) } } var op string @@ -61,7 +50,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case 1: return fmt.Sprintf("%s %s", op, args[0]) case 2: - if inst.Op == COPY || inst.Op == PASTECC || inst.Op == FCMPO || inst.Op == FCMPU { + if inst.Op == COPY || inst.Op == PASTECC { return op + " " + args[0] + "," + args[1] } return op + " " + args[1] + "," + args[0] @@ -97,13 +86,13 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin STQ, STFD, STFDU, STFS, STFSU: return op + " " + strings.Join(args, ",") - case CMPD, CMPDI, CMPLD, CMPLDI, CMPW, CMPWI, CMPLW, CMPLWI: - if len(args) == 2 { - return op + " " + args[0] + "," + args[1] - } else if len(args) == 3 { - return op + " " + args[0] + "," + args[1] + "," + args[2] + case FCMPU, FCMPO, CMPD, CMPDI, 
CMPLD, CMPLDI, CMPW, CMPWI, CMPLW, CMPLWI: + crf := int(inst.Args[0].(CondReg) - CR0) + cmpstr := op + " " + args[1] + "," + args[2] + if crf != 0 { // print CRx as the final operand if not implied (i.e BF != 0) + cmpstr += "," + args[0] } - return op + " " + args[0] + " ??" + return cmpstr case LIS: return "ADDIS $0," + args[1] + "," + args[0] @@ -152,16 +141,15 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin } return op + " " + strings.Join(args, ", ") case BC: - if int(inst.Args[0].(Imm))&0x1c == 12 { // jump on cond bit set - if len(args) == 4 { - return fmt.Sprintf("B%s %s,%s", args[1], args[2], args[3]) - } - return fmt.Sprintf("B%s %s", args[1], args[2]) - } else if int(inst.Args[0].(Imm))&0x1c == 4 && revCondMap[args[1]] != "" { // jump on cond bit not set - if len(args) == 4 { - return fmt.Sprintf("B%s %s,%s", revCondMap[args[1]], args[2], args[3]) + bo := int(inst.Args[0].(Imm)) + bi := int(inst.Args[1].(CondReg) - Cond0LT) + bcname := condName[((bo&0x8)>>1)|(bi&0x3)] + if bo&0x17 == 4 { // jump only a CR bit set/unset, no hints (at bits) set. 
+ if bi >= 4 { + return fmt.Sprintf("B%s CR%d,%s", bcname, bi>>2, args[2]) + } else { + return fmt.Sprintf("B%s %s", bcname, args[2]) } - return fmt.Sprintf("B%s %s", revCondMap[args[1]], args[2]) } return op + " " + strings.Join(args, ",") case BCCTR: @@ -203,19 +191,14 @@ func plan9Arg(inst *Inst, argIndex int, pc uint64, arg Arg, symname func(uint64) if inst.Op == ISEL { return fmt.Sprintf("$%d", (arg - Cond0LT)) } - if arg == CR0 && (strings.HasPrefix(inst.Op.String(), "cmp") || strings.HasPrefix(inst.Op.String(), "fcmp")) { - return "" // don't show cr0 for cmp instructions - } else if arg >= CR0 { - return fmt.Sprintf("CR%d", int(arg-CR0)) - } bit := [4]string{"LT", "GT", "EQ", "SO"}[(arg-Cond0LT)%4] - if strings.HasPrefix(inst.Op.String(), "cr") { - return fmt.Sprintf("CR%d%s", int(arg-Cond0LT)/4, bit) - } if arg <= Cond0SO { return bit + } else if arg > Cond0SO && arg <= Cond7SO { + return fmt.Sprintf("CR%d%s", int(arg-Cond0LT)/4, bit) + } else { + return fmt.Sprintf("CR%d", int(arg-CR0)) } - return fmt.Sprintf("%s CR%d", bit, int(arg-Cond0LT)/4) case Imm: return fmt.Sprintf("$%d", arg) case SpReg: @@ -281,6 +264,20 @@ var revCondMap = map[string]string{ "LT": "GE", "GT": "LE", "EQ": "NE", } +// Lookup table to map BI[0:1] and BO[3] to an extended mnemonic for CR ops. +// Bits 0-1 map to a bit with a CR field, and bit 2 selects the inverted (0) +// or regular (1) extended mnemonic. +var condName = []string{ + "GE", + "LE", + "NE", + "NSO", + "LT", + "GT", + "EQ", + "SO", +} + // plan9OpMap maps an Op to its Plan 9 mnemonics, if different than its GNU mnemonics. 
var plan9OpMap = map[Op]string{ LWARX: "LWAR", diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index 11c37aa0..a1f8fb33 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -41,7 +41,7 @@ e8610032| plan9 MOVW 48(R1),R3 7c00422c| gnu dcbt r0,r8,0 7c00422c| plan9 DCBT (R8) 7fab3040| gnu cmpld cr7,r11,r6 -7fab3040| plan9 CMPU CR7,R11,R6 +7fab3040| plan9 CMPU R11,R6,CR7 2c030001| gnu cmpwi r3,1 2c030001| plan9 CMPW R3,$1 7c2b4840| gnu cmpld r11,r9 @@ -855,3 +855,9 @@ f0400fe0| gnu xvcvsxddp vs2,vs1 7c20003c| gnu wait 1,0 4c000924| gnu rfebb 1 0602000138800007| gnu pli r4,-8589869049 +7c5b03c0| plan9 SETNBCR CR6SO,R2 +fc811000| plan9 FCMPU F1,F2,CR1 +7c220176| plan9 BRD R1,R2 +7c2201b6| plan9 BRH R1,R2 +7c220136| plan9 BRW R1,R2 +7c2311b8| plan9 CFUGED R1,R2,R3 From 6544aa4a77f5e21f3aabb1fda78a38bbf05c7869 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 17 Aug 2021 11:21:54 -0500 Subject: [PATCH 004/200] ppc64asm: don't print invalid encodings of pst*/pl*/paddi insn Change-Id: I5a01b89c96eba94f0eac3d4db65f98d0c2fc1166 Reviewed-on: https://go-review.googlesource.com/c/arch/+/347570 Reviewed-by: Cherry Mui Reviewed-by: Carlos Eduardo Seo --- ppc64/ppc64asm/gnu.go | 14 ++++++++++++-- ppc64/ppc64asm/testdata/decode.txt | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ppc64/ppc64asm/gnu.go b/ppc64/ppc64asm/gnu.go index 225ef4fb..b4c9bf8d 100644 --- a/ppc64/ppc64asm/gnu.go +++ b/ppc64/ppc64asm/gnu.go @@ -297,12 +297,17 @@ func GNUSyntax(inst Inst, pc uint64) string { gnuArg(&inst, 0, inst.Args[0], PC), gnuArg(&inst, 2, inst.Args[2], PC)) startArg = 4 - } else if r == 0 { + } else { str = fmt.Sprintf("%s %s,%s,%s", opName, gnuArg(&inst, 0, inst.Args[0], PC), gnuArg(&inst, 1, inst.Args[1], PC), gnuArg(&inst, 2, inst.Args[2], PC)) startArg = 4 + if r == 1 { + // This is an illegal encoding (ra != 0 && r == 1) on ISA 3.1. 
+ v := uint64(inst.Enc)<<32 | uint64(inst.SuffixEnc) + return fmt.Sprintf(".quad 0x%x", v) + } } buf.WriteString(str) @@ -317,11 +322,16 @@ func GNUSyntax(inst Inst, pc uint64) string { str := fmt.Sprintf("%s %s,%d", opName, gnuArg(&inst, 0, inst.Args[0], PC), d) buf.WriteString(str) startArg = 4 - } else if r == 0 { + } else { str := fmt.Sprintf("%s %s,%d(%s)", opName, gnuArg(&inst, 0, inst.Args[0], PC), d, gnuArg(&inst, 2, inst.Args[2], PC)) + if r == 1 { + // This is an invalid encoding (ra != 0 && r == 1) on ISA 3.1. + v := uint64(inst.Enc)<<32 | uint64(inst.SuffixEnc) + return fmt.Sprintf(".quad 0x%x", v) + } buf.WriteString(str) startArg = 4 } diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index a1f8fb33..3213903d 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -861,3 +861,5 @@ fc811000| plan9 FCMPU F1,F2,CR1 7c2201b6| plan9 BRH R1,R2 7c220136| plan9 BRW R1,R2 7c2311b8| plan9 CFUGED R1,R2,R3 +04100016e4820032| gnu .quad 0x4100016e4820032 +0612000138820007| gnu .quad 0x612000138820007 From b76863e36670e165c85261bc41fabaf345376022 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 23 Sep 2021 10:44:22 -0700 Subject: [PATCH 005/200] x86asm: include size suffix on popcnt instruction Update golang/go#48584 Change-Id: I2c770aaf88e15f8987dc8f1d974127b02b220777 Reviewed-on: https://go-review.googlesource.com/c/arch/+/351889 Trust: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Go Bot Reviewed-by: Cherry Mui --- x86/x86asm/plan9x.go | 1 + x86/x86asm/testdata/decode.txt | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index a93bffd4..59d8f977 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -204,6 +204,7 @@ var plan9Suffix = [maxOp + 1]bool{ OUT: true, POP: true, POPA: true, + POPCNT: true, PUSH: true, PUSHA: true, RCL: true, diff --git a/x86/x86asm/testdata/decode.txt 
b/x86/x86asm/testdata/decode.txt index 9832dcd3..604123f8 100644 --- a/x86/x86asm/testdata/decode.txt +++ b/x86/x86asm/testdata/decode.txt @@ -6518,10 +6518,10 @@ f30faec8|11223344556677885f5f5f5f 64 gnu rdgsbase %eax f30faec8|11223344556677885f5f5f5f 64 intel rdgsbase eax f30faec8|11223344556677885f5f5f5f 64 plan9 RDGSBASE AX f30fb811|223344556677885f5f5f5f5f 32 intel popcnt edx, dword ptr [ecx] -f30fb811|223344556677885f5f5f5f5f 32 plan9 POPCNT 0(CX), DX +f30fb811|223344556677885f5f5f5f5f 32 plan9 POPCNTL 0(CX), DX f30fb811|223344556677885f5f5f5f5f 64 gnu popcnt (%rcx),%edx f30fb811|223344556677885f5f5f5f5f 64 intel popcnt edx, dword ptr [rcx] -f30fb811|223344556677885f5f5f5f5f 64 plan9 POPCNT 0(CX), DX +f30fb811|223344556677885f5f5f5f5f 64 plan9 POPCNTL 0(CX), DX f30fbc11|223344556677885f5f5f5f5f 32 intel tzcnt edx, dword ptr [ecx] f30fbc11|223344556677885f5f5f5f5f 32 plan9 TZCNT 0(CX), DX f30fbc11|223344556677885f5f5f5f5f 64 gnu tzcnt (%rcx),%edx @@ -6565,7 +6565,7 @@ f3480faec8|11223344556677885f5f5f 64 intel rdgsbase rax f3480faec8|11223344556677885f5f5f 64 plan9 RDGSBASE AX f3480fb811|223344556677885f5f5f5f 64 gnu popcnt (%rcx),%rdx f3480fb811|223344556677885f5f5f5f 64 intel popcnt rdx, qword ptr [rcx] -f3480fb811|223344556677885f5f5f5f 64 plan9 POPCNT 0(CX), DX +f3480fb811|223344556677885f5f5f5f 64 plan9 POPCNTQ 0(CX), DX f3480fbc11|223344556677885f5f5f5f 64 gnu tzcnt (%rcx),%rdx f3480fbc11|223344556677885f5f5f5f 64 intel tzcnt rdx, qword ptr [rcx] f3480fbc11|223344556677885f5f5f5f 64 plan9 TZCNT 0(CX), DX @@ -6573,10 +6573,10 @@ f3480fbd11|223344556677885f5f5f5f 64 gnu lzcnt (%rcx),%rdx f3480fbd11|223344556677885f5f5f5f 64 intel lzcnt rdx, qword ptr [rcx] f3480fbd11|223344556677885f5f5f5f 64 plan9 LZCNT 0(CX), DX f3660fb811|223344556677885f5f5f5f 32 intel popcnt dx, word ptr [ecx] -f3660fb811|223344556677885f5f5f5f 32 plan9 POPCNT 0(CX), DX +f3660fb811|223344556677885f5f5f5f 32 plan9 POPCNTW 0(CX), DX f3660fb811|223344556677885f5f5f5f 64 gnu popcnt 
(%rcx),%dx f3660fb811|223344556677885f5f5f5f 64 intel popcnt dx, word ptr [rcx] -f3660fb811|223344556677885f5f5f5f 64 plan9 POPCNT 0(CX), DX +f3660fb811|223344556677885f5f5f5f 64 plan9 POPCNTW 0(CX), DX f3660fbc11|223344556677885f5f5f5f 32 intel tzcnt dx, word ptr [ecx] f3660fbc11|223344556677885f5f5f5f 32 plan9 TZCNT 0(CX), DX f3660fbc11|223344556677885f5f5f5f 64 gnu tzcnt (%rcx),%dx From 5424468ecbacebb9cadd2cfdcc6726e39c619b0e Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Thu, 10 Sep 2020 12:06:57 +0800 Subject: [PATCH 006/200] arm64/arm64asm: add support for TLBI and DC The format of TLBI is TLBI {}, where is an optional field. But there is no field for in the instruction format table. This CL adds a new Arg type sysOp to handle this case. This patch is a copy of CL 256197. Co-authored-by: JunchenLi Change-Id: I6e12f49a8614ca80fd60eef5b63755323824f5fa Reviewed-on: https://go-review.googlesource.com/c/arch/+/302889 Trust: Fannie Zhang Run-TryBot: Fannie Zhang TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui --- arm64/arm64asm/condition.go | 8 +- arm64/arm64asm/condition_util.go | 18 +-- arm64/arm64asm/decode.go | 23 ++-- arm64/arm64asm/decode_test.go | 2 - arm64/arm64asm/inst.go | 158 +++++++++++++++++++++++++ arm64/arm64asm/objdump_test.go | 2 - arm64/arm64asm/plan9x.go | 36 +++--- arm64/arm64asm/testdata/gnucases.txt | 106 +++++++++++++++++ arm64/arm64asm/testdata/plan9cases.txt | 106 +++++++++++++++++ 9 files changed, 417 insertions(+), 42 deletions(-) diff --git a/arm64/arm64asm/condition.go b/arm64/arm64asm/condition.go index d6738572..37ad8eed 100644 --- a/arm64/arm64asm/condition.go +++ b/arm64/arm64asm/condition.go @@ -11,7 +11,7 @@ package arm64asm // Refer to instFormat inside decode.go for more details func at_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x7, 0x8, (instr>>5)&0x7) == Sys_AT + return sys_op_4((instr>>16)&0x7, 0x7, 0x8, (instr>>5)&0x7) == sys_AT } func bfi_bfm_32m_bitfield_cond(instr uint32) bool { @@ 
-61,11 +61,11 @@ func csinv_general_cond(instr uint32) bool { return instr&0xe000 != 0xe000 } func dc_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == Sys_DC + return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == sys_DC } func ic_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == Sys_IC + return sys_op_4((instr>>16)&0x7, 0x7, (instr>>8)&0xf, (instr>>5)&0x7) == sys_IC } func lsl_ubfm_32m_bitfield_cond(instr uint32) bool { @@ -133,7 +133,7 @@ func sbfx_sbfm_64m_bitfield_cond(instr uint32) bool { } func tlbi_sys_cr_system_cond(instr uint32) bool { - return sys_op_4((instr>>16)&0x7, 0x8, (instr>>8)&0xf, (instr>>5)&0x7) == Sys_TLBI + return sys_op_4((instr>>16)&0x7, 0x8, (instr>>8)&0xf, (instr>>5)&0x7) == sys_TLBI } func ubfiz_ubfm_32m_bitfield_cond(instr uint32) bool { diff --git a/arm64/arm64asm/condition_util.go b/arm64/arm64asm/condition_util.go index 62c0c3b0..f2fa11b9 100644 --- a/arm64/arm64asm/condition_util.go +++ b/arm64/arm64asm/condition_util.go @@ -47,19 +47,19 @@ func move_wide_preferred_4(sf, N, imms, immr uint32) bool { return false } -type Sys uint8 +type sys uint8 const ( - Sys_AT Sys = iota - Sys_DC - Sys_IC - Sys_TLBI - Sys_SYS + sys_AT sys = iota + sys_DC + sys_IC + sys_TLBI + sys_SYS ) -func sys_op_4(op1, crn, crm, op2 uint32) Sys { - // TODO: system instruction - return Sys_SYS +func sys_op_4(op1, crn, crm, op2 uint32) sys { + sysInst := sysInstFields{uint8(op1), uint8(crn), uint8(crm), uint8(op2)} + return sysInst.getType() } func is_zero(x uint32) bool { diff --git a/arm64/arm64asm/decode.go b/arm64/arm64asm/decode.go index 5e29c476..b1c4f5ec 100644 --- a/arm64/arm64asm/decode.go +++ b/arm64/arm64asm/decode.go @@ -684,17 +684,26 @@ func decodeArg(aop instArg, x uint32) Arg { //TODO: system instruction return nil - case arg_sysop_DC_SYS_CR_system: - //TODO: system instruction - return nil - case 
arg_sysop_SYS_CR_system: //TODO: system instruction return nil - case arg_sysop_TLBI_SYS_CR_system: - //TODO: system instruction - return nil + case arg_sysop_DC_SYS_CR_system, arg_sysop_TLBI_SYS_CR_system: + op1 := (x >> 16) & 7 + cn := (x >> 12) & 15 + cm := (x >> 8) & 15 + op2 := (x >> 5) & 7 + sysInst := sysInstFields{uint8(op1), uint8(cn), uint8(cm), uint8(op2)} + attrs := sysInst.getAttrs() + reg := int(x & 31) + if !attrs.hasOperand2 { + if reg == 31 { + return sysOp{sysInst, 0, false} + } + // This instruction is undefined if the Rt field is not set to 31. + return nil + } + return sysOp{sysInst, X0 + Reg(reg), true} case arg_Bt: return B0 + Reg(x&(1<<5-1)) diff --git a/arm64/arm64asm/decode_test.go b/arm64/arm64asm/decode_test.go index 9c7d2b62..26eb6ae9 100644 --- a/arm64/arm64asm/decode_test.go +++ b/arm64/arm64asm/decode_test.go @@ -63,9 +63,7 @@ func testDecode(t *testing.T, syntax string) { // TODO: system instruction. var Todo = strings.Fields(` sys - dc at - tlbi ic hvc smc diff --git a/arm64/arm64asm/inst.go b/arm64/arm64asm/inst.go index afeb9a3c..8c633fef 100644 --- a/arm64/arm64asm/inst.go +++ b/arm64/arm64asm/inst.go @@ -968,3 +968,161 @@ func (r RegisterWithArrangementAndIndex) String() string { } return fmt.Sprintf("%s[%d]", result, r.index) } + +type sysOp struct { + op sysInstFields + r Reg + hasOperand2 bool +} + +func (s sysOp) isArg() {} + +func (s sysOp) String() string { + result := s.op.String() + // If s.hasOperand2 is false, the value in the register + // specified by s.r is ignored. 
+ if s.hasOperand2 { + result += ", " + s.r.String() + } + return result +} + +type sysInstFields struct { + op1 uint8 + cn uint8 + cm uint8 + op2 uint8 +} + +type sysInstAttrs struct { + typ sys + name string + hasOperand2 bool +} + +func (s sysInstFields) isArg() {} + +func (s sysInstFields) getAttrs() sysInstAttrs { + attrs, ok := sysInstsAttrs[sysInstFields{s.op1, s.cn, s.cm, s.op2}] + if !ok { + return sysInstAttrs{typ: sys_SYS} + } + return attrs +} + +func (s sysInstFields) String() string { + return s.getAttrs().name +} + +func (s sysInstFields) getType() sys { + return s.getAttrs().typ +} + +var sysInstsAttrs = map[sysInstFields]sysInstAttrs{ + sysInstFields{0, 8, 3, 0}: {sys_TLBI, "VMALLE1IS", false}, + sysInstFields{0, 8, 3, 1}: {sys_TLBI, "VAE1IS", true}, + sysInstFields{0, 8, 3, 2}: {sys_TLBI, "ASIDE1IS", true}, + sysInstFields{0, 8, 3, 3}: {sys_TLBI, "VAAE1IS", true}, + sysInstFields{0, 8, 3, 5}: {sys_TLBI, "VALE1IS", true}, + sysInstFields{0, 8, 3, 7}: {sys_TLBI, "VAALE1IS", true}, + sysInstFields{0, 8, 7, 0}: {sys_TLBI, "VMALLE1", false}, + sysInstFields{0, 8, 7, 1}: {sys_TLBI, "VAE1", true}, + sysInstFields{0, 8, 7, 2}: {sys_TLBI, "ASIDE1", true}, + sysInstFields{0, 8, 7, 3}: {sys_TLBI, "VAAE1", true}, + sysInstFields{0, 8, 7, 5}: {sys_TLBI, "VALE1", true}, + sysInstFields{0, 8, 7, 7}: {sys_TLBI, "VAALE1", true}, + sysInstFields{4, 8, 0, 1}: {sys_TLBI, "IPAS2E1IS", true}, + sysInstFields{4, 8, 0, 5}: {sys_TLBI, "IPAS2LE1IS", true}, + sysInstFields{4, 8, 3, 0}: {sys_TLBI, "ALLE2IS", false}, + sysInstFields{4, 8, 3, 1}: {sys_TLBI, "VAE2IS", true}, + sysInstFields{4, 8, 3, 4}: {sys_TLBI, "ALLE1IS", false}, + sysInstFields{4, 8, 3, 5}: {sys_TLBI, "VALE2IS", true}, + sysInstFields{4, 8, 3, 6}: {sys_TLBI, "VMALLS12E1IS", false}, + sysInstFields{4, 8, 4, 1}: {sys_TLBI, "IPAS2E1", true}, + sysInstFields{4, 8, 4, 5}: {sys_TLBI, "IPAS2LE1", true}, + sysInstFields{4, 8, 7, 0}: {sys_TLBI, "ALLE2", false}, + sysInstFields{4, 8, 7, 1}: {sys_TLBI, "VAE2", true}, 
+ sysInstFields{4, 8, 7, 4}: {sys_TLBI, "ALLE1", false}, + sysInstFields{4, 8, 7, 5}: {sys_TLBI, "VALE2", true}, + sysInstFields{4, 8, 7, 6}: {sys_TLBI, "VMALLS12E1", false}, + sysInstFields{6, 8, 3, 0}: {sys_TLBI, "ALLE3IS", false}, + sysInstFields{6, 8, 3, 1}: {sys_TLBI, "VAE3IS", true}, + sysInstFields{6, 8, 3, 5}: {sys_TLBI, "VALE3IS", true}, + sysInstFields{6, 8, 7, 0}: {sys_TLBI, "ALLE3", false}, + sysInstFields{6, 8, 7, 1}: {sys_TLBI, "VAE3", true}, + sysInstFields{6, 8, 7, 5}: {sys_TLBI, "VALE3", true}, + sysInstFields{0, 8, 1, 0}: {sys_TLBI, "VMALLE1OS", false}, + sysInstFields{0, 8, 1, 1}: {sys_TLBI, "VAE1OS", true}, + sysInstFields{0, 8, 1, 2}: {sys_TLBI, "ASIDE1OS", true}, + sysInstFields{0, 8, 1, 3}: {sys_TLBI, "VAAE1OS", true}, + sysInstFields{0, 8, 1, 5}: {sys_TLBI, "VALE1OS", true}, + sysInstFields{0, 8, 1, 7}: {sys_TLBI, "VAALE1OS", true}, + sysInstFields{0, 8, 2, 1}: {sys_TLBI, "RVAE1IS", true}, + sysInstFields{0, 8, 2, 3}: {sys_TLBI, "RVAAE1IS", true}, + sysInstFields{0, 8, 2, 5}: {sys_TLBI, "RVALE1IS", true}, + sysInstFields{0, 8, 2, 7}: {sys_TLBI, "RVAALE1IS", true}, + sysInstFields{0, 8, 5, 1}: {sys_TLBI, "RVAE1OS", true}, + sysInstFields{0, 8, 5, 3}: {sys_TLBI, "RVAAE1OS", true}, + sysInstFields{0, 8, 5, 5}: {sys_TLBI, "RVALE1OS", true}, + sysInstFields{0, 8, 5, 7}: {sys_TLBI, "RVAALE1OS", true}, + sysInstFields{0, 8, 6, 1}: {sys_TLBI, "RVAE1", true}, + sysInstFields{0, 8, 6, 3}: {sys_TLBI, "RVAAE1", true}, + sysInstFields{0, 8, 6, 5}: {sys_TLBI, "RVALE1", true}, + sysInstFields{0, 8, 6, 7}: {sys_TLBI, "RVAALE1", true}, + sysInstFields{4, 8, 0, 2}: {sys_TLBI, "RIPAS2E1IS", true}, + sysInstFields{4, 8, 0, 6}: {sys_TLBI, "RIPAS2LE1IS", true}, + sysInstFields{4, 8, 1, 0}: {sys_TLBI, "ALLE2OS", false}, + sysInstFields{4, 8, 1, 1}: {sys_TLBI, "VAE2OS", true}, + sysInstFields{4, 8, 1, 4}: {sys_TLBI, "ALLE1OS", false}, + sysInstFields{4, 8, 1, 5}: {sys_TLBI, "VALE2OS", true}, + sysInstFields{4, 8, 1, 6}: {sys_TLBI, "VMALLS12E1OS", false}, + 
sysInstFields{4, 8, 2, 1}: {sys_TLBI, "RVAE2IS", true}, + sysInstFields{4, 8, 2, 5}: {sys_TLBI, "RVALE2IS", true}, + sysInstFields{4, 8, 4, 0}: {sys_TLBI, "IPAS2E1OS", true}, + sysInstFields{4, 8, 4, 2}: {sys_TLBI, "RIPAS2E1", true}, + sysInstFields{4, 8, 4, 3}: {sys_TLBI, "RIPAS2E1OS", true}, + sysInstFields{4, 8, 4, 4}: {sys_TLBI, "IPAS2LE1OS", true}, + sysInstFields{4, 8, 4, 6}: {sys_TLBI, "RIPAS2LE1", true}, + sysInstFields{4, 8, 4, 7}: {sys_TLBI, "RIPAS2LE1OS", true}, + sysInstFields{4, 8, 5, 1}: {sys_TLBI, "RVAE2OS", true}, + sysInstFields{4, 8, 5, 5}: {sys_TLBI, "RVALE2OS", true}, + sysInstFields{4, 8, 6, 1}: {sys_TLBI, "RVAE2", true}, + sysInstFields{4, 8, 6, 5}: {sys_TLBI, "RVALE2", true}, + sysInstFields{6, 8, 1, 0}: {sys_TLBI, "ALLE3OS", false}, + sysInstFields{6, 8, 1, 1}: {sys_TLBI, "VAE3OS", true}, + sysInstFields{6, 8, 1, 5}: {sys_TLBI, "VALE3OS", true}, + sysInstFields{6, 8, 2, 1}: {sys_TLBI, "RVAE3IS", true}, + sysInstFields{6, 8, 2, 5}: {sys_TLBI, "RVALE3IS", true}, + sysInstFields{6, 8, 5, 1}: {sys_TLBI, "RVAE3OS", true}, + sysInstFields{6, 8, 5, 5}: {sys_TLBI, "RVALE3OS", true}, + sysInstFields{6, 8, 6, 1}: {sys_TLBI, "RVAE3", true}, + sysInstFields{6, 8, 6, 5}: {sys_TLBI, "RVALE3", true}, + sysInstFields{0, 7, 6, 1}: {sys_DC, "IVAC", true}, + sysInstFields{0, 7, 6, 2}: {sys_DC, "ISW", true}, + sysInstFields{0, 7, 10, 2}: {sys_DC, "CSW", true}, + sysInstFields{0, 7, 14, 2}: {sys_DC, "CISW", true}, + sysInstFields{3, 7, 4, 1}: {sys_DC, "ZVA", true}, + sysInstFields{3, 7, 10, 1}: {sys_DC, "CVAC", true}, + sysInstFields{3, 7, 11, 1}: {sys_DC, "CVAU", true}, + sysInstFields{3, 7, 14, 1}: {sys_DC, "CIVAC", true}, + sysInstFields{0, 7, 6, 3}: {sys_DC, "IGVAC", true}, + sysInstFields{0, 7, 6, 4}: {sys_DC, "IGSW", true}, + sysInstFields{0, 7, 6, 5}: {sys_DC, "IGDVAC", true}, + sysInstFields{0, 7, 6, 6}: {sys_DC, "IGDSW", true}, + sysInstFields{0, 7, 10, 4}: {sys_DC, "CGSW", true}, + sysInstFields{0, 7, 10, 6}: {sys_DC, "CGDSW", true}, + sysInstFields{0, 
7, 14, 4}: {sys_DC, "CIGSW", true}, + sysInstFields{0, 7, 14, 6}: {sys_DC, "CIGDSW", true}, + sysInstFields{3, 7, 4, 3}: {sys_DC, "GVA", true}, + sysInstFields{3, 7, 4, 4}: {sys_DC, "GZVA", true}, + sysInstFields{3, 7, 10, 3}: {sys_DC, "CGVAC", true}, + sysInstFields{3, 7, 10, 5}: {sys_DC, "CGDVAC", true}, + sysInstFields{3, 7, 12, 3}: {sys_DC, "CGVAP", true}, + sysInstFields{3, 7, 12, 5}: {sys_DC, "CGDVAP", true}, + sysInstFields{3, 7, 13, 3}: {sys_DC, "CGVADP", true}, + sysInstFields{3, 7, 13, 5}: {sys_DC, "CGDVADP", true}, + sysInstFields{3, 7, 14, 3}: {sys_DC, "CIGVAC", true}, + sysInstFields{3, 7, 14, 5}: {sys_DC, "CIGDVAC", true}, + sysInstFields{3, 7, 12, 1}: {sys_DC, "CVAP", true}, + sysInstFields{3, 7, 13, 1}: {sys_DC, "CVADP", true}, +} diff --git a/arm64/arm64asm/objdump_test.go b/arm64/arm64asm/objdump_test.go index 3baf8a19..a096dcec 100644 --- a/arm64/arm64asm/objdump_test.go +++ b/arm64/arm64asm/objdump_test.go @@ -120,9 +120,7 @@ func allowedMismatchObjdump(text string, inst *Inst, dec ExtInst) bool { // TODO: system instruction. 
var todo = strings.Fields(` sys - dc at - tlbi ic hvc smc diff --git a/arm64/arm64asm/plan9x.go b/arm64/arm64asm/plan9x.go index f4eef8c0..ea5139cb 100644 --- a/arm64/arm64asm/plan9x.go +++ b/arm64/arm64asm/plan9x.go @@ -542,10 +542,7 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg } } - if regno == 31 { - return "ZR" - } - return fmt.Sprintf("R%d", regno) + return plan9gpr(a) case RegSP: regno := uint16(a) & 31 @@ -555,13 +552,7 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg return fmt.Sprintf("R%d", regno) case RegExtshiftAmount: - reg := "" - regno := uint16(a.reg) & 31 - if regno == 31 { - reg = "ZR" - } else { - reg = fmt.Sprintf("R%d", uint16(a.reg)&31) - } + reg := plan9gpr(a.reg) extshift := "" amount := "" if a.extShift != ExtShift(0) { @@ -614,19 +605,13 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg case MemExtend: base := "" index := "" - indexreg := "" regno := uint16(a.Base) & 31 if regno == 31 { base = "(RSP)" } else { base = fmt.Sprintf("(R%d)", regno) } - regno = uint16(a.Index) & 31 - if regno == 31 { - indexreg = "ZR" - } else { - indexreg = fmt.Sprintf("R%d", regno) - } + indexreg := plan9gpr(a.Index) if a.Extend == lsl { // Refer to ARM reference manual, for byte load/store(register), the index @@ -736,7 +721,22 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg if strings.Contains(a.String(), "#") { return fmt.Sprintf("$%d", a) } + case sysOp: + result := a.op.String() + if a.r != 0 { + result += ", " + plan9gpr(a.r) + } + return result } return strings.ToUpper(arg.String()) } + +// Convert a general-purpose register to plan9 assembly format. 
+func plan9gpr(r Reg) string { + regno := uint16(r) & 31 + if regno == 31 { + return "ZR" + } + return fmt.Sprintf("R%d", regno) +} diff --git a/arm64/arm64asm/testdata/gnucases.txt b/arm64/arm64asm/testdata/gnucases.txt index 21542099..3ea6941d 100644 --- a/arm64/arm64asm/testdata/gnucases.txt +++ b/arm64/arm64asm/testdata/gnucases.txt @@ -4649,3 +4649,109 @@ cd5a202e| mvn v13.8b, v22.8b 743d0a0e| umov w20, v11.h[2] 743d0c0e| mov w20, v11.s[1] 743d084e| mov x20, v11.d[0] +1f8308d5| tlbi vmalle1is +1f8708d5| tlbi vmalle1 +1f830cd5| tlbi alle2is +9f830cd5| tlbi alle1is +df830cd5| tlbi vmalls12e1is +1f870cd5| tlbi alle2 +9f870cd5| tlbi alle1 +df870cd5| tlbi vmalls12e1 +1f830ed5| tlbi alle3is +1f870ed5| tlbi alle3 +1f8108d5| tlbi vmalle1os +1f810cd5| tlbi alle2os +9f810cd5| tlbi alle1os +df810cd5| tlbi vmalls12e1os +1f810ed5| tlbi alle3os +208308d5| tlbi vae1is, x0 +418308d5| tlbi aside1is, x1 +628308d5| tlbi vaae1is, x2 +a38308d5| tlbi vale1is, x3 +e48308d5| tlbi vaale1is, x4 +258708d5| tlbi vae1, x5 +468708d5| tlbi aside1, x6 +678708d5| tlbi vaae1, x7 +a88708d5| tlbi vale1, x8 +e98708d5| tlbi vaale1, x9 +2a800cd5| tlbi ipas2e1is, x10 +ab800cd5| tlbi ipas2le1is, x11 +2c830cd5| tlbi vae2is, x12 +ad830cd5| tlbi vale2is, x13 +2e840cd5| tlbi ipas2e1, x14 +af840cd5| tlbi ipas2le1, x15 +30870cd5| tlbi vae2, x16 +b1870cd5| tlbi vale2, x17 +3f830ed5| tlbi vae3is, xzr +b3830ed5| tlbi vale3is, x19 +34870ed5| tlbi vae3, x20 +b5870ed5| tlbi vale3, x21 +368108d5| tlbi vae1os, x22 +578108d5| tlbi aside1os, x23 +788108d5| tlbi vaae1os, x24 +b98108d5| tlbi vale1os, x25 +fa8108d5| tlbi vaale1os, x26 +3b8208d5| tlbi rvae1is, x27 +7f8208d5| tlbi rvaae1is, xzr +bd8208d5| tlbi rvale1is, x29 +fe8208d5| tlbi rvaale1is, x30 +3f8508d5| tlbi rvae1os, xzr +608508d5| tlbi rvaae1os, x0 +a18508d5| tlbi rvale1os, x1 +e28508d5| tlbi rvaale1os, x2 +238608d5| tlbi rvae1, x3 +648608d5| tlbi rvaae1, x4 +a58608d5| tlbi rvale1, x5 +e68608d5| tlbi rvaale1, x6 +47800cd5| tlbi ripas2e1is, x7 +c8800cd5| tlbi 
ripas2le1is, x8 +29810cd5| tlbi vae2os, x9 +aa810cd5| tlbi vale2os, x10 +2b820cd5| tlbi rvae2is, x11 +ac820cd5| tlbi rvale2is, x12 +0d840cd5| tlbi ipas2e1os, x13 +4e840cd5| tlbi ripas2e1, x14 +6f840cd5| tlbi ripas2e1os, x15 +90840cd5| tlbi ipas2le1os, x16 +d1840cd5| tlbi ripas2le1, x17 +ff840cd5| tlbi ripas2le1os, xzr +33850cd5| tlbi rvae2os, x19 +b4850cd5| tlbi rvale2os, x20 +35860cd5| tlbi rvae2, x21 +b6860cd5| tlbi rvale2, x22 +37810ed5| tlbi vae3os, x23 +b8810ed5| tlbi vale3os, x24 +39820ed5| tlbi rvae3is, x25 +ba820ed5| tlbi rvale3is, x26 +3b850ed5| tlbi rvae3os, x27 +bf850ed5| tlbi rvale3os, xzr +3d860ed5| tlbi rvae3, x29 +be860ed5| tlbi rvale3, x30 +207608d5| dc ivac, x0 +417608d5| dc isw, x1 +427a08d5| dc csw, x2 +437e08d5| dc cisw, x3 +24740bd5| dc zva, x4 +257a0bd5| dc cvac, x5 +267b0bd5| dc cvau, x6 +277e0bd5| dc civac, x7 +687608d5| dc igvac, x8 +897608d5| dc igsw, x9 +aa7608d5| dc igdvac, x10 +cb7608d5| dc igdsw, x11 +8c7a08d5| dc cgsw, x12 +cd7a08d5| dc cgdsw, x13 +8e7e08d5| dc cigsw, x14 +cf7e08d5| dc cigdsw, x15 +70740bd5| dc gva, x16 +91740bd5| dc gzva, x17 +7f7a0bd5| dc cgvac, xzr +b37a0bd5| dc cgdvac, x19 +747c0bd5| dc cgvap, x20 +b57c0bd5| dc cgdvap, x21 +767d0bd5| dc cgvadp, x22 +b77d0bd5| dc cgdvadp, x23 +787e0bd5| dc cigvac, x24 +b97e0bd5| dc cigdvac, x25 +3a7c0bd5| dc cvap, x26 +3b7d0bd5| dc cvadp, x27 diff --git a/arm64/arm64asm/testdata/plan9cases.txt b/arm64/arm64asm/testdata/plan9cases.txt index 1bbb2386..a1da4f87 100644 --- a/arm64/arm64asm/testdata/plan9cases.txt +++ b/arm64/arm64asm/testdata/plan9cases.txt @@ -4576,3 +4576,109 @@ d7061a6f| VUSHR $6, V22.H8, V23.H8 44786638| MOVBU (R2)(R6<<0), R4 ae7bbe38| MOVB (R29)(R30<<0), R14 ae6bbe38| MOVB (R29)(R30), R14 +1f8308d5| TLBI VMALLE1IS +1f8708d5| TLBI VMALLE1 +1f830cd5| TLBI ALLE2IS +9f830cd5| TLBI ALLE1IS +df830cd5| TLBI VMALLS12E1IS +1f870cd5| TLBI ALLE2 +9f870cd5| TLBI ALLE1 +df870cd5| TLBI VMALLS12E1 +1f830ed5| TLBI ALLE3IS +1f870ed5| TLBI ALLE3 +1f8108d5| TLBI VMALLE1OS +1f810cd5| 
TLBI ALLE2OS +9f810cd5| TLBI ALLE1OS +df810cd5| TLBI VMALLS12E1OS +1f810ed5| TLBI ALLE3OS +208308d5| TLBI VAE1IS, R0 +418308d5| TLBI ASIDE1IS, R1 +628308d5| TLBI VAAE1IS, R2 +a38308d5| TLBI VALE1IS, R3 +e48308d5| TLBI VAALE1IS, R4 +258708d5| TLBI VAE1, R5 +468708d5| TLBI ASIDE1, R6 +678708d5| TLBI VAAE1, R7 +a88708d5| TLBI VALE1, R8 +e98708d5| TLBI VAALE1, R9 +2a800cd5| TLBI IPAS2E1IS, R10 +ab800cd5| TLBI IPAS2LE1IS, R11 +2c830cd5| TLBI VAE2IS, R12 +ad830cd5| TLBI VALE2IS, R13 +2e840cd5| TLBI IPAS2E1, R14 +af840cd5| TLBI IPAS2LE1, R15 +30870cd5| TLBI VAE2, R16 +b1870cd5| TLBI VALE2, R17 +3f830ed5| TLBI VAE3IS, ZR +b3830ed5| TLBI VALE3IS, R19 +34870ed5| TLBI VAE3, R20 +b5870ed5| TLBI VALE3, R21 +368108d5| TLBI VAE1OS, R22 +578108d5| TLBI ASIDE1OS, R23 +788108d5| TLBI VAAE1OS, R24 +b98108d5| TLBI VALE1OS, R25 +fa8108d5| TLBI VAALE1OS, R26 +3b8208d5| TLBI RVAE1IS, R27 +7f8208d5| TLBI RVAAE1IS, ZR +bd8208d5| TLBI RVALE1IS, R29 +fe8208d5| TLBI RVAALE1IS, R30 +3f8508d5| TLBI RVAE1OS, ZR +608508d5| TLBI RVAAE1OS, R0 +a18508d5| TLBI RVALE1OS, R1 +e28508d5| TLBI RVAALE1OS, R2 +238608d5| TLBI RVAE1, R3 +648608d5| TLBI RVAAE1, R4 +a58608d5| TLBI RVALE1, R5 +e68608d5| TLBI RVAALE1, R6 +47800cd5| TLBI RIPAS2E1IS, R7 +c8800cd5| TLBI RIPAS2LE1IS, R8 +29810cd5| TLBI VAE2OS, R9 +aa810cd5| TLBI VALE2OS, R10 +2b820cd5| TLBI RVAE2IS, R11 +ac820cd5| TLBI RVALE2IS, R12 +0d840cd5| TLBI IPAS2E1OS, R13 +4e840cd5| TLBI RIPAS2E1, R14 +6f840cd5| TLBI RIPAS2E1OS, R15 +90840cd5| TLBI IPAS2LE1OS, R16 +d1840cd5| TLBI RIPAS2LE1, R17 +ff840cd5| TLBI RIPAS2LE1OS, ZR +33850cd5| TLBI RVAE2OS, R19 +b4850cd5| TLBI RVALE2OS, R20 +35860cd5| TLBI RVAE2, R21 +b6860cd5| TLBI RVALE2, R22 +37810ed5| TLBI VAE3OS, R23 +b8810ed5| TLBI VALE3OS, R24 +39820ed5| TLBI RVAE3IS, R25 +ba820ed5| TLBI RVALE3IS, R26 +3b850ed5| TLBI RVAE3OS, R27 +bf850ed5| TLBI RVALE3OS, ZR +3d860ed5| TLBI RVAE3, R29 +be860ed5| TLBI RVALE3, R30 +207608d5| DC IVAC, R0 +417608d5| DC ISW, R1 +427a08d5| DC CSW, R2 +437e08d5| DC CISW, R3 
+24740bd5| DC ZVA, R4 +257a0bd5| DC CVAC, R5 +267b0bd5| DC CVAU, R6 +277e0bd5| DC CIVAC, R7 +687608d5| DC IGVAC, R8 +897608d5| DC IGSW, R9 +aa7608d5| DC IGDVAC, R10 +cb7608d5| DC IGDSW, R11 +8c7a08d5| DC CGSW, R12 +cd7a08d5| DC CGDSW, R13 +8e7e08d5| DC CIGSW, R14 +cf7e08d5| DC CIGDSW, R15 +70740bd5| DC GVA, R16 +91740bd5| DC GZVA, R17 +7f7a0bd5| DC CGVAC, ZR +b37a0bd5| DC CGDVAC, R19 +747c0bd5| DC CGVAP, R20 +b57c0bd5| DC CGDVAP, R21 +767d0bd5| DC CGVADP, R22 +b77d0bd5| DC CGDVADP, R23 +787e0bd5| DC CIGVAC, R24 +b97e0bd5| DC CIGDVAC, R25 +3a7c0bd5| DC CVAP, R26 +3b7d0bd5| DC CVADP, R27 From cd2ec6f1ddf14e3a42861a19f145293ea4181414 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 13 Sep 2021 16:42:27 -0400 Subject: [PATCH 007/200] x86/x86asm: fix plan9 print of PUSHQ/POPQ The 5x instructions decode correctly and print correctly in the GNU and Intel modes, but it was using the wrong suffix in the Plan 9 mode. Fix that. Change-Id: I8242d142ef56bf3e16e7535d59034c3932f5bbda Reviewed-on: https://go-review.googlesource.com/c/arch/+/349689 Run-TryBot: Russ Cox TryBot-Result: Gopher Robot Reviewed-by: Austin Clements Auto-Submit: Russ Cox --- x86/x86asm/plan9x.go | 4 ++++ x86/x86asm/testdata/decode.txt | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index 59d8f977..de417946 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -66,6 +66,10 @@ func GoSyntax(inst Inst, pc uint64, symname SymLookup) string { s := inst.DataSize if inst.MemBytes != 0 { s = inst.MemBytes * 8 + } else if inst.Args[1] == nil { // look for register-only 64-bit instruction, like PUSHQ AX + if r, ok := inst.Args[0].(Reg); ok && RAX <= r && r <= R15 { + s = 64 + } } switch s { case 8: diff --git a/x86/x86asm/testdata/decode.txt b/x86/x86asm/testdata/decode.txt index 604123f8..cbd536a8 100644 --- a/x86/x86asm/testdata/decode.txt +++ b/x86/x86asm/testdata/decode.txt @@ -2772,12 +2772,12 @@ 50|11223344556677885f5f5f5f5f5f5f 32 
plan9 PUSHL AX 50|11223344556677885f5f5f5f5f5f5f 64 gnu push %rax 50|11223344556677885f5f5f5f5f5f5f 64 intel push rax -50|11223344556677885f5f5f5f5f5f5f 64 plan9 PUSHL AX +50|11223344556677885f5f5f5f5f5f5f 64 plan9 PUSHQ AX 58|11223344556677885f5f5f5f5f5f5f 32 intel pop eax 58|11223344556677885f5f5f5f5f5f5f 32 plan9 POPL AX 58|11223344556677885f5f5f5f5f5f5f 64 gnu pop %rax 58|11223344556677885f5f5f5f5f5f5f 64 intel pop rax -58|11223344556677885f5f5f5f5f5f5f 64 plan9 POPL AX +58|11223344556677885f5f5f5f5f5f5f 64 plan9 POPQ AX 60|11223344556677885f5f5f5f5f5f5f 32 intel pushad 60|11223344556677885f5f5f5f5f5f5f 32 plan9 PUSHAD 60|11223344556677885f5f5f5f5f5f5f 64 gnu error: unrecognized instruction From fc48f9fe4c157e3ed95b38adbda9b9fe5a31cf03 Mon Sep 17 00:00:00 2001 From: Dan Kortschak Date: Sat, 9 Apr 2022 08:19:57 +0930 Subject: [PATCH 008/200] ppc64,x86: fix code generation notice Change-Id: I6de117af0ae5f9ccb0dbecad53bebf6241a13e38 Reviewed-on: https://go-review.googlesource.com/c/arch/+/399274 Reviewed-by: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor --- ppc64/ppc64asm/tables.go | 3 +-- ppc64/ppc64map/map.go | 3 +-- x86/x86asm/tables.go | 3 +-- x86/x86map/map.go | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/ppc64/ppc64asm/tables.go b/ppc64/ppc64asm/tables.go index d42ab0f2..9e3b26f1 100644 --- a/ppc64/ppc64asm/tables.go +++ b/ppc64/ppc64asm/tables.go @@ -1,5 +1,4 @@ -// DO NOT EDIT -// generated by: ppc64map -fmt=decoder ../pp64.csv +// Code generated by ppc64map -fmt=decoder pp64.csv DO NOT EDIT. 
package ppc64asm diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 0acad44d..8d345533 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -704,8 +704,7 @@ var funcBodyTmpl = template.Must(template.New("funcBody").Parse(``)) func printDecoder(p *Prog) { var buf bytes.Buffer - fmt.Fprintf(&buf, "// DO NOT EDIT\n") - fmt.Fprintf(&buf, "// generated by: ppc64map -fmt=decoder %s\n", inputFile) + fmt.Fprintf(&buf, "// Code generated by ppc64map -fmt=decoder %s DO NOT EDIT.\n", inputFile) fmt.Fprintf(&buf, "\n") fmt.Fprintf(&buf, "package ppc64asm\n\n") diff --git a/x86/x86asm/tables.go b/x86/x86asm/tables.go index af3fb73c..6f57c70b 100644 --- a/x86/x86asm/tables.go +++ b/x86/x86asm/tables.go @@ -1,5 +1,4 @@ -// DO NOT EDIT -// generated by: x86map -fmt=decoder ../x86.csv +// Code generated by x86map -fmt=decoder x86.csv DO NOT EDIT. package x86asm diff --git a/x86/x86map/map.go b/x86/x86map/map.go index 4f64c0c8..df8c68e5 100644 --- a/x86/x86map/map.go +++ b/x86/x86map/map.go @@ -665,8 +665,7 @@ func printDecoder(p *Prog) { "PAUSE": true, } printDecoderPass(p, 1, false, opMap) - fmt.Printf("// DO NOT EDIT\n") - fmt.Printf("// generated by: x86map -fmt=decoder %s\n", inputFile) + fmt.Printf("// Code generated by x86map -fmt=decoder %s DO NOT EDIT.\n", inputFile) fmt.Printf("\n") fmt.Printf("package x86asm\n\n") fmt.Printf("var decoder = [...]uint16{\n\tuint16(xFail),\n") From 00200b7164a7c6d68f74efd99f51b6100ea0a97d Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 22 Jul 2022 11:10:03 -0400 Subject: [PATCH 009/200] A+C: delete AUTHORS and CONTRIBUTORS In 2009, Google's open-source lawyers asked us to create the AUTHORS file to define "The Go Authors", and the CONTRIBUTORS file was in keeping with open source best practices of the time. 
Re-reviewing our repos now in 2022, the open-source lawyers are comfortable with source control history taking the place of the AUTHORS file, and most open source projects no longer maintain CONTRIBUTORS files. To ease maintenance, remove AUTHORS and CONTRIBUTORS from all repos. For golang/go#53961. Change-Id: I6aadbd7aaeee54c143dcbd5de6db48731d48c178 Reviewed-on: https://go-review.googlesource.com/c/arch/+/418900 Run-TryBot: Russ Cox TryBot-Result: Gopher Robot Reviewed-by: David Chase --- AUTHORS | 3 --- CONTRIBUTORS | 3 --- 2 files changed, 6 deletions(-) delete mode 100644 AUTHORS delete mode 100644 CONTRIBUTORS diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 2b00ddba..00000000 --- a/AUTHORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code refers to The Go Authors for copyright purposes. -# The master list of authors is in the main Go distribution, -# visible at https://tip.golang.org/AUTHORS. diff --git a/CONTRIBUTORS b/CONTRIBUTORS deleted file mode 100644 index 1fbd3e97..00000000 --- a/CONTRIBUTORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code was written by the Go contributors. -# The master list of contributors is in the main Go distribution, -# visible at https://tip.golang.org/CONTRIBUTORS. From 13eedde4113cee87b1eaaf9e20cd6d4f812bf18b Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 20 Jul 2022 13:26:21 -0500 Subject: [PATCH 010/200] ppc64/ppc64asm: fix objdump tests In short, these tests create an object file from a list of opcodes, and expect objdump to generate exactly as many decoded opcodes. Unfortunately, objdump generates two opcode entries for each invalid prefixed instruction, which causes the the testing code to deadlock itself. For example, objdump decodes an invalid form of paddi like: .long ... addi ... instead of something like: .quadword ... Work around this by examing the primary opcode of any entry which objdump reports as ".long", and skip over the next word if the primary opcode is "1" (the prefix opcode). 
The test skips over ".long" entries, so it will continue to work as expected. Change-Id: I9dd0fda10683f666aace4140b63e81fc0fea2ad0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/418857 TryBot-Result: Gopher Robot Run-TryBot: Paul Murphy Reviewed-by: Bryan Mills Reviewed-by: Lynn Boger Reviewed-by: Cherry Mui Reviewed-by: Ian Lance Taylor --- ppc64/ppc64asm/ext_test.go | 19 ++++++++++++------- ppc64/ppc64asm/objdumpext_test.go | 18 +++++++++++------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/ppc64/ppc64asm/ext_test.go b/ppc64/ppc64asm/ext_test.go index f9242e1d..806701b2 100644 --- a/ppc64/ppc64asm/ext_test.go +++ b/ppc64/ppc64asm/ext_test.go @@ -40,7 +40,7 @@ var ( // from an external disassembler's output. type ExtInst struct { addr uint32 - enc [4]byte + enc [8]byte nenc int text string } @@ -200,20 +200,25 @@ func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, defer w.Flush() size = 0 generate(func(x []byte) { - if len(x) > 4 { - x = x[:4] + if len(x) != 4 && len(x) != 8 { + panic(fmt.Sprintf("Unexpected instruction %v\n", x)) + } + izeros := zeros + if len(x) == 4 { + // Only pad to 4 bytes for a 4 byte instruction word. + izeros = izeros[4:] } if debug { - fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) + fmt.Printf("%#x: %x%x\n", start+size, x, izeros[len(x):]) } w.Write(x) - w.Write(zeros[len(x):]) - size += len(zeros) + w.Write(izeros[len(x):]) + size += len(izeros) }) return file, f, size, nil } -var zeros = []byte{0, 0, 0, 0} +var zeros = []byte{0, 0, 0, 0, 0, 0, 0, 0} // pad pads the code sequence with pops. 
func pad(enc []byte) []byte { diff --git a/ppc64/ppc64asm/objdumpext_test.go b/ppc64/ppc64asm/objdumpext_test.go index 37aa2573..033f6708 100644 --- a/ppc64/ppc64asm/objdumpext_test.go +++ b/ppc64/ppc64asm/objdumpext_test.go @@ -64,7 +64,7 @@ func objdump(ext *ExtDis) error { reading bool next uint32 = start addr uint32 - encbuf [4]byte + encbuf [8]byte enc []byte text string ) @@ -88,15 +88,19 @@ func objdump(ext *ExtDis) error { text = "error: unknown instruction" enc = nil } - if len(enc) == 4 { - // prints as word but we want to record bytes - enc[0], enc[3] = enc[3], enc[0] - enc[1], enc[2] = enc[2], enc[1] + // Prefixed instructions may not decode as expected if + // they are an invalid form. Some are tested in decode.txt. + // objdump treats these like two instructions. + // + // Look for primary opcode 1 and advance an exta 4 bytes if + // this failed to decode. + if strings.HasPrefix(text, ".long") && enc[0]>>2 == 1 { + next += 4 } ext.Dec <- ExtInst{addr, encbuf, len(enc), text} - encbuf = [4]byte{} + encbuf = [8]byte{} + next += uint32(len(enc)) enc = nil - next += 4 } } var textangle = []byte("<.text>:") From ada1728cebaa682942b88353968347233f495ce7 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 20 Jul 2022 13:47:02 -0500 Subject: [PATCH 011/200] ppc64/ppc64asm,ppc64map: fix BHRBE argument translation BHRBE stands for "branch history rolling buffer entry". This is not an SPR. Treat it as an unsigned immediate type argument. Similarly, DCRN, SR, TMR, PMRN fields are no longer present in ISA 3.1, they can be removed and nearby code simplified. Fix ppc64map and update tables.go. 
Change-Id: Ie779d24ae9d24541db6565ea169be0d80b893ff8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/418858 Reviewed-by: Ian Lance Taylor Reviewed-by: Cherry Mui TryBot-Result: Gopher Robot Reviewed-by: Lynn Boger Run-TryBot: Paul Murphy --- ppc64/ppc64asm/tables.go | 4 ++-- ppc64/ppc64map/map.go | 13 +++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ppc64/ppc64asm/tables.go b/ppc64/ppc64asm/tables.go index 9e3b26f1..8d0a2431 100644 --- a/ppc64/ppc64asm/tables.go +++ b/ppc64/ppc64asm/tables.go @@ -2906,7 +2906,7 @@ var ( ap_FPReg_11_15 = &argField{Type: TypeFPReg, Shift: 0, BitFields: BitFields{{11, 5, 0}}} ap_ImmUnsigned_7_10 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{7, 4, 0}}} ap_ImmUnsigned_31_31 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{31, 1, 0}}} - ap_SpReg_11_20 = &argField{Type: TypeSpReg, Shift: 0, BitFields: BitFields{{11, 10, 0}}} + ap_ImmUnsigned_11_20 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{11, 10, 0}}} ap_ImmUnsigned_20_20 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{20, 1, 0}}} ap_ImmUnsigned_16_16 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{16, 1, 0}}} ap_ImmUnsigned_17_20 = &argField{Type: TypeImmUnsigned, Shift: 0, BitFields: BitFields{{17, 4, 0}}} @@ -3883,7 +3883,7 @@ var instFormats = [...]instFormat{ {LXSSPX, 0xfc0007fe00000000, 0x7c00041800000000, 0x0, // Load VSX Scalar Single-Precision Indexed X-form (lxsspx XT,RA,RB) [6]*argField{ap_VecSReg_31_31_6_10, ap_Reg_11_15, ap_Reg_16_20}}, {MFBHRBE, 0xfc0007fe00000000, 0x7c00025c00000000, 0x100000000, // Move From BHRB XFX-form (mfbhrbe RT,BHRBE) - [6]*argField{ap_Reg_6_10, ap_SpReg_11_20}}, + [6]*argField{ap_Reg_6_10, ap_ImmUnsigned_11_20}}, {MFVSRD, 0xfc0007fe00000000, 0x7c00006600000000, 0xf80000000000, // Move From VSR Doubleword X-form (mfvsrd RA,XS) [6]*argField{ap_Reg_11_15, ap_VecSReg_31_31_6_10}}, {MFVSRWZ, 0xfc0007fe00000000, 
0x7c0000e600000000, 0xf80000000000, // Move From VSR Word and Zero X-form (mfvsrwz RA,XS) diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 8d345533..4f90ed90 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -5,12 +5,13 @@ // ppc64map constructs the ppc64 opcode map from the instruction set CSV file. // // Usage: +// // ppc64map [-fmt=format] ppc64.csv // // The known output formats are: // -// text (default) - print decoding tree in text form -// decoder - print decoding tables for the ppc64asm package +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the ppc64asm package package main import ( @@ -423,7 +424,7 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { opr = "BD" } - case "XMSK", "YMSK", "PMSK", "IX": + case "XMSK", "YMSK", "PMSK", "IX", "BHRBE": typ = asm.TypeImmUnsigned case "IMM32": @@ -559,12 +560,8 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { case "VRA", "VRB", "VRC", "VRS", "VRT": typ = asm.TypeVecReg - case "SPR", "DCRN", "BHRBE", "TBR", "SR", "TMR", "PMRN": // Note: if you add to this list and the register field needs special handling, add it to switch statement below + case "SPR", "TBR": typ = asm.TypeSpReg - switch opr { - case "DCRN": - opr = "DCR" - } if n := strings.ToLower(opr); n != opr && args.Find(n) >= 0 { opr = n // spr[5:9] || spr[0:4] } From 44deed04936c31acd3d9306129a66498fb79d2ef Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Mon, 24 May 2021 16:22:57 -0500 Subject: [PATCH 012/200] ppc64/ppc64map: add encoder functionality Use the ISA information to generate bits for supporting ISA 3.1 (POWER10) instructions. This creates a new file asm9_gtables.go to allow assembly of instructions defined in pp64.csv. This uses the input pp64.csv file to generate an encoding function for each "type" of instruction. Some encoder functions can be shared (e.x fpr/gpr/vsr opcodes which share similar encoding). 
These are named based on the oldest instruction which uses the function, like "type_xxspltiw". All functions share two tables which store the fixed bits of an instruction. Non-prefixed instructions use GenOpcodes exclusively, prefixed opcodes use the GenPfxOpcodes table to hold the suffix instruction word bits. These are used to populate the instruction specific encoding bits for a particular type. Likewise, the function opsetGen is created to map opcodes which share identical argument types. This plugs into the buildop function in asm9.go. Change-Id: I50cddfcec86b667774af858fb8efe8910dfe80b8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/350609 Reviewed-by: Lynn Boger TryBot-Result: Gopher Robot Run-TryBot: Paul Murphy Reviewed-by: Michael Knyszek Reviewed-by: Heschi Kreinick --- ppc64/ppc64map/map.go | 538 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 528 insertions(+), 10 deletions(-) diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 4f90ed90..31d692d4 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -12,6 +12,10 @@ // // text (default) - print decoding tree in text form // decoder - print decoding tables for the ppc64asm package +// encoder - generate a self-contained file which can be used to encode +// go obj.Progs into machine code +// asm - generate a gnu asm file which can be compiled by gcc containing +// all opcodes discovered in ppc64.csv using macro friendly arguments. package main import ( @@ -20,6 +24,7 @@ import ( "flag" "fmt" gofmt "go/format" + asm "golang.org/x/arch/ppc64/ppc64asm" "log" "math/bits" "os" @@ -28,8 +33,6 @@ import ( "strconv" "strings" "text/template" - - asm "golang.org/x/arch/ppc64/ppc64asm" ) var format = flag.String("fmt", "text", "output format: text, decoder, asm") @@ -37,6 +40,45 @@ var debug = flag.Bool("debug", false, "enable debugging output") var inputFile string +type isaversion uint32 + +const ( + // Sort as supersets of each other. 
Generally speaking, each newer ISA + // supports a superset of the previous instructions with a few exceptions + // throughout. + ISA_P1 isaversion = iota + ISA_P2 + ISA_PPC + ISA_V200 + ISA_V201 + ISA_V202 + ISA_V203 + ISA_V205 + ISA_V206 + ISA_V207 + ISA_V30 + ISA_V30B + ISA_V30C + ISA_V31 +) + +var isaToISA = map[string]isaversion{ + "P1": ISA_P1, + "P2": ISA_P2, + "PPC": ISA_PPC, + "v2.00": ISA_V200, + "v2.01": ISA_V201, + "v2.02": ISA_V202, + "v2.03": ISA_V203, + "v2.05": ISA_V205, + "v2.06": ISA_V206, + "v2.07": ISA_V207, + "v3.0": ISA_V30, + "v3.0B": ISA_V30B, + "v3.0C": ISA_V30C, + "v3.1": ISA_V31, +} + func usage() { fmt.Fprintf(os.Stderr, "usage: ppc64map [-fmt=format] ppc64.csv\n") os.Exit(2) @@ -64,6 +106,8 @@ func main() { print = printDecoder case "asm": print = printASM + case "encoder": + print = printEncoder } p, err := readCSV(flag.Arg(0)) @@ -104,15 +148,17 @@ func readCSV(file string) (*Prog, error) { } type Prog struct { - Insts []Inst - OpRanges map[string]string + Insts []Inst + OpRanges map[string]string + nextOrder int // Next position value (used for Insts[x].order) } type Field struct { - Name string - BitFields asm.BitFields - Type asm.ArgType - Shift uint8 + Name string + BitFields asm.BitFields + BitFieldNames []string + Type asm.ArgType + Shift uint8 } func (f Field) String() string { @@ -130,6 +176,12 @@ type Inst struct { SValue uint32 // Likewise for the Value SDontCare uint32 // Likewise for the DontCare bits Fields []Field + Words int // Number of words instruction encodes to. + Isa isaversion + memOp bool // Is this a memory operation? + memOpX bool // Is this an x-form memory operation? + memOpSt bool // Is this a store memory operations? + order int // Position in pp64.csv. } func (i Inst) String() string { @@ -330,7 +382,7 @@ func computeMaskValueReserved(args Args, text string) (mask, value, reserved uin // detected instructions into p. 
One entry may generate multiple instruction // entries as each extended mnemonic listed in text is treated like a unique // instruction. -func add(p *Prog, text, mnemonics, encoding, tags string) { +func add(p *Prog, text, mnemonics, encoding, isa string) { // Parse encoding, building size and offset of each field. // The first field in the encoding is the smallest offset. // And note the MSB is bit 0, not bit 31. @@ -340,12 +392,18 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { iword := int8(0) ispfx := false + isaLevel, fnd := isaToISA[isa] + if !fnd { + log.Fatalf("%s: ISA level '%s' is unknown\n", text, isa) + return + } + // Is this a prefixed instruction? if encoding[0] == ',' { pfields := strings.Split(encoding, ",")[1:] if len(pfields) != 2 { - fmt.Fprintf(os.Stderr, "%s: Prefixed instruction must be 2 words long.\n", text) + log.Fatalf("%s: Prefixed instruction must be 2 words long.\n", text) return } pargs = parseFields(pfields[0], text, iword) @@ -617,17 +675,30 @@ func add(p *Prog, text, mnemonics, encoding, tags string) { f1.Offs, f1.Bits, f1.Word = uint8(args[i].Offs), uint8(args[i].Bits), uint8(args[i].Word) } field.BitFields.Append(f1) + field.BitFieldNames = append(field.BitFieldNames, opr) if f2.Bits > 0 { field.BitFields.Append(f2) + field.BitFieldNames = append(field.BitFieldNames, opr2) } if f3.Bits > 0 { field.BitFields.Append(f3) + field.BitFieldNames = append(field.BitFieldNames, opr3) } inst.Fields = append(inst.Fields, field) } if *debug { fmt.Printf("%v\n", inst) } + inst.Isa = isaLevel + inst.memOp = hasMemoryArg(&inst) + inst.memOpX = inst.memOp && inst.Op[len(inst.Op)-1] == 'x' + inst.memOpSt = inst.memOp && strings.Contains(inst.Text, "Store") + inst.Words = 1 + inst.order = p.nextOrder + p.nextOrder++ + if ispfx { + inst.Words = 2 + } foundInst = append(foundInst, inst) } @@ -658,6 +729,453 @@ func printText(p *Prog) { log.Fatal("-fmt=text not implemented") } +// Some ISA instructions look like memory ops, but are not.
+var isNotMemopMap = map[string]bool{ + "lxvkq": true, + "lvsl": true, + "lvsr": true, +} + +// Some ISA instructions are memops, but are not described like "Load ..." or "Store ..." +var isMemopMap = map[string]bool{} + +// Does this instruction contain a memory argument (e.g x-form load or d-form store) +func hasMemoryArg(insn *Inst) bool { + return ((strings.HasPrefix(insn.Text, "Load") || strings.HasPrefix(insn.Text, "Store") || + strings.HasPrefix(insn.Text, "Prefixed Load") || strings.HasPrefix(insn.Text, "Prefixed Store")) && !isNotMemopMap[insn.Op]) || + isMemopMap[insn.Op] +} + +// Generate a function which takes an obj.Proj and convert it into +// machine code in the supplied buffer. These functions are used +// by asm9.go. +func insnEncFuncStr(insn *Inst, firstName [2]string) string { + buf := new(bytes.Buffer) + // Argument packing order. + // Note, if a2 is not a register type, it is skipped. + argOrder := []string{ + "p.To", // a6 + "p.From", // a1 + "p", // a2 + "p.RestArgs[0].Addr", // a3 + "p.RestArgs[1].Addr", // a4 + "p.RestArgs[2].Addr", // a5 + } + if len(insn.Fields) > len(argOrder) { + log.Fatalf("cannot handle %v. Only %d args supported.", insn, len(argOrder)) + } + + // Does this field require an obj.Addr.Offset? + isImmediate := func(t asm.ArgType) bool { + return t == asm.TypeImmUnsigned || t == asm.TypeSpReg || t == asm.TypeImmSigned || t == asm.TypeOffset + } + + if insn.memOp { + // Swap to/from arguments if we are generating + // for a store operation. + if insn.memOpSt { + // Otherwise, order first three args as: p.From, p.To, p.To + argOrder[0], argOrder[1] = argOrder[1], argOrder[0] + } + argOrder[2] = argOrder[1] // p.Reg is either an Index or Offset (X or D-form) + } else if len(insn.Fields) > 2 && isImmediate(insn.Fields[2].Type) { + // Delete the a2 argument if it is not a register type. + argOrder = append(argOrder[0:2], argOrder[3:]...) 
+ } + + fmt.Fprintf(buf, "// %s\n", insn.Encoding) + fmt.Fprintf(buf, "func type_%s(c *ctxt9, p *obj.Prog, t *Optab, out *[5]uint32) {\n", insn.Op) + if insn.Words > 1 { + fmt.Fprintf(buf, "o0 := GenPfxOpcodes[p.As - A%s]\n", firstName[1]) + } + fmt.Fprintf(buf, "o%d := GenOpcodes[p.As - A%s]\n", insn.Words-1, firstName[0]) + + errCheck := "" + for j, atype := range insn.Fields { + itype := ".Reg" + if isImmediate(atype.Type) { + itype = ".Offset" + } else if insn.memOpX && atype.Name == "RA" { + // X-form memory operations encode RA as the index register of memory type arg. + itype = ".Index" + } + + bitPos := uint64(0) + // VecSpReg is encoded as an even numbered VSR. It is implicitly shifted by 1. + if atype.Type == asm.TypeVecSpReg { + bitPos += 1 + } + // Count the total number of bits to work backwards when shifting + for _, f := range atype.BitFields { + bitPos += uint64(f.Bits) + } + // Adjust for any shifting (e.g DQ/DS shifted instructions) + bitPos += uint64(atype.Shift) + bits := bitPos + + // Generate code to twirl the respective bits into the correct position, and mask off extras. + for i, f := range atype.BitFields { + bitPos -= uint64(f.Bits) + argStr := argOrder[j] + itype + if bitPos != 0 { + argStr = fmt.Sprintf("(%s>>%d)", argStr, bitPos) + } + mask := (1 << uint64(f.Bits)) - 1 + shift := 32 - uint64(f.Offs) - uint64(f.Bits) + fmt.Fprintf(buf, "o%d |= uint32(%s&0x%x)<<%d // %s\n", f.Word, argStr, mask, shift, atype.BitFieldNames[i]) + } + + // Generate a check to verify shifted inputs satisfy their constraints. + // For historical reasons this is not needed for 16 bit values shifted by 16. 
(i.e SI/UI constants in addis/xoris) + if atype.Shift != 0 && atype.Shift != 16 && bits != 32 { + arg := argOrder[j] + itype + mod := (1 << atype.Shift) - 1 + errCheck += fmt.Sprintf("if %s & 0x%x != 0 {\n", arg, mod) + errCheck += fmt.Sprintf("c.ctxt.Diag(\"Constant 0x%%x (%%d) is not a multiple of %d\\n%%v\",%s,%s,p)\n", mod+1, arg, arg) + errCheck += fmt.Sprintf("}\n") + } + j++ + } + buf.WriteString(errCheck) + if insn.Words > 1 { + fmt.Fprintf(buf, "out[1] = o1\n") + } + fmt.Fprintf(buf, "out[0] = o0\n") + fmt.Fprintf(buf, "}\n") + return buf.String() +} + +// Generate a stringed name representing the type of arguments ISA +// instruction needs to be encoded into a usable machine instruction +func insnTypeStr(insn *Inst, uniqueRegTypes bool) string { + if len(insn.Fields) == 0 { + return "type_none" + } + + ret := "type_" + + // Tag store opcodes to give special treatment when generating + // assembler function. They encode similarly to their load analogues. + if insn.memOp { + if insn.memOpSt { + ret += "st_" + } else { + ret += "ld_" + } + } + + // TODO: this is only sufficient for ISA3.1. + for _, atype := range insn.Fields { + switch atype.Type { + // Simple, register like 5 bit field (CR bit, FPR, GPR, VR) + case asm.TypeReg, asm.TypeFPReg, asm.TypeVecReg, asm.TypeCondRegBit: + if uniqueRegTypes { + ret += map[asm.ArgType]string{asm.TypeReg: "R", asm.TypeFPReg: "F", asm.TypeVecReg: "V", asm.TypeCondRegBit: "C"}[atype.Type] + // Handle even/odd pairs in FPR/GPR args. They encode as 5 bits too, but odd values are invalid. 
+ if atype.Name[len(atype.Name)-1] == 'p' { + ret += "p" + } + } else { + ret += "R" + } + case asm.TypeMMAReg, asm.TypeCondRegField: // 3 bit register fields (MMA or CR field) + ret += "M" + case asm.TypeSpReg: + ret += "P" + case asm.TypeVecSReg: // VSX register (6 bits, usually split into 2 fields) + ret += "X" + case asm.TypeVecSpReg: // VSX register pair (5 bits, maybe split fields) + ret += "Y" + case asm.TypeImmSigned, asm.TypeOffset, asm.TypeImmUnsigned: + if atype.Type == asm.TypeImmUnsigned { + ret += "I" + } else { + ret += "S" + } + if atype.Shift != 0 { + ret += fmt.Sprintf("%d", atype.Shift) + } + default: + log.Fatalf("Unhandled type in insnTypeStr: %v\n", atype) + } + + // And add bit packing info + for _, bf := range atype.BitFields { + ret += fmt.Sprintf("_%d_%d", bf.Word*32+bf.Offs, bf.Bits) + } + } + return ret +} + +type AggInfo struct { + Insns []*Inst // List of instructions sharing this type + Typef string // The generated function name matching this +} + +// Generate an Optab entry for a set of instructions with identical argument types +// and write it to buf. 
+func genOptabEntry(ta *AggInfo, typeMap map[string]*Inst) string { + buf := new(bytes.Buffer) + fitArg := func(f *Field, i *Inst) string { + argToRegType := map[asm.ArgType]string{ + // TODO: only complete for ISA 3.1 + asm.TypeReg: "C_REG", + asm.TypeCondRegField: "C_CREG", + asm.TypeCondRegBit: "C_CRBIT", + asm.TypeFPReg: "C_FREG", + asm.TypeVecReg: "C_VREG", + asm.TypeVecSReg: "C_VSREG", + asm.TypeVecSpReg: "C_VSREG", + asm.TypeMMAReg: "C_AREG", + asm.TypeSpReg: "C_SPR", + } + if t, fnd := argToRegType[f.Type]; fnd { + if f.Name[len(f.Name)-1] == 'p' { + return t + "P" + } + return t + } + bits := f.Shift + for _, sf := range f.BitFields { + bits += sf.Bits + } + shift := "" + if f.Shift != 0 { + shift = fmt.Sprintf("S%d", f.Shift) + } + sign := "U" + if f.Type == asm.TypeImmSigned || f.Type == asm.TypeOffset { + sign = "S" + // DS/DQ offsets should explicitly test their offsets to ensure + // they are aligned correctly. This makes tracking down bad offset + // passed to the compiler more straightfoward. + if f.Type == asm.TypeOffset { + shift = "" + } + } + return fmt.Sprintf("C_%s%d%sCON", sign, bits, shift) + } + insn := ta.Insns[0] + args := [6]string{} + // Note, a2 is skipped if the second input argument does not map to a reg. + argOrder := []int{ + 5, + 0, + 1, + 2, + 3, + 4} + + i := 0 + for _, j := range insn.Fields { + // skip a2 if it isn't a reg type. + at := fitArg(&j, insn) + if argOrder[i] == 1 && !strings.HasSuffix(at, "REG") { + i++ + } + args[argOrder[i]] = at + i++ + } + + // Likewise, fixup memory operations. Combine imm + reg, reg + reg + // operations into memory type arguments. 
+ if insn.memOp { + switch args[0] + " " + args[1] { + case "C_REG C_REG": + args[0] = "C_XOREG" + case "C_S16CON C_REG": + args[0] = "C_SOREG" + case "C_S34CON C_REG": + args[0] = "C_LOREG" + } + args[1] = "" + // Finally, fixup store operand ordering to match golang + if insn.memOpSt { + args[0], args[5] = args[5], args[0] + } + + } + fmt.Fprintf(buf, "{as: A%s,", opName(insn.Op)) + for i, s := range args { + if len(s) <= 0 { + continue + } + fmt.Fprintf(buf, "a%d: %s, ", i+1, s) + } + typef := typeMap[ta.Typef].Op + + pfx := "" + if insn.Words > 1 { + pfx = " ispfx: true," + } + fmt.Fprintf(buf, "asmout: type_%s,%s size: %d},\n", typef, pfx, insn.Words*4) + return buf.String() +} + +// printEncoder implements the -fmt=encoder mode. This generates a go file named +// asm9_gtables.go.new. It is self-contained and is called into by the PPC64 +// assembler routines. +// +// For now it is restricted to generating code for ISA 3.1 and newer, but it could +// support older ISA versions with some work, and integration effort. +func printEncoder(p *Prog) { + const minISA = ISA_V31 + + // The type map separates based on obj.Addr to a bit field. Register types + // for GPR, FPR, VR pack identically, but are classified differently. + typeMap := map[string]*Inst{} + typeAggMap := map[string]*AggInfo{} + var oplistBuf bytes.Buffer + var opnameBuf bytes.Buffer + + // The first opcode of 32 or 64 bits to appear in the opcode tables. + firstInsn := [2]string{} + + // Sort the instructions by word size, then by ISA version, oldest to newest. + sort.Slice(p.Insts, func(i, j int) bool { + if p.Insts[i].Words != p.Insts[j].Words { + return p.Insts[i].Words < p.Insts[j].Words + } + return p.Insts[i].order > p.Insts[j].order + }) + + // Classify each opcode and it's arguments, and generate opcode name/enum values. 
+ for i, insn := range p.Insts { + if insn.Isa < minISA { + continue + } + extra := "" + if firstInsn[insn.Words-1] == "" { + firstInsn[insn.Words-1] = opName(insn.Op) + if insn.Words == 1 { + extra = " = ALASTAOUT + iota" + } + } + opType := insnTypeStr(&insn, false) + opTypeOptab := insnTypeStr(&insn, true) + fmt.Fprintf(&oplistBuf, "A%s%s\n", opName(insn.Op), extra) + fmt.Fprintf(&opnameBuf, "\"%s\",\n", opName(insn.Op)) + // Use the oldest instruction to name the encoder function. Some names + // may change if minISA is lowered. + if _, fnd := typeMap[opType]; !fnd { + typeMap[opType] = &p.Insts[i] + } + at, fnd := typeAggMap[opTypeOptab] + if !fnd { + typeAggMap[opTypeOptab] = &AggInfo{[]*Inst{&p.Insts[i]}, opType} + } else { + at.Insns = append(at.Insns, &p.Insts[i]) + } + } + fmt.Fprintf(&oplistBuf, "ALASTGEN\n") + fmt.Fprintf(&oplistBuf, "AFIRSTGEN = A%s\n", firstInsn[0]) + + // Sort type information before outputing to ensure stable ordering + targ := struct { + InputFile string + Insts []Inst + MinISA isaversion + TypeAggList []*AggInfo + TypeList []*Inst + FirstInsn [2]string + TypeMap map[string]*Inst + Oplist string + Opnames string + }{InputFile: inputFile, Insts: p.Insts, MinISA: minISA, FirstInsn: firstInsn, TypeMap: typeMap, Oplist: oplistBuf.String(), Opnames: opnameBuf.String()} + for _, v := range typeAggMap { + targ.TypeAggList = append(targ.TypeAggList, v) + } + for _, v := range typeMap { + targ.TypeList = append(targ.TypeList, v) + } + sort.Slice(targ.TypeAggList, func(i, j int) bool { + // Sort based on the first entry, it is the last to appear in Appendix F. + return targ.TypeAggList[i].Insns[0].Op < targ.TypeAggList[j].Insns[0].Op + }) + sort.Slice(targ.TypeList, func(i, j int) bool { + return targ.TypeList[i].Op < targ.TypeList[j].Op + }) + + // Generate asm9_gtable.go from the following template. 
+ asm9_gtable_go := ` + // DO NOT EDIT + // generated by: ppc64map -fmt=encoder {{.InputFile}} + + package ppc64 + + import ( + "cmd/internal/obj" + ) + + const ( + {{print $.Oplist -}} + ) + + var GenAnames = []string { + {{print $.Opnames -}} + } + + var GenOpcodes = [...]uint32 { + {{range $v := .Insts}}{{if ge $v.Isa $.MinISA -}} + {{if (eq $v.Words 1)}}{{printf "0x%08x, // A%s" $v.Value (opname $v.Op)}} + {{else}} {{printf "0x%08x, // A%s" $v.SValue (opname $v.Op)}} + {{end}}{{end}}{{end -}} + } + + var GenPfxOpcodes = [...]uint32 { + {{range $v := .Insts}}{{if and (ge $v.Isa $.MinISA) (eq $v.Words 2) -}} + {{printf "0x%08x, // A%s" $v.Value (opname $v.Op)}} + {{end}}{{end -}} + } + + var optabGen = []Optab { + {{range $v := .TypeAggList -}} + {{genoptabentry $v $.TypeMap -}} + {{end -}} + } + + {{range $v := .TypeList}} + {{genencoderfunc $v $.FirstInsn}} + {{end}} + + func opsetGen(from obj.As) bool { + r0 := from & obj.AMask + switch from { + {{range $v := .TypeAggList -}} + case A{{opname (index $v.Insns 0).Op}}: + {{range $w := (slice $v.Insns 1) -}} + opset(A{{opname $w.Op}},r0) + {{end -}} + {{end -}} + default: + return false + } + return true + } + ` + tmpl := template.New("asm9_gtable.go") + tmpl.Funcs(template.FuncMap{ + "opname": opName, + "genencoderfunc": insnEncFuncStr, + "genoptabentry": genOptabEntry, + }) + tmpl.Parse(asm9_gtable_go) + + // Write and gofmt the new file. + var tbuf bytes.Buffer + if err := tmpl.Execute(&tbuf, targ); err != nil { + log.Fatal(err) + } + tout, err := gofmt.Source(tbuf.Bytes()) + if err != nil { + fmt.Printf("%s", tbuf.Bytes()) + log.Fatalf("gofmt error: %v", err) + } + if err := os.WriteFile("asm9_gtables.go.new", tout, 0666); err != nil { + log.Fatalf("Failed to create asm9_gtables.new: %v", err) + } +} + // printASM implements the -fmt=asm mode. This prints out a gnu assembler file // which can be used to used to generate test output to verify the golang // disassembler's gnu output matches gnu binutils. 
This is used as an input to From 2926576b28c0567946e1a16de13155f56d9790ea Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 16 Sep 2022 16:11:33 -0500 Subject: [PATCH 013/200] ppc64/ppc64asm: fix decoding of several CC opcodes A few of them decoded to invalid plan9 opcodes, and some did not swap their input arguments similar to their non-CC variants. Change-Id: I26b2b4e318891a75d9c4973bb88efaff8b188bbf Reviewed-on: https://go-review.googlesource.com/c/arch/+/431475 Reviewed-by: Lynn Boger Reviewed-by: Bryan Mills Reviewed-by: Cherry Mui Run-TryBot: Paul Murphy TryBot-Result: Gopher Robot --- ppc64/ppc64asm/plan9.go | 8 ++++++-- ppc64/ppc64asm/testdata/decode.txt | 11 +++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 88e8e1c7..5fe4077c 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -251,7 +251,7 @@ func reverseOperandOrder(op Op) bool { return true case FADDCC, FADDSCC, FSUBCC, FMULCC, FDIVCC, FDIVSCC: return true - case OR, ORC, AND, ANDC, XOR, NAND, EQV, NOR, ANDCC, ORCC, XORCC, EQVCC, NORCC, NANDCC: + case OR, ORCC, ORC, ORCCC, AND, ANDCC, ANDC, ANDCCC, XOR, XORCC, NAND, NANDCC, EQV, EQVCC, NOR, NORCC: return true case SLW, SLWCC, SLD, SLDCC, SRW, SRAW, SRWCC, SRAWCC, SRD, SRDCC, SRAD, SRADCC: return true @@ -305,6 +305,7 @@ var plan9OpMap = map[Op]string{ ORI: "OR", ANDICC: "ANDCC", ANDC: "ANDN", + ANDCCC: "ANDNCC", ADDEO: "ADDEV", ADDEOCC: "ADDEVCC", ADDO: "ADDV", @@ -321,8 +322,12 @@ var plan9OpMap = map[Op]string{ SUBFZECC: "SUBZECC", SUBFZEO: "SUBZEV", SUBFZEOCC: "SUBZEVCC", + SUBF: "SUB", SUBFC: "SUBC", + SUBFCC: "SUBCC", + SUBFCCC: "SUBCCC", ORC: "ORN", + ORCCC: "ORNCC", MULLWO: "MULLWV", MULLWOCC: "MULLWVCC", MULLDO: "MULLDV", @@ -334,7 +339,6 @@ var plan9OpMap = map[Op]string{ ADDI: "ADD", MULLI: "MULLD", SRADI: "SRAD", - SUBF: "SUB", STBCXCC: "STBCCC", STWCXCC: "STWCCC", STDCXCC: "STDCCC", diff --git a/ppc64/ppc64asm/testdata/decode.txt
b/ppc64/ppc64asm/testdata/decode.txt index 3213903d..54fcafd6 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -126,24 +126,31 @@ b4830002| plan9 MOVHU R4,2(R3) 7c032000| plan9 CMPW R3,R4 7c032040| plan9 CMPWU R3,R4 7ca41a14| plan9 ADD R3,R4,R5 +7ca41a15| plan9 ADDCC R3,R4,R5 7ca41814| plan9 ADDC R3,R4,R5 7ca41815| plan9 ADDCCC R3,R4,R5 7c851838| plan9 AND R3,R4,R5 -7c851878| plan9 ANDN R3,R4,R5 7c851839| plan9 ANDCC R3,R4,R5 +7c851878| plan9 ANDN R3,R4,R5 +7c851879| plan9 ANDNCC R3,R4,R5 7c851b78| plan9 OR R3,R4,R5 -7c851b38| plan9 ORN R3,R4,R5 7c851b79| plan9 ORCC R3,R4,R5 +7c851b38| plan9 ORN R3,R4,R5 +7c851b39| plan9 ORNCC R3,R4,R5 7c851a78| plan9 XOR R3,R4,R5 7c851a79| plan9 XORCC R3,R4,R5 7c851bb8| plan9 NAND R3,R4,R5 7c851bb9| plan9 NANDCC R3,R4,R5 7c851a38| plan9 EQV R3,R4,R5 7c851a39| plan9 EQVCC R3,R4,R5 +7c8300d0| plan9 NEG R3,R4 +7c8300d1| plan9 NEGCC R3,R4 7c8518f8| plan9 NOR R3,R4,R5 7c8518f9| plan9 NORCC R3,R4,R5 7ca32050| plan9 SUB R3,R4,R5 +7ca32051| plan9 SUBCC R3,R4,R5 7ca32010| plan9 SUBC R3,R4,R5 +7ca32011| plan9 SUBCCC R3,R4,R5 7ca419d6| plan9 MULLW R3,R4,R5 7ca419d7| plan9 MULLWCC R3,R4,R5 7ca41896| plan9 MULHW R3,R4,R5 From 6a65923eb7420206543da015f2de19bf506b164a Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 20 Sep 2022 16:52:35 -0500 Subject: [PATCH 014/200] ppc64/ppc64asm: improve PCrel argument decoding If an object is built for PIE, CALL opcodes will target the symbol's local entry point. When disassembling, we should print the symbol name if the target is the symbol+8. The local entry offset on PPC64 is almost always 0 or 8. For pure go, it is always 0 or 8 today. If a call looks like it targets a local entry, print it as "CALL symbol+8(SB)". 
Change-Id: I72a2f1eaafd226ed5466384c63040d2f375a541f Reviewed-on: https://go-review.googlesource.com/c/arch/+/432166 Reviewed-by: Cherry Mui Reviewed-by: Dmitri Shuralyov Run-TryBot: Paul Murphy TryBot-Result: Gopher Robot --- ppc64/ppc64asm/decode_test.go | 18 +++++++++++++++++- ppc64/ppc64asm/objdump_test.go | 4 ++++ ppc64/ppc64asm/plan9.go | 12 ++++++++++-- ppc64/ppc64asm/testdata/decode.txt | 3 +++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/ppc64/ppc64asm/decode_test.go b/ppc64/ppc64asm/decode_test.go index 3337312a..83a3acd3 100644 --- a/ppc64/ppc64asm/decode_test.go +++ b/ppc64/ppc64asm/decode_test.go @@ -31,6 +31,15 @@ func TestDecode(t *testing.T) { } } +// Provide a fake symbol to verify PCrel argument decoding. +func symlookup(pc uint64) (string, uint64) { + foopc := uint64(0x100000) + if pc >= foopc && pc < foopc+0x10 { + return "foo", foopc + } + return "", 0 +} + func decode(data []byte, t *testing.T, filename string) { all := string(data) // Simulate PC based on number of instructions found in the test file. @@ -68,7 +77,14 @@ func decode(data []byte, t *testing.T, filename string) { case "gnu": out = GNUSyntax(inst, pc) case "plan9": - out = GoSyntax(inst, pc, nil) + pc := pc + // Hack: Setting PC to 0 effectively transforms the PC relative address + // of CALL (bl) into an absolute address when decoding in GoSyntax. This + // simplifies the testing of symbol lookups via symlookup above. + if inst.Op == BL { + pc = 0 + } + out = GoSyntax(inst, pc, symlookup) default: t.Errorf("unknown syntax %q", syntax) continue diff --git a/ppc64/ppc64asm/objdump_test.go b/ppc64/ppc64asm/objdump_test.go index e89146e5..414fada0 100644 --- a/ppc64/ppc64asm/objdump_test.go +++ b/ppc64/ppc64asm/objdump_test.go @@ -47,6 +47,10 @@ func allowedMismatchObjdump(text string, size int, inst *Inst, dec ExtInst) bool return true case SYNC, WAIT, RFEBB: // ISA 3.1 adds more bits and extended mnemonics for these book ii instructions. 
return true + case BL: + // TODO: Ignore these for now. The output format from gnu objdump is dependent on more than the + // instruction itself e.g: decode(48100009) = "bl 0x100008", 4, want "bl .+0x100008", 4 + return true } if len(dec.enc) >= 4 { diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 5fe4077c..4bd1c7fc 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -168,8 +168,9 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin } // plan9Arg formats arg (which is the argIndex's arg in inst) according to Plan 9 rules. +// // NOTE: because Plan9Syntax is the only caller of this func, and it receives a copy -// of inst, it's ok to modify inst.Args here. +// of inst, it's ok to modify inst.Args here. func plan9Arg(inst *Inst, argIndex int, pc uint64, arg Arg, symname func(uint64) (string, uint64)) string { // special cases for load/store instructions if _, ok := arg.(Offset); ok { @@ -211,9 +212,16 @@ func plan9Arg(inst *Inst, argIndex int, pc uint64, arg Arg, symname func(uint64) return fmt.Sprintf("SPR(%d)", int(arg)) case PCRel: addr := pc + uint64(int64(arg)) - if s, base := symname(addr); s != "" && base == addr { + s, base := symname(addr) + if s != "" && addr == base { return fmt.Sprintf("%s(SB)", s) } + if inst.Op == BL && s != "" && (addr-base) == 8 { + // When decoding an object built for PIE, a CALL targeting + // a global entry point will be adjusted to the local entry + // if any. For now, assume any symname+8 PC is a local call. 
+ return fmt.Sprintf("%s+%d(SB)", s, addr-base) + } return fmt.Sprintf("%#x", addr) case Label: return fmt.Sprintf("%#x", int(arg)) diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index 54fcafd6..7bf4355e 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -469,6 +469,9 @@ f0400fe0| plan9 XVCVSXDDP VS1,VS2 7c6802a6| plan9 MOVD LR,R3 7c6902a6| plan9 MOVD CTR,R3 4c8c0000| plan9 MOVFL CR3,CR1 +48100001| plan9 CALL foo(SB) +48100009| plan9 CALL foo+8(SB) +4810000d| plan9 CALL 0x10000c 7c6803a6| gnu mtlr r3 7c6802a6| gnu mflr r3 7c6903a6| gnu mtctr r3 From e1262b008e86e4edcd9b1cafa48f5a7c113905e0 Mon Sep 17 00:00:00 2001 From: cui fliter Date: Wed, 7 Sep 2022 13:19:54 +0000 Subject: [PATCH 015/200] all: remove redundant type conversion Change-Id: I1bff578bdcacac6ea471ed9effb9d9ade573d813 GitHub-Last-Rev: 43904f8dd8f08028af8870fb8de4c1662594887e GitHub-Pull-Request: golang/arch#6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/428983 Run-TryBot: Cherry Mui TryBot-Result: Gopher Robot Reviewed-by: Michael Knyszek Reviewed-by: Cherry Mui --- arm/armasm/decode.go | 8 +++++--- ppc64/ppc64asm/field.go | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arm/armasm/decode.go b/arm/armasm/decode.go index 6b4d7384..f61ac124 100644 --- a/arm/armasm/decode.go +++ b/arm/armasm/decode.go @@ -17,7 +17,9 @@ import ( // If x matches the format, then the rest of the fields describe how to interpret x. // The opBits describe bits that should be extracted from x and added to the opcode. // For example opBits = 0x1234 means that the value +// // (2 bits at offset 1) followed by (4 bits at offset 3) +// // should be added to op. // Finally the args describe how to decode the instruction arguments. 
// args is stored as a fixed-size array; if there are fewer than len(args) arguments, @@ -233,9 +235,9 @@ func decodeArg(aop instArg, x uint32) Arg { typ, count := decodeShift(x) // ROR #0 here means ROR #0, but decodeShift rewrites to RRX #1. if typ == RotateRightExt { - return Reg(Rm) + return Rm } - return RegShift{Rm, typ, uint8(count)} + return RegShift{Rm, typ, count} case arg_R_shift_R: Rm := Reg(x & (1<<4 - 1)) @@ -249,7 +251,7 @@ func decodeArg(aop instArg, x uint32) Arg { if typ == ShiftLeft && count == 0 { return Reg(Rm) } - return RegShift{Rm, typ, uint8(count)} + return RegShift{Rm, typ, count} case arg_R1_0: return Reg((x & (1<<4 - 1))) diff --git a/ppc64/ppc64asm/field.go b/ppc64/ppc64asm/field.go index 882c91ae..13df063b 100644 --- a/ppc64/ppc64asm/field.go +++ b/ppc64/ppc64asm/field.go @@ -67,7 +67,7 @@ func (bs *BitFields) Append(b BitField) { // the sequence of bitfields is reasonable. func (bs BitFields) parse(i [2]uint32) (u uint64, Bits uint8) { for _, b := range bs { - u = (uint64(u) << b.Bits) | uint64(b.Parse(i)) + u = (u << b.Bits) | uint64(b.Parse(i)) Bits += b.Bits } return u, Bits From 1bb480fc256aacee6555e668dedebd1f8225c946 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 6 Jul 2022 15:19:45 -0500 Subject: [PATCH 016/200] ppc64/ppc64asm: add ISA 3.1B support The new ISA fixes a couple typos, and adds special hashing instructions to help mitigate ROP exploitation. The hash instructions encode a negative offset in a novel way which requires a bit of special handling. 
Change-Id: I9491e10ac87efe37d93b6efaf7f108ae3a4402fd Reviewed-on: https://go-review.googlesource.com/c/arch/+/418859 Reviewed-by: Cherry Mui Reviewed-by: Lynn Boger Reviewed-by: Joedian Reid TryBot-Result: Gopher Robot Run-TryBot: Paul Murphy Reviewed-by: Bryan Mills --- ppc64/pp64.csv | 12 +++-- ppc64/ppc64asm/decode.go | 16 ++++++- ppc64/ppc64asm/field.go | 9 ++++ ppc64/ppc64asm/field_test.go | 32 +++++++------ ppc64/ppc64asm/plan9.go | 4 +- ppc64/ppc64asm/tables.go | 25 ++++++++-- ppc64/ppc64asm/testdata/decode.txt | 10 ++++ ppc64/ppc64asm/testdata/decode_generated.txt | 15 +++--- ppc64/ppc64map/map.go | 48 ++++++++++++++++++-- ppc64/ppc64util/hack.h | 3 ++ 10 files changed, 141 insertions(+), 33 deletions(-) diff --git a/ppc64/pp64.csv b/ppc64/pp64.csv index 3150cade..b2aa6b37 100644 --- a/ppc64/pp64.csv +++ b/ppc64/pp64.csv @@ -1,4 +1,4 @@ -# POWER ISA 3.1 instruction description. +# POWER ISA 3.1B instruction description. # # This file contains comment lines, each beginning with #, # followed by entries in CSV format. @@ -13,8 +13,12 @@ # a list of sequences of the form (,sequence)+. A leading comma is used to signify an # instruction encoding requiring multiple instruction words. # The fourth field represents the ISA version where the instruction was introduced as -# stated in Appendix F. of ISA 3.1 +# stated in Appendix F. 
of ISA 3.1B # +"Hash Check X-form","hashchk RB,offset(RA)","31@0|D@6|RA@11|RB@16|754@21|DX@31|","v3.1B" +"Hash Check Privileged X-form","hashchkp RB,offset(RA)","31@0|D@6|RA@11|RB@16|690@21|DX@31|","v3.1B" +"Hash Store X-form","hashst RB,offset(RA)","31@0|D@6|RA@11|RB@16|722@21|DX@31|","v3.1B" +"Hash Store Privileged X-form","hashstp RB,offset(RA)","31@0|D@6|RA@11|RB@16|658@21|DX@31|","v3.1B" "Byte-Reverse Doubleword X-form","brd RA,RS","31@0|RS@6|RA@11|///@16|187@21|/@31|","v3.1" "Byte-Reverse Halfword X-form","brh RA,RS","31@0|RS@6|RA@11|///@16|219@21|/@31|","v3.1" "Byte-Reverse Word X-form","brw RA,RS","31@0|RS@6|RA@11|///@16|155@21|/@31|","v3.1" @@ -209,7 +213,7 @@ "VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive accumulate XX3-form","xvbf16ger2np AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|114@21|AX@29|BX@30|/@31|","v3.1" "VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative accumulate XX3-form","xvbf16ger2pn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|178@21|AX@29|BX@30|/@31|","v3.1" "VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form","xvbf16ger2pp AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|50@21|AX@29|BX@30|/@31|","v3.1" -"VSX Vector Convert bfloat16 to Single-Precision format XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","v3.1" +"VSX Vector Convert bfloat16 to Single-Precision format Non-signaling XX2-form","xvcvbf16spn XT,XB","60@0|T@6|16@11|B@16|475@21|BX@30|TX@31|","v3.1" "VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form","xvcvspbf16 XT,XB","60@0|T@6|17@11|B@16|475@21|BX@30|TX@31|","v3.1" "VSX Vector 16-bit Floating-Point GER (rank-2 update) XX3-form","xvf16ger2 AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|19@21|AX@29|BX@30|/@31|","v3.1" "VSX Vector 16-bit Floating-Point GER (rank-2 update) Negative multiply, Negative accumulate XX3-form","xvf16ger2nn AT,XA,XB","59@0|AT@6|//@9|A@11|B@16|210@21|AX@29|BX@30|/@31|","v3.1" @@ -1034,7 +1038,7 @@ "Add Carrying 
XO-form","addc RT,RA,RB (OE=0 Rc=0)|addc. RT,RA,RB (OE=0 Rc=1)|addco RT,RA,RB (OE=1 Rc=0)|addco. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|10@22|Rc@31|","P1" "Add Extended XO-form","adde RT,RA,RB (OE=0 Rc=0)|adde. RT,RA,RB (OE=0 Rc=1)|addeo RT,RA,RB (OE=1 Rc=0)|addeo. RT,RA,RB (OE=1 Rc=1)","31@0|RT@6|RA@11|RB@16|OE@21|138@22|Rc@31|","P1" "Add Immediate D-form","addi RT,RA,SI|li RT,SI (RA=0)","14@0|RT@6|RA@11|SI@16|","P1" -"Add Immediate Carrying D-formy","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","P1" +"Add Immediate Carrying D-form","addic RT,RA,SI","12@0|RT@6|RA@11|SI@16|","P1" "Add Immediate Carrying and Record D-form","addic. RT,RA,SI","13@0|RT@6|RA@11|SI@16|","P1" "Add Immediate Shifted D-form","addis RT,RA,SI|lis RT,SI (RA=0)","15@0|RT@6|RA@11|SI@16|","P1" "Add to Minus One Extended XO-form","addme RT,RA (OE=0 Rc=0)|addme. RT,RA (OE=0 Rc=1)|addmeo RT,RA (OE=1 Rc=0)|addmeo. RT,RA (OE=1 Rc=1)","31@0|RT@6|RA@11|///@16|OE@21|234@22|Rc@31|","P1" diff --git a/ppc64/ppc64asm/decode.go b/ppc64/ppc64asm/decode.go index 59bd3258..b8d857c6 100644 --- a/ppc64/ppc64asm/decode.go +++ b/ppc64/ppc64asm/decode.go @@ -22,9 +22,12 @@ const prefixOpcode = 1 // The Args are stored in the same order as the instruction manual. // // Prefixed instructions are stored as: -// prefix << 32 | suffix, +// +// prefix << 32 | suffix, +// // Regular instructions are: -// inst << 32 +// +// inst << 32 type instFormat struct { Op Op Mask uint64 @@ -77,6 +80,12 @@ func (a argField) Parse(i [2]uint32) Arg { return Label(a.BitFields.ParseSigned(i) << a.Shift) case TypeOffset: return Offset(a.BitFields.ParseSigned(i) << a.Shift) + case TypeNegOffset: + // An oddball encoding of offset for hashchk and similar. 
+ // e.g hashchk offset is 0b1111111000000000 | DX << 8 | D << 3 + off := a.BitFields.ParseSigned(i) << a.Shift + neg := int64(-1) << (int(a.Shift) + a.BitFields.NumBits()) + return Offset(neg | off) } } @@ -98,6 +107,7 @@ const ( TypeImmSigned // signed immediate TypeImmUnsigned // unsigned immediate/flag/mask, this is the catch-all type TypeOffset // signed offset in load/store + TypeNegOffset // A negative 16 bit value 0b1111111xxxxx000 encoded as 0bxxxxx (e.g in the hashchk instruction) TypeLast // must be the last one ) @@ -135,6 +145,8 @@ func (t ArgType) String() string { return "Label" case TypeOffset: return "Offset" + case TypeNegOffset: + return "NegOffset" } } diff --git a/ppc64/ppc64asm/field.go b/ppc64/ppc64asm/field.go index 13df063b..37794460 100644 --- a/ppc64/ppc64asm/field.go +++ b/ppc64/ppc64asm/field.go @@ -86,3 +86,12 @@ func (bs BitFields) ParseSigned(i [2]uint32) int64 { u, l := bs.parse(i) return int64(u) << (64 - l) >> (64 - l) } + +// Count the number of bits in the aggregate BitFields +func (bs BitFields) NumBits() int { + num := 0 + for _, b := range bs { + num += int(b.Bits) + } + return num +} diff --git a/ppc64/ppc64asm/field_test.go b/ppc64/ppc64asm/field_test.go index 01402b59..ce18ad50 100644 --- a/ppc64/ppc64asm/field_test.go +++ b/ppc64/ppc64asm/field_test.go @@ -65,26 +65,29 @@ func TestBitFields(t *testing.T) { i [2]uint32 // input u uint64 // unsigned output s int64 // signed output + nb int // Total number of bits in BitField fail bool // if the check should panic }{ - {BitFields{{0, 0, 1}}, [2]uint32{0, 0}, 0, 0, true}, - {BitFields{{31, 2, 1}}, [2]uint32{0, 0}, 0, 0, true}, - {BitFields{{31, 1, 1}}, [2]uint32{0, 1}, 1, -1, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 0 << 1}, 0, 0, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 1 << 1}, 1, 1, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 2 << 1}, 2, -2, false}, - {BitFields{{29, 2, 1}}, [2]uint32{0, 3 << 1}, 3, -1, false}, - {BitFields{{0, 32, 1}}, [2]uint32{0, 1<<32 - 
1}, 1<<32 - 1, -1, false}, - {BitFields{{16, 3, 1}}, [2]uint32{0, 1 << 15}, 4, -4, false}, - {BitFields{{16, 16, 0}, {16, 16, 1}}, [2]uint32{0x8016, 0x32}, 0x80160032, -0x7FE9FFCE, false}, - {BitFields{{14, 18, 0}, {16, 16, 1}}, [2]uint32{0x38016, 0x32}, 0x380160032, -0x07FE9FFCE, false}, + {BitFields{{0, 0, 1}}, [2]uint32{0, 0}, 0, 0, 0, true}, + {BitFields{{31, 2, 1}}, [2]uint32{0, 0}, 0, 0, 2, true}, + {BitFields{{31, 1, 1}}, [2]uint32{0, 1}, 1, -1, 1, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 0 << 1}, 0, 0, 2, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 1 << 1}, 1, 1, 2, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 2 << 1}, 2, -2, 2, false}, + {BitFields{{29, 2, 1}}, [2]uint32{0, 3 << 1}, 3, -1, 2, false}, + {BitFields{{0, 32, 1}}, [2]uint32{0, 1<<32 - 1}, 1<<32 - 1, -1, 32, false}, + {BitFields{{16, 3, 1}}, [2]uint32{0, 1 << 15}, 4, -4, 3, false}, + {BitFields{{16, 16, 0}, {16, 16, 1}}, [2]uint32{0x8016, 0x32}, 0x80160032, -0x7FE9FFCE, 32, false}, + {BitFields{{14, 18, 0}, {16, 16, 1}}, [2]uint32{0x38016, 0x32}, 0x380160032, -0x07FE9FFCE, 34, false}, } for i, tst := range tests { var ( - ou uint64 - os int64 + ou uint64 + os int64 + onb int ) failed := panicOrNot(func() { + onb = tst.b.NumBits() ou = tst.b.Parse(tst.i) os = tst.b.ParseSigned(tst.i) }) @@ -99,5 +102,8 @@ func TestBitFields(t *testing.T) { if os != tst.s { t.Errorf("case %d: %v.ParseSigned(%d) returned %d, expected %d", i, tst.b, tst.i, os, tst.s) } + if onb != tst.nb { + t.Errorf("case %d: %v.NumBits() returned %d, expected %d", i, tst.b, onb, tst.nb) + } } } diff --git a/ppc64/ppc64asm/plan9.go b/ppc64/ppc64asm/plan9.go index 4bd1c7fc..fcb2a128 100644 --- a/ppc64/ppc64asm/plan9.go +++ b/ppc64/ppc64asm/plan9.go @@ -83,7 +83,9 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin STH, STHU, STW, STWU, STD, STDU, - STQ, STFD, STFDU, STFS, STFSU: + STFD, STFDU, + STFS, STFSU, + STQ, HASHST, HASHSTP: return op + " " + strings.Join(args, ",") case FCMPU, 
FCMPO, CMPD, CMPDI, CMPLD, CMPLDI, CMPW, CMPWI, CMPLW, CMPLWI: diff --git a/ppc64/ppc64asm/tables.go b/ppc64/ppc64asm/tables.go index 8d0a2431..8705077b 100644 --- a/ppc64/ppc64asm/tables.go +++ b/ppc64/ppc64asm/tables.go @@ -1,9 +1,13 @@ -// Code generated by ppc64map -fmt=decoder pp64.csv DO NOT EDIT. +// Code generated by ppc64map -fmt=decoder ../pp64.csv DO NOT EDIT. package ppc64asm const ( _ Op = iota + HASHCHK + HASHCHKP + HASHST + HASHSTP BRD BRH BRW @@ -1420,6 +1424,10 @@ const ( ) var opstr = [...]string{ + HASHCHK: "hashchk", + HASHCHKP: "hashchkp", + HASHST: "hashst", + HASHSTP: "hashstp", BRD: "brd", BRH: "brh", BRW: "brw", @@ -2836,9 +2844,10 @@ var opstr = [...]string{ } var ( + ap_Reg_16_20 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{16, 5, 0}}} + ap_NegOffset_31_31_6_10_shift3 = &argField{Type: TypeNegOffset, Shift: 3, BitFields: BitFields{{31, 1, 0}, {6, 5, 0}}} ap_Reg_11_15 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{11, 5, 0}}} ap_Reg_6_10 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{6, 5, 0}}} - ap_Reg_16_20 = &argField{Type: TypeReg, Shift: 0, BitFields: BitFields{{16, 5, 0}}} ap_FPReg_6_10 = &argField{Type: TypeFPReg, Shift: 0, BitFields: BitFields{{6, 5, 0}}} ap_VecReg_16_20 = &argField{Type: TypeVecReg, Shift: 0, BitFields: BitFields{{16, 5, 0}}} ap_VecReg_6_10 = &argField{Type: TypeVecReg, Shift: 0, BitFields: BitFields{{6, 5, 0}}} @@ -2942,6 +2951,14 @@ var ( ) var instFormats = [...]instFormat{ + {HASHCHK, 0xfc0007fe00000000, 0x7c0005e400000000, 0x0, // Hash Check X-form (hashchk RB,offset(RA)) + [6]*argField{ap_Reg_16_20, ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, + {HASHCHKP, 0xfc0007fe00000000, 0x7c00056400000000, 0x0, // Hash Check Privileged X-form (hashchkp RB,offset(RA)) + [6]*argField{ap_Reg_16_20, ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, + {HASHST, 0xfc0007fe00000000, 0x7c0005a400000000, 0x0, // Hash Store X-form (hashst RB,offset(RA)) + [6]*argField{ap_Reg_16_20, 
ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, + {HASHSTP, 0xfc0007fe00000000, 0x7c00052400000000, 0x0, // Hash Store Privileged X-form (hashstp RB,offset(RA)) + [6]*argField{ap_Reg_16_20, ap_NegOffset_31_31_6_10_shift3, ap_Reg_11_15}}, {BRD, 0xfc0007fe00000000, 0x7c00017600000000, 0xf80100000000, // Byte-Reverse Doubleword X-form (brd RA,RS) [6]*argField{ap_Reg_11_15, ap_Reg_6_10}}, {BRH, 0xfc0007fe00000000, 0x7c0001b600000000, 0xf80100000000, // Byte-Reverse Halfword X-form (brh RA,RS) @@ -3344,7 +3361,7 @@ var instFormats = [...]instFormat{ [6]*argField{ap_MMAReg_6_8, ap_VecSReg_29_29_11_15, ap_VecSReg_30_30_16_20}}, {XVBF16GER2PP, 0xfc0007f800000000, 0xec00019000000000, 0x60000100000000, // VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive accumulate XX3-form (xvbf16ger2pp AT,XA,XB) [6]*argField{ap_MMAReg_6_8, ap_VecSReg_29_29_11_15, ap_VecSReg_30_30_16_20}}, - {XVCVBF16SPN, 0xfc1f07fc00000000, 0xf010076c00000000, 0x0, // VSX Vector Convert bfloat16 to Single-Precision format XX2-form (xvcvbf16spn XT,XB) + {XVCVBF16SPN, 0xfc1f07fc00000000, 0xf010076c00000000, 0x0, // VSX Vector Convert bfloat16 to Single-Precision format Non-signaling XX2-form (xvcvbf16spn XT,XB) [6]*argField{ap_VecSReg_31_31_6_10, ap_VecSReg_30_30_16_20}}, {XVCVSPBF16, 0xfc1f07fc00000000, 0xf011076c00000000, 0x0, // VSX Vector Convert with round Single-Precision to bfloat16 format XX2-form (xvcvspbf16 XT,XB) [6]*argField{ap_VecSReg_31_31_6_10, ap_VecSReg_30_30_16_20}}, @@ -5334,7 +5351,7 @@ var instFormats = [...]instFormat{ [6]*argField{ap_Reg_6_10, ap_ImmSigned_16_31}}, {ADDI, 0xfc00000000000000, 0x3800000000000000, 0x0, // Add Immediate D-form (addi RT,RA,SI) [6]*argField{ap_Reg_6_10, ap_Reg_11_15, ap_ImmSigned_16_31}}, - {ADDIC, 0xfc00000000000000, 0x3000000000000000, 0x0, // Add Immediate Carrying D-formy (addic RT,RA,SI) + {ADDIC, 0xfc00000000000000, 0x3000000000000000, 0x0, // Add Immediate Carrying D-form (addic RT,RA,SI) [6]*argField{ap_Reg_6_10, ap_Reg_11_15, 
ap_ImmSigned_16_31}}, {ADDICCC, 0xfc00000000000000, 0x3400000000000000, 0x0, // Add Immediate Carrying and Record D-form (addic. RT,RA,SI) [6]*argField{ap_Reg_6_10, ap_Reg_11_15, ap_ImmSigned_16_31}}, diff --git a/ppc64/ppc64asm/testdata/decode.txt b/ppc64/ppc64asm/testdata/decode.txt index 7bf4355e..ef5c90e0 100644 --- a/ppc64/ppc64asm/testdata/decode.txt +++ b/ppc64/ppc64asm/testdata/decode.txt @@ -873,3 +873,13 @@ fc811000| plan9 FCMPU F1,F2,CR1 7c2311b8| plan9 CFUGED R1,R2,R3 04100016e4820032| gnu .quad 0x4100016e4820032 0612000138820007| gnu .quad 0x612000138820007 +7fe20de5| plan9 HASHCHK -8(R2),R1 +7fe20da5| plan9 HASHST R1,-8(R2) +7c020de4| plan9 HASHCHK -512(R2),R1 +7c020da4| plan9 HASHST R1,-512(R2) +7c020de5| plan9 HASHCHK -256(R2),R1 +7c020da5| plan9 HASHST R1,-256(R2) +7fe115a5| plan9 HASHST R2,-8(R1) +7fe11525| plan9 HASHSTP R2,-8(R1) +7fe115e5| plan9 HASHCHK -8(R1),R2 +7fe11565| plan9 HASHCHKP -8(R1),R2 diff --git a/ppc64/ppc64asm/testdata/decode_generated.txt b/ppc64/ppc64asm/testdata/decode_generated.txt index d8619d7f..13345e99 100644 --- a/ppc64/ppc64asm/testdata/decode_generated.txt +++ b/ppc64/ppc64asm/testdata/decode_generated.txt @@ -1,3 +1,7 @@ +7e0115e5| gnu hashchk r2,-128(r1) +7e011565| gnu hashchkp r2,-128(r1) +7e0115a5| gnu hashst r2,-128(r1) +7e011525| gnu hashstp r2,-128(r1) 7c610176| gnu brd r1,r3 7c6101b6| gnu brh r1,r3 7c610136| gnu brw r1,r3 @@ -23,9 +27,9 @@ f03f0ad1| gnu lxvkq vs33,1 7c611138| gnu pdepd r1,r3,r2 7c611178| gnu pextd r1,r3,r2 0610001688800032| gnu plbz r4,1441842 +60000000| gnu nop 04100016e4800032| gnu pld r4,1441842 06100016c8600032| gnu plfd f3,1441842 -60000000| gnu nop 06100016c0600032| gnu plfs f3,1441842 06100016a8800032| gnu plha r4,1441842 06100016a0800032| gnu plhz r4,1441842 @@ -240,7 +244,6 @@ f02d1769| gnu xxgenpcvwm vs33,v2,13 7f810162| gnu xxmtacc a7 0500000188232a4f| gnu xxpermx vs33,vs35,vs37,vs41,1 7f830162| gnu xxsetaccz a7 -60000000| gnu nop 0500012380234567| gnu xxsplti32dx vs33,1,19088743 
0500012380254567| gnu xxspltidp vs33,19088743 0500012380274567| gnu xxspltiw vs33,19088743 @@ -1214,13 +1217,13 @@ fc60382d| gnu fsqrt. f3,f7 7c611079| gnu andc. r1,r3,r2 70610000| gnu andi. r1,r3,0 74610000| gnu andis. r1,r3,0 -48000690| gnu b 0x1a90 +48000690| gnu b 0x1a9c 48000692| gnu ba 0x690 -48000691| gnu bl 0x1a98 +48000691| gnu bl 0x1aa4 48000693| gnu bla 0x690 -40860690| gnu bne cr1,0x1aa0 +40860690| gnu bne cr1,0x1aac 40860692| gnu bnea cr1,0x690 -40860691| gnu bnel cr1,0x1aa8 +40860691| gnu bnel cr1,0x1ab4 40860693| gnu bnela cr1,0x690 4c860420| gnu bnectr cr1 4c860421| gnu bnectrl cr1 diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 31d692d4..1e3b1b6e 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -60,6 +60,7 @@ const ( ISA_V30B ISA_V30C ISA_V31 + ISA_V31B ) var isaToISA = map[string]isaversion{ @@ -77,6 +78,7 @@ var isaToISA = map[string]isaversion{ "v3.0B": ISA_V30B, "v3.0C": ISA_V30C, "v3.1": ISA_V31, + "v3.1B": ISA_V31B, } func usage() { @@ -482,6 +484,18 @@ func add(p *Prog, text, mnemonics, encoding, isa string) { opr = "BD" } + case "offset": + switch inst.Op { + // These encode a 6 bit displacement in the format of an X-form opcode. + // Allowable displaments are -8 to -8*64 in 8B increments. + case "hashchk", "hashchkp", "hashst", "hashstp": + typ = asm.TypeNegOffset + opr = "DX" + opr2 = "D" + shift = 3 + + } + case "XMSK", "YMSK", "PMSK", "IX", "BHRBE": typ = asm.TypeImmUnsigned @@ -737,7 +751,12 @@ var isNotMemopMap = map[string]bool{ } // Some ISA instructions are memops, but are not described like "Load ..." or "Store ..." 
-var isMemopMap = map[string]bool{} +var isMemopMap = map[string]bool{ + "hashst": true, + "hashstp": true, + "hashchk": true, + "hashchkp": true, +} // Does this instruction contain a memory argument (e.g x-form load or d-form store) func hasMemoryArg(insn *Inst) bool { @@ -767,7 +786,7 @@ func insnEncFuncStr(insn *Inst, firstName [2]string) string { // Does this field require an obj.Addr.Offset? isImmediate := func(t asm.ArgType) bool { - return t == asm.TypeImmUnsigned || t == asm.TypeSpReg || t == asm.TypeImmSigned || t == asm.TypeOffset + return t == asm.TypeImmUnsigned || t == asm.TypeSpReg || t == asm.TypeImmSigned || t == asm.TypeOffset || t == asm.TypeNegOffset } if insn.memOp { @@ -827,13 +846,26 @@ func insnEncFuncStr(insn *Inst, firstName [2]string) string { // Generate a check to verify shifted inputs satisfy their constraints. // For historical reasons this is not needed for 16 bit values shifted by 16. (i.e SI/UI constants in addis/xoris) - if atype.Shift != 0 && atype.Shift != 16 && bits != 32 { + if atype.Type != asm.TypeNegOffset && atype.Shift != 0 && atype.Shift != 16 && bits != 32 { arg := argOrder[j] + itype mod := (1 << atype.Shift) - 1 errCheck += fmt.Sprintf("if %s & 0x%x != 0 {\n", arg, mod) errCheck += fmt.Sprintf("c.ctxt.Diag(\"Constant 0x%%x (%%d) is not a multiple of %d\\n%%v\",%s,%s,p)\n", mod+1, arg, arg) errCheck += fmt.Sprintf("}\n") } + // NegOffset requires a stronger offset check + if atype.Type == asm.TypeNegOffset { + arg := argOrder[j] + itype + mask := -1 << (atype.BitFields.NumBits() + int(atype.Shift)) + maskl := mask // Sign bits are implied in this type. 
+ mask |= (1 << atype.Shift) - 1 + min := maskl + max := maskl | (^mask) + step := 1 << atype.Shift + errCheck += fmt.Sprintf("if %s & 0x%x != 0x%x {\n", arg, uint32(mask), uint32(maskl)) + errCheck += fmt.Sprintf("c.ctxt.Diag(\"Constant(%%d) must within the range of [%d,%d] in steps of %d\\n%%v\",%s,p)\n", min, max, step, arg) + errCheck += fmt.Sprintf("}\n") + } j++ } buf.WriteString(errCheck) @@ -895,6 +927,8 @@ func insnTypeStr(insn *Inst, uniqueRegTypes bool) string { if atype.Shift != 0 { ret += fmt.Sprintf("%d", atype.Shift) } + case asm.TypeNegOffset: // e.g offset in hashst rb, offset(ra) + ret += "N" default: log.Fatalf("Unhandled type in insnTypeStr: %v\n", atype) } @@ -953,6 +987,14 @@ func genOptabEntry(ta *AggInfo, typeMap map[string]*Inst) string { shift = "" } } + if f.Type == asm.TypeNegOffset { + // This is a hack, but allows hashchk and like to correctly + // merge there argument into a C_SOREG memory location type + // argument a little later. + sign = "S" + bits = 16 + shift = "" + } return fmt.Sprintf("C_%s%d%sCON", sign, bits, shift) } insn := ta.Insns[0] diff --git a/ppc64/ppc64util/hack.h b/ppc64/ppc64util/hack.h index e7dada22..3fd9f314 100644 --- a/ppc64/ppc64util/hack.h +++ b/ppc64/ppc64util/hack.h @@ -129,6 +129,9 @@ #define Rpfx 1 #define SIpfx 0xFFFFFFFE00010007 +// A valid displacement value for the hash check and hash store instructions. +#define offset -128 + // These decode as m.fpr* or m.vr*. This is a matter of preference. We // don't support these mnemonics, and I don't think they improve reading // disassembled code in most cases. so ignore. 
From 40c19ba4a7c5ffc92baaef5977ffc2b82dfb0e47 Mon Sep 17 00:00:00 2001 From: cui fliter Date: Tue, 7 Feb 2023 22:40:17 +0800 Subject: [PATCH 017/200] all: fix some comments Change-Id: Ic196f7a97e423708d0a86ff8da99871e966dc2fc Reviewed-on: https://go-review.googlesource.com/c/arch/+/466000 Run-TryBot: Ian Lance Taylor Reviewed-by: Ian Lance Taylor Reviewed-by: David Chase Auto-Submit: Ian Lance Taylor TryBot-Result: Gopher Robot --- ppc64/ppc64asm/field.go | 2 +- x86/x86asm/decode.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ppc64/ppc64asm/field.go b/ppc64/ppc64asm/field.go index 37794460..b47f672d 100644 --- a/ppc64/ppc64asm/field.go +++ b/ppc64/ppc64asm/field.go @@ -80,7 +80,7 @@ func (bs BitFields) Parse(i [2]uint32) uint64 { return u } -// Parse extracts the bitfields from i, concatenate them and return the result +// ParseSigned extracts the bitfields from i, concatenate them and return the result // as a signed integer. Parse will panic if any bitfield in b is invalid. func (bs BitFields) ParseSigned(i [2]uint32) int64 { u, l := bs.parse(i) diff --git a/x86/x86asm/decode.go b/x86/x86asm/decode.go index 8c984970..059b73d3 100644 --- a/x86/x86asm/decode.go +++ b/x86/x86asm/decode.go @@ -1550,7 +1550,7 @@ var addr16 = [8]Mem{ {Base: BX}, } -// baseReg returns the base register for a given register size in bits. +// baseRegForBits returns the base register for a given register size in bits. func baseRegForBits(bits int) Reg { switch bits { case 8: From 060bf14d30f8a6b2e19c8aab764c104725b1682f Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 22 Jun 2023 15:18:25 -0700 Subject: [PATCH 018/200] x86asm: disassemble CMP instruction's arguments in the opposite order That way it matches what the compiler's -S flag generates, and what we write in assembly. CMP AX, $16 JLE foo should get to foo if AX <= 16. 
Without this CL, the disassembly looks like CMP $16, AX JLE foo which reads like we should get to foo if 16 <= AX, which is not what these two instructions actually do. It was originally this way because the CMP instruction parallels the SUB instruction, except it throws away the non-flags result. We write that subtraction as SUB $16, AX // AX <- AX-16 but we don't need to match the SUB's disassembly order, as CMP is not writing to a register output. Update golang/go#60920 (This fixes the underlying issue, but the actual "fixes" comment needs to go on the CL that vendors x/arch containing this CL into the main branch.) Change-Id: Ifa8d3878453d6e33ae144bfdb01b34171c2106a1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/505375 Reviewed-by: Cherry Mui Run-TryBot: Keith Randall TryBot-Result: Gopher Robot Reviewed-by: Keith Randall --- x86/x86asm/plan9x.go | 6 ++++ x86/x86asm/testdata/decode.txt | 66 +++++++++++++++++----------------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index de417946..9e866d87 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -83,6 +83,12 @@ func GoSyntax(inst Inst, pc uint64, symname SymLookup) string { } } + if inst.Op == CMP { + // Use reads-left-to-right ordering for comparisons. + // See issue 60920. 
+ args[0], args[1] = args[1], args[0] + } + if args != nil { op += " " + strings.Join(args, ", ") } diff --git a/x86/x86asm/testdata/decode.txt b/x86/x86asm/testdata/decode.txt index cbd536a8..af840c2f 100644 --- a/x86/x86asm/testdata/decode.txt +++ b/x86/x86asm/testdata/decode.txt @@ -2125,35 +2125,35 @@ 37|11223344556677885f5f5f5f5f5f5f 64 intel error: unrecognized instruction 37|11223344556677885f5f5f5f5f5f5f 64 plan9 error: unrecognized instruction 3811|223344556677885f5f5f5f5f5f5f 32 intel cmp byte ptr [ecx], dl -3811|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB DL, 0(CX) +3811|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB 0(CX), DL 3811|223344556677885f5f5f5f5f5f5f 64 gnu cmp %dl,(%rcx) 3811|223344556677885f5f5f5f5f5f5f 64 intel cmp byte ptr [rcx], dl -3811|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB DL, 0(CX) +3811|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB 0(CX), DL 3911|223344556677885f5f5f5f5f5f5f 32 intel cmp dword ptr [ecx], edx -3911|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL DX, 0(CX) +3911|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL 0(CX), DX 3911|223344556677885f5f5f5f5f5f5f 64 gnu cmp %edx,(%rcx) 3911|223344556677885f5f5f5f5f5f5f 64 intel cmp dword ptr [rcx], edx -3911|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL DX, 0(CX) +3911|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL 0(CX), DX 3a11|223344556677885f5f5f5f5f5f5f 32 intel cmp dl, byte ptr [ecx] -3a11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB 0(CX), DL +3a11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPB DL, 0(CX) 3a11|223344556677885f5f5f5f5f5f5f 64 gnu cmp (%rcx),%dl 3a11|223344556677885f5f5f5f5f5f5f 64 intel cmp dl, byte ptr [rcx] -3a11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB 0(CX), DL +3a11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPB DL, 0(CX) 3b11|223344556677885f5f5f5f5f5f5f 32 intel cmp edx, dword ptr [ecx] -3b11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL 0(CX), DX +3b11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL DX, 0(CX) 3b11|223344556677885f5f5f5f5f5f5f 64 gnu cmp (%rcx),%edx 
3b11|223344556677885f5f5f5f5f5f5f 64 intel cmp edx, dword ptr [rcx] -3b11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL 0(CX), DX +3b11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL DX, 0(CX) 3c11|223344556677885f5f5f5f5f5f5f 32 intel cmp al, 0x11 -3c11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL $0x11, AL +3c11|223344556677885f5f5f5f5f5f5f 32 plan9 CMPL AL, $0x11 3c11|223344556677885f5f5f5f5f5f5f 64 gnu cmp $0x11,%al 3c11|223344556677885f5f5f5f5f5f5f 64 intel cmp al, 0x11 -3c11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL $0x11, AL +3c11|223344556677885f5f5f5f5f5f5f 64 plan9 CMPL AL, $0x11 3d11223344|556677885f5f5f5f5f5f5f 32 intel cmp eax, 0x44332211 -3d11223344|556677885f5f5f5f5f5f5f 32 plan9 CMPL $0x44332211, AX +3d11223344|556677885f5f5f5f5f5f5f 32 plan9 CMPL AX, $0x44332211 3d11223344|556677885f5f5f5f5f5f5f 64 gnu cmp $0x44332211,%eax 3d11223344|556677885f5f5f5f5f5f5f 64 intel cmp eax, 0x44332211 -3d11223344|556677885f5f5f5f5f5f5f 64 plan9 CMPL $0x44332211, AX +3d11223344|556677885f5f5f5f5f5f5f 64 plan9 CMPL AX, $0x44332211 3e67e011|223344556677885f5f5f5f5f 32 intel addr16 loopne .+0x11 3e67e011|223344556677885f5f5f5f5f 32 plan9 LOOPNE .+17 3e67e011|223344556677885f5f5f5f5f 64 gnu loopne,pt .+0x11 @@ -2482,13 +2482,13 @@ 483511223344|556677885f5f5f5f5f5f 64 plan9 XORQ $0x44332211, AX 483911|223344556677885f5f5f5f5f5f 64 gnu cmp %rdx,(%rcx) 483911|223344556677885f5f5f5f5f5f 64 intel cmp qword ptr [rcx], rdx -483911|223344556677885f5f5f5f5f5f 64 plan9 CMPQ DX, 0(CX) +483911|223344556677885f5f5f5f5f5f 64 plan9 CMPQ 0(CX), DX 483b11|223344556677885f5f5f5f5f5f 64 gnu cmp (%rcx),%rdx 483b11|223344556677885f5f5f5f5f5f 64 intel cmp rdx, qword ptr [rcx] -483b11|223344556677885f5f5f5f5f5f 64 plan9 CMPQ 0(CX), DX +483b11|223344556677885f5f5f5f5f5f 64 plan9 CMPQ DX, 0(CX) 483d11223344|556677885f5f5f5f5f5f 64 gnu cmp $0x44332211,%rax 483d11223344|556677885f5f5f5f5f5f 64 intel cmp rax, 0x44332211 -483d11223344|556677885f5f5f5f5f5f 64 plan9 CMPQ $0x44332211, AX 
+483d11223344|556677885f5f5f5f5f5f 64 plan9 CMPQ AX, $0x44332211 4850|11223344556677885f5f5f5f5f5f 64 gnu push %rax 4850|11223344556677885f5f5f5f5f5f 64 intel push rax 4850|11223344556677885f5f5f5f5f5f 64 plan9 PUSHQ AX @@ -2536,7 +2536,7 @@ 48813011223344|556677885f5f5f5f5f 64 plan9 XORQ $0x44332211, 0(AX) 48813811223344|556677885f5f5f5f5f 64 gnu cmpq $0x44332211,(%rax) 48813811223344|556677885f5f5f5f5f 64 intel cmp qword ptr [rax], 0x44332211 -48813811223344|556677885f5f5f5f5f 64 plan9 CMPQ $0x44332211, 0(AX) +48813811223344|556677885f5f5f5f5f 64 plan9 CMPQ 0(AX), $0x44332211 48830011|223344556677885f5f5f5f5f 64 gnu addq $0x11,(%rax) 48830011|223344556677885f5f5f5f5f 64 intel add qword ptr [rax], 0x11 48830011|223344556677885f5f5f5f5f 64 plan9 ADDQ $0x11, 0(AX) @@ -2560,7 +2560,7 @@ 48833011|223344556677885f5f5f5f5f 64 plan9 XORQ $0x11, 0(AX) 48833811|223344556677885f5f5f5f5f 64 gnu cmpq $0x11,(%rax) 48833811|223344556677885f5f5f5f5f 64 intel cmp qword ptr [rax], 0x11 -48833811|223344556677885f5f5f5f5f 64 plan9 CMPQ $0x11, 0(AX) +48833811|223344556677885f5f5f5f5f 64 plan9 CMPQ 0(AX), $0x11 488511|223344556677885f5f5f5f5f5f 64 gnu test %rdx,(%rcx) 488511|223344556677885f5f5f5f5f5f 64 intel test qword ptr [rcx], rdx 488511|223344556677885f5f5f5f5f5f 64 plan9 TESTQ DX, 0(CX) @@ -4233,20 +4233,20 @@ 66351122|3344556677885f5f5f5f5f5f 64 intel xor ax, 0x2211 66351122|3344556677885f5f5f5f5f5f 64 plan9 XORW $0x2211, AX 663911|223344556677885f5f5f5f5f5f 32 intel cmp word ptr [ecx], dx -663911|223344556677885f5f5f5f5f5f 32 plan9 CMPW DX, 0(CX) +663911|223344556677885f5f5f5f5f5f 32 plan9 CMPW 0(CX), DX 663911|223344556677885f5f5f5f5f5f 64 gnu cmp %dx,(%rcx) 663911|223344556677885f5f5f5f5f5f 64 intel cmp word ptr [rcx], dx -663911|223344556677885f5f5f5f5f5f 64 plan9 CMPW DX, 0(CX) +663911|223344556677885f5f5f5f5f5f 64 plan9 CMPW 0(CX), DX 663b11|223344556677885f5f5f5f5f5f 32 intel cmp dx, word ptr [ecx] -663b11|223344556677885f5f5f5f5f5f 32 plan9 CMPW 0(CX), DX 
+663b11|223344556677885f5f5f5f5f5f 32 plan9 CMPW DX, 0(CX) 663b11|223344556677885f5f5f5f5f5f 64 gnu cmp (%rcx),%dx 663b11|223344556677885f5f5f5f5f5f 64 intel cmp dx, word ptr [rcx] -663b11|223344556677885f5f5f5f5f5f 64 plan9 CMPW 0(CX), DX +663b11|223344556677885f5f5f5f5f5f 64 plan9 CMPW DX, 0(CX) 663d1122|3344556677885f5f5f5f5f5f 32 intel cmp ax, 0x2211 -663d1122|3344556677885f5f5f5f5f5f 32 plan9 CMPW $0x2211, AX +663d1122|3344556677885f5f5f5f5f5f 32 plan9 CMPW AX, $0x2211 663d1122|3344556677885f5f5f5f5f5f 64 gnu cmp $0x2211,%ax 663d1122|3344556677885f5f5f5f5f5f 64 intel cmp ax, 0x2211 -663d1122|3344556677885f5f5f5f5f5f 64 plan9 CMPW $0x2211, AX +663d1122|3344556677885f5f5f5f5f5f 64 plan9 CMPW AX, $0x2211 6640|11223344556677885f5f5f5f5f5f 32 intel inc ax 6640|11223344556677885f5f5f5f5f5f 32 plan9 INCW AX 66480f3a161122|3344556677885f5f5f 64 gnu pextrq $0x22,%xmm2,(%rcx) @@ -4343,10 +4343,10 @@ 6681301122|3344556677885f5f5f5f5f 64 intel xor word ptr [rax], 0x2211 6681301122|3344556677885f5f5f5f5f 64 plan9 XORW $0x2211, 0(AX) 6681381122|3344556677885f5f5f5f5f 32 intel cmp word ptr [eax], 0x2211 -6681381122|3344556677885f5f5f5f5f 32 plan9 CMPW $0x2211, 0(AX) +6681381122|3344556677885f5f5f5f5f 32 plan9 CMPW 0(AX), $0x2211 6681381122|3344556677885f5f5f5f5f 64 gnu cmpw $0x2211,(%rax) 6681381122|3344556677885f5f5f5f5f 64 intel cmp word ptr [rax], 0x2211 -6681381122|3344556677885f5f5f5f5f 64 plan9 CMPW $0x2211, 0(AX) +6681381122|3344556677885f5f5f5f5f 64 plan9 CMPW 0(AX), $0x2211 66830011|223344556677885f5f5f5f5f 32 intel add word ptr [eax], 0x11 66830011|223344556677885f5f5f5f5f 32 plan9 ADDW $0x11, 0(AX) 66830011|223344556677885f5f5f5f5f 64 gnu addw $0x11,(%rax) @@ -4383,10 +4383,10 @@ 66833011|223344556677885f5f5f5f5f 64 intel xor word ptr [rax], 0x11 66833011|223344556677885f5f5f5f5f 64 plan9 XORW $0x11, 0(AX) 66833811|223344556677885f5f5f5f5f 32 intel cmp word ptr [eax], 0x11 -66833811|223344556677885f5f5f5f5f 32 plan9 CMPW $0x11, 0(AX) 
+66833811|223344556677885f5f5f5f5f 32 plan9 CMPW 0(AX), $0x11 66833811|223344556677885f5f5f5f5f 64 gnu cmpw $0x11,(%rax) 66833811|223344556677885f5f5f5f5f 64 intel cmp word ptr [rax], 0x11 -66833811|223344556677885f5f5f5f5f 64 plan9 CMPW $0x11, 0(AX) +66833811|223344556677885f5f5f5f5f 64 plan9 CMPW 0(AX), $0x11 668511|223344556677885f5f5f5f5f5f 32 intel test word ptr [ecx], dx 668511|223344556677885f5f5f5f5f5f 32 plan9 TESTW DX, 0(CX) 668511|223344556677885f5f5f5f5f5f 64 gnu test %dx,(%rcx) @@ -4959,10 +4959,10 @@ 803011|223344556677885f5f5f5f5f5f 64 intel xor byte ptr [rax], 0x11 803011|223344556677885f5f5f5f5f5f 64 plan9 XORB $0x11, 0(AX) 803811|223344556677885f5f5f5f5f5f 32 intel cmp byte ptr [eax], 0x11 -803811|223344556677885f5f5f5f5f5f 32 plan9 CMPB $0x11, 0(AX) +803811|223344556677885f5f5f5f5f5f 32 plan9 CMPB 0(AX), $0x11 803811|223344556677885f5f5f5f5f5f 64 gnu cmpb $0x11,(%rax) 803811|223344556677885f5f5f5f5f5f 64 intel cmp byte ptr [rax], 0x11 -803811|223344556677885f5f5f5f5f5f 64 plan9 CMPB $0x11, 0(AX) +803811|223344556677885f5f5f5f5f5f 64 plan9 CMPB 0(AX), $0x11 810011223344|556677885f5f5f5f5f5f 32 intel add dword ptr [eax], 0x44332211 810011223344|556677885f5f5f5f5f5f 32 plan9 ADDL $0x44332211, 0(AX) 810011223344|556677885f5f5f5f5f5f 64 gnu addl $0x44332211,(%rax) @@ -4999,10 +4999,10 @@ 813011223344|556677885f5f5f5f5f5f 64 intel xor dword ptr [rax], 0x44332211 813011223344|556677885f5f5f5f5f5f 64 plan9 XORL $0x44332211, 0(AX) 813811223344|556677885f5f5f5f5f5f 32 intel cmp dword ptr [eax], 0x44332211 -813811223344|556677885f5f5f5f5f5f 32 plan9 CMPL $0x44332211, 0(AX) +813811223344|556677885f5f5f5f5f5f 32 plan9 CMPL 0(AX), $0x44332211 813811223344|556677885f5f5f5f5f5f 64 gnu cmpl $0x44332211,(%rax) 813811223344|556677885f5f5f5f5f5f 64 intel cmp dword ptr [rax], 0x44332211 -813811223344|556677885f5f5f5f5f5f 64 plan9 CMPL $0x44332211, 0(AX) +813811223344|556677885f5f5f5f5f5f 64 plan9 CMPL 0(AX), $0x44332211 830011|223344556677885f5f5f5f5f5f 32 intel add 
dword ptr [eax], 0x11 830011|223344556677885f5f5f5f5f5f 32 plan9 ADDL $0x11, 0(AX) 830011|223344556677885f5f5f5f5f5f 64 gnu addl $0x11,(%rax) @@ -5039,10 +5039,10 @@ 833011|223344556677885f5f5f5f5f5f 64 intel xor dword ptr [rax], 0x11 833011|223344556677885f5f5f5f5f5f 64 plan9 XORL $0x11, 0(AX) 833811|223344556677885f5f5f5f5f5f 32 intel cmp dword ptr [eax], 0x11 -833811|223344556677885f5f5f5f5f5f 32 plan9 CMPL $0x11, 0(AX) +833811|223344556677885f5f5f5f5f5f 32 plan9 CMPL 0(AX), $0x11 833811|223344556677885f5f5f5f5f5f 64 gnu cmpl $0x11,(%rax) 833811|223344556677885f5f5f5f5f5f 64 intel cmp dword ptr [rax], 0x11 -833811|223344556677885f5f5f5f5f5f 64 plan9 CMPL $0x11, 0(AX) +833811|223344556677885f5f5f5f5f5f 64 plan9 CMPL 0(AX), $0x11 8411|223344556677885f5f5f5f5f5f5f 32 intel test byte ptr [ecx], dl 8411|223344556677885f5f5f5f5f5f5f 32 plan9 TESTB DL, 0(CX) 8411|223344556677885f5f5f5f5f5f5f 64 gnu test %dl,(%rcx) From b6e875325b9240a588005b57569516391c352e78 Mon Sep 17 00:00:00 2001 From: cui fliter Date: Fri, 18 Aug 2023 10:29:34 +0800 Subject: [PATCH 019/200] all: gofmt format Change-Id: Iaea9ce0d3b237123cdb4315790960aeee1b13a80 Reviewed-on: https://go-review.googlesource.com/c/arch/+/520577 Run-TryBot: shuang cui Auto-Submit: Ian Lance Taylor Reviewed-by: Cherry Mui TryBot-Result: Gopher Robot Reviewed-by: Bryan Mills --- arm/armasm/plan9x.go | 9 ++++++--- arm/armmap/map.go | 5 +++-- arm/armspec/spec.go | 13 ++++++------- arm64/arm64asm/inst.go | 6 ++++-- ppc64/ppc64asm/gnu.go | 2 +- ppc64/ppc64spec/spec.go | 12 ++++++------ x86/x86asm/gnu.go | 2 +- x86/x86asm/inst.go | 2 +- x86/x86avxgen/generate.go | 4 ++-- x86/x86avxgen/main.go | 20 ++++++++++---------- x86/x86csv/x86csv.go | 2 +- x86/x86map/map.go | 7 ++++--- x86/x86spec/spec.go | 19 +++++++++---------- x86/xeddata/database.go | 2 ++ x86/xeddata/doc.go | 23 ++++++++++++----------- x86/xeddata/reader.go | 1 + 16 files changed, 69 insertions(+), 60 deletions(-) diff --git a/arm/armasm/plan9x.go 
b/arm/armasm/plan9x.go index a143d2ef..842ab980 100644 --- a/arm/armasm/plan9x.go +++ b/arm/armasm/plan9x.go @@ -253,10 +253,13 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg // [r2, r0, ror #1] -> (R2)(R0@>1) // inst [r2, -r0, ror #1] -> INST.U (R2)(R0@>1) // input: -// a memory operand +// +// a memory operand +// // return values: -// corresponding memory operand in Plan 9 syntax -// .W/.P/.U suffix +// +// corresponding memory operand in Plan 9 syntax +// .W/.P/.U suffix func memOpTrans(mem Mem) (string, string) { suffix := "" switch mem.Mode { diff --git a/arm/armmap/map.go b/arm/armmap/map.go index f5053052..3e8f3072 100644 --- a/arm/armmap/map.go +++ b/arm/armmap/map.go @@ -5,12 +5,13 @@ // Armmap constructs the ARM opcode map from the instruction set CSV file. // // Usage: +// // armmap [-fmt=format] arm.csv // // The known output formats are: // -// text (default) - print decoding tree in text form -// decoder - print decoding tables for the armasm package +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the armasm package package main import ( diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index f5bad801..108f7ed3 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -8,15 +8,14 @@ // ... see golang.org/issue/12840 -// Armspec reads the ``ARM Architecture Reference Manual'' +// Armspec reads the “ARM Architecture Reference Manual” // to collect instruction encoding details and writes those details to standard output // in JSON format. // -// Warning Warning Warning +// # Warning Warning Warning // // This program is unfinished. It is being published in this incomplete form // for interested readers, but do not expect it to be runnable or useful. 
-// package main import ( @@ -606,10 +605,10 @@ func sameFont(f1, f2 string) bool { } var jsFix = strings.NewReplacer( -// `\u003c`, `<`, -// `\u003e`, `>`, -// `\u0026`, `&`, -// `\u0009`, `\t`, +// `\u003c`, `<`, +// `\u003e`, `>`, +// `\u0026`, `&`, +// `\u0009`, `\t`, ) func printTable(name string, table []Inst) { diff --git a/arm64/arm64asm/inst.go b/arm64/arm64asm/inst.go index 8c633fef..866e399c 100644 --- a/arm64/arm64asm/inst.go +++ b/arm64/arm64asm/inst.go @@ -934,8 +934,10 @@ func (r RegisterWithArrangement) String() string { return result } -// Register with arrangement and index: .[], -// { .B, .B }[]. +// Register with arrangement and index: +// +// .[], +// { .B, .B }[]. type RegisterWithArrangementAndIndex struct { r Reg a Arrangement diff --git a/ppc64/ppc64asm/gnu.go b/ppc64/ppc64asm/gnu.go index b4c9bf8d..367acdd4 100644 --- a/ppc64/ppc64asm/gnu.go +++ b/ppc64/ppc64asm/gnu.go @@ -359,7 +359,7 @@ func GNUSyntax(inst Inst, pc uint64) string { // gnuArg formats arg (which is the argIndex's arg in inst) according to GNU rules. // NOTE: because GNUSyntax is the only caller of this func, and it receives a copy -// of inst, it's ok to modify inst.Args here. +// of inst, it's ok to modify inst.Args here. func gnuArg(inst *Inst, argIndex int, arg Arg, pc uint64) string { // special cases for load/store instructions if _, ok := arg.(Offset); ok { diff --git a/ppc64/ppc64spec/spec.go b/ppc64/ppc64spec/spec.go index 54e05353..55cb2756 100644 --- a/ppc64/ppc64spec/spec.go +++ b/ppc64/ppc64spec/spec.go @@ -5,11 +5,12 @@ //go:build (go1.6 && amd64) || go1.8 // +build go1.6,amd64 go1.8 -// Power64spec reads the ``Power ISA V2.07'' Manual +// Power64spec reads the “Power ISA V2.07” Manual // to collect instruction encoding details and writes those details to standard output // in CSV format. // // Usage: +// // ppc64spec PowerISA_V2.07_PUBLIC.pdf >ppc64.csv // // Each CSV line contains four fields: @@ -24,7 +25,6 @@ // For now, empty. 
// // For more on the exact meaning of these fields, see the Power manual. -// package main import ( @@ -495,10 +495,10 @@ func sameFont(f1, f2 string) bool { } var jsFix = strings.NewReplacer( -// `\u003c`, `<`, -// `\u003e`, `>`, -// `\u0026`, `&`, -// `\u0009`, `\t`, +// `\u003c`, `<`, +// `\u003e`, `>`, +// `\u0026`, `&`, +// `\u0009`, `\t`, ) func printTable(name string, table []Inst) { diff --git a/x86/x86asm/gnu.go b/x86/x86asm/gnu.go index 75cff72b..8eba1fd0 100644 --- a/x86/x86asm/gnu.go +++ b/x86/x86asm/gnu.go @@ -10,7 +10,7 @@ import ( ) // GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. -// This general form is often called ``AT&T syntax'' as a reference to AT&T System V Unix. +// This general form is often called “AT&T syntax” as a reference to AT&T System V Unix. func GNUSyntax(inst Inst, pc uint64, symname SymLookup) string { // Rewrite instruction to mimic GNU peculiarities. // Note that inst has been passed by value and contains diff --git a/x86/x86asm/inst.go b/x86/x86asm/inst.go index 4632b506..e98f1a84 100644 --- a/x86/x86asm/inst.go +++ b/x86/x86asm/inst.go @@ -144,7 +144,7 @@ type Arg interface { // the interface value instead of requiring an allocation. // A Reg is a single register. -// The zero Reg value has no name but indicates ``no register.'' +// The zero Reg value has no name but indicates “no register.” type Reg uint8 const ( diff --git a/x86/x86avxgen/generate.go b/x86/x86avxgen/generate.go index 14985cb3..da7fd318 100644 --- a/x86/x86avxgen/generate.go +++ b/x86/x86avxgen/generate.go @@ -230,8 +230,8 @@ func (gen *generator) makeYtab(zoffset int, zform string, args []*argument) ytab // // This is required due to how masking is implemented in asm6. // Single MASK1() instruction produces 2 ytabs, for example: -// 1. OP xmm, mem | Yxr, Yxm | Does not permit K arguments (K0 implied) -// 2. OP xmm, K2, mem | Yxr, Yknot0, Yxm | Does not permit K0 argument +// 1. 
OP xmm, mem | Yxr, Yxm | Does not permit K arguments (K0 implied) +// 2. OP xmm, K2, mem | Yxr, Yknot0, Yxm | Does not permit K0 argument // // This function also exploits that both ytab entries have same opbytes, // hence it is efficient to emit only one opbytes line and 0 Z-offset diff --git a/x86/x86avxgen/main.go b/x86/x86avxgen/main.go index 9fdf262e..b759c505 100644 --- a/x86/x86avxgen/main.go +++ b/x86/x86avxgen/main.go @@ -224,16 +224,16 @@ func assignZforms(ctx *context) { // elements order inside ytabList. // // We want these rules to be satisfied: -// - EVEX-encoded entries go after VEX-encoded entries. -// This way, VEX forms are selected over EVEX variants. -// - EVEX forms with SAE/RC must go before forms without them. -// This helps to avoid problems with reg-reg instructions -// that encode either of them in ModRM.R/M which causes -// ambiguity in ytabList (more than 1 ytab can match args). -// If first matching ytab has SAE/RC, problem will not occur. -// - Memory argument position affects order. -// Required to be in sync with XED encoder when there -// are multiple choices of how to encode instruction. +// - EVEX-encoded entries go after VEX-encoded entries. +// This way, VEX forms are selected over EVEX variants. +// - EVEX forms with SAE/RC must go before forms without them. +// This helps to avoid problems with reg-reg instructions +// that encode either of them in ModRM.R/M which causes +// ambiguity in ytabList (more than 1 ytab can match args). +// If first matching ytab has SAE/RC, problem will not occur. +// - Memory argument position affects order. +// Required to be in sync with XED encoder when there +// are multiple choices of how to encode instruction. 
func sortGroups(ctx *context) { sort.SliceStable(ctx.groups, func(i, j int) bool { return ctx.groups[i].opcode < ctx.groups[j].opcode diff --git a/x86/x86csv/x86csv.go b/x86/x86csv/x86csv.go index e205c1b4..6f6b68c7 100644 --- a/x86/x86csv/x86csv.go +++ b/x86/x86csv/x86csv.go @@ -6,7 +6,7 @@ // Only latest version of "x86.csv" format is supported. // // Terminology: -// given "OPCODE [ARGS...]" line; +// given "OPCODE [ARGS...]" line; // Opcode - instruction name/mnemonic/class. // Args - instruction operands. // Syntax - Opcode with Args. diff --git a/x86/x86map/map.go b/x86/x86map/map.go index df8c68e5..9d45a704 100644 --- a/x86/x86map/map.go +++ b/x86/x86map/map.go @@ -5,13 +5,14 @@ // X86map constructs the x86 opcode map from the instruction set CSV file. // // Usage: +// // x86map [-fmt=format] x86.csv // // The known output formats are: // -// text (default) - print decoding tree in text form -// decoder - print decoding tables for the x86asm package -// scanner - print scanning tables for x86scan package +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the x86asm package +// scanner - print scanning tables for x86scan package package main import ( diff --git a/x86/x86spec/spec.go b/x86/x86spec/spec.go index 25267941..57f3276c 100644 --- a/x86/x86spec/spec.go +++ b/x86/x86spec/spec.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// X86spec reads the ``Intel® 64 and IA-32 Architectures Software Developer's Manual'' +// X86spec reads the “Intel® 64 and IA-32 Architectures Software Developer's Manual” // to collect instruction encoding details and writes those details to standard output // in CSV format. // @@ -20,7 +20,7 @@ // // There are additional debugging flags, not shown. Run x86spec -help for the list. // -// File Format +// # File Format // // TODO: Mention comments at top of file. 
// TODO: Mention that this is version 0.2 of the file. @@ -59,7 +59,7 @@ // // "SHR r/m32, imm8","SHRL imm8, r/m32","shrl imm8, r/m32","C1 /5 ib","V","V","","operand32","rw,r","Y","32" // -// Mnemonics +// # Mnemonics // // The instruction mnemonics are as used in the Intel manual, with a few exceptions. // @@ -109,7 +109,7 @@ // moffs8, moffs16, moffs32, moffs64, vm32x, vm32y, vm64x, and vm64y // are all as in the Intel manual. // -// Encodings +// # Encodings // // The encodings are also as used in the Intel manual, with automated corrections. // For example, the Intel manual sometimes omits the modrm /r indicator or other trailing bytes, @@ -118,7 +118,7 @@ // tools for processing x86 machine code. // See https://golang.org/x/arch/x86/x86map for one such generator. // -// Valid32 and Valid64 +// # Valid32 and Valid64 // // These columns hold validity abbreviations as defined in the Intel manual: // V, I, N.E., N.P., N.S., or N.I. @@ -128,7 +128,7 @@ // For example, the manual lists many instruction forms using REX bytes // with an incorrect "V" in the Valid32 column. // -// CPUID Feature Flags +// # CPUID Feature Flags // // This column specifies CPUID feature flags that must be present in order // to use the instruction. If multiple flags are required, @@ -136,7 +136,7 @@ // The column can also list one of the values 486, Pentium, PentiumII, and P6, // indicating that the instruction was introduced on that architecture version. // -// Tags +// # Tags // // The tag column does not correspond to a traditional column in the Intel manual tables. // Instead, it is itself a comma-separated list of tags or hints derived by analysis @@ -169,7 +169,7 @@ // Since most decoders will handle the REX byte separately, the form with the // unnecessary REX is tagged pseudo64. // -// Corrections and Additions +// # Corrections and Additions // // The x86spec program makes various corrections to the Intel manual data // as part of extracting the information. 
Those corrections are described above. @@ -177,7 +177,7 @@ // The x86spec program also adds a few well-known undocumented instructions, // such as UD1 and FFREEP. // -// Examples +// # Examples // // The latest version of the CSV file is available in this Git repository and also // online at https://golang.org/s/x86.csv. It is meant to be human-readable for @@ -193,7 +193,6 @@ // reads the CSV file and generates an x86 instruction decoder in the form // of a simple byte-code program. This decoder is the core of the disassembler // in the x86asm package (https://golang.org/x/arch/x86/x86asm). -// package main import ( diff --git a/x86/xeddata/database.go b/x86/xeddata/database.go index 35d86d98..94d21de0 100644 --- a/x86/xeddata/database.go +++ b/x86/xeddata/database.go @@ -104,9 +104,11 @@ type xtype struct { // parsing of found file is. // // Lookup: +// // "$xedPath/all-state.txt" => db.LoadStates() // "$xedPath/all-widths.txt" => db.LoadWidths() // "$xedPath/all-element-types.txt" => db.LoadXtypes() +// // $xedPath is the interpolated value of function argument. // // The call NewDatabase("") is valid and returns empty database. diff --git a/x86/xeddata/doc.go b/x86/xeddata/doc.go index bb1a96af..23d51dc5 100644 --- a/x86/xeddata/doc.go +++ b/x86/xeddata/doc.go @@ -5,17 +5,18 @@ // Package xeddata provides utilities to work with XED datafiles. // // Main features: -// * Fundamental XED enumerations (CPU modes, operand sizes, ...) -// * XED objects and their components -// * XED datafiles reader (see below) -// * Utility functions like ExpandStates +// - Fundamental XED enumerations (CPU modes, operand sizes, ...) 
+// - XED objects and their components +// - XED datafiles reader (see below) +// - Utility functions like ExpandStates // // The amount of file formats that is understood is a minimal // set required to generate x86.csv from XED tables: -// * states - simple macro substitutions used in patterns -// * widths - mappings from width names to their size -// * element-types - XED xtype information -// * objects - XED objects that constitute "the tables" +// - states - simple macro substitutions used in patterns +// - widths - mappings from width names to their size +// - element-types - XED xtype information +// - objects - XED objects that constitute "the tables" +// // Collectively, those files are called "datafiles". // // Terminology is borrowed from XED itself, @@ -26,9 +27,9 @@ // file under local XED source repository folder. // // The default usage scheme: -// 1. Open "XED database" to load required metadata. -// 2. Read XED file with objects definitions. -// 3. Operate on XED objects. +// 1. Open "XED database" to load required metadata. +// 2. Read XED file with objects definitions. +// 3. Operate on XED objects. // // See example_test.go for complete examples. // diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index 4176b66e..fc8aa94b 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -92,6 +92,7 @@ func (r *Reader) ReadAll() ([]*Object, error) { // It expects lines that are joined by '\' to be concatenated. // // The format can be described as: +// // unquoted field name "[A-Z_]+" (captured) // field value delimiter ":" // field value string (captured) From 05c9512268b810910595e592c68436f27594f3c1 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Tue, 10 Oct 2023 18:35:22 -0400 Subject: [PATCH 020/200] all: update go directive to 1.18 Done with: go get go@1.18 go mod tidy go fix ./... Using go1.21.3. Also delete the build constraints that are always satisfied when using supported Go versions. For golang/go#60268. 
Change-Id: Iab4a7237a368b1ac05bb72a646501defb51503f1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/534197 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Auto-Submit: Dmitri Shuralyov --- arm/armspec/spec.go | 4 ---- arm/armspec/specmap.go | 1 - arm64/arm64spec/spec.go | 3 --- go.mod | 2 +- ppc64/ppc64spec/spec.go | 3 --- ppc64/ppc64util/util.go | 1 - 6 files changed, 1 insertion(+), 13 deletions(-) diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index 108f7ed3..f755579b 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -2,10 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build go1.6 && (!386 || go1.8) -// +build go1.6 -// +build !386 go1.8 - // ... see golang.org/issue/12840 // Armspec reads the “ARM Architecture Reference Manual” diff --git a/arm/armspec/specmap.go b/arm/armspec/specmap.go index b881082a..973030f9 100644 --- a/arm/armspec/specmap.go +++ b/arm/armspec/specmap.go @@ -3,7 +3,6 @@ // license that can be found in the LICENSE file. //go:build ignore -// +build ignore package main diff --git a/arm64/arm64spec/spec.go b/arm64/arm64spec/spec.go index 08b487cd..ee784e55 100644 --- a/arm64/arm64spec/spec.go +++ b/arm64/arm64spec/spec.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (go1.6 && amd64) || go1.8 -// +build go1.6,amd64 go1.8 - // arm64spec reads the ``ARMv8-A Reference Manual'' // to collect instruction encoding details and writes those // details to standard output in JSON format. 
diff --git a/go.mod b/go.mod index d29c9298..355098da 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module golang.org/x/arch -go 1.17 +go 1.18 require rsc.io/pdf v0.1.1 diff --git a/ppc64/ppc64spec/spec.go b/ppc64/ppc64spec/spec.go index 55cb2756..4167d6dc 100644 --- a/ppc64/ppc64spec/spec.go +++ b/ppc64/ppc64spec/spec.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (go1.6 && amd64) || go1.8 -// +build go1.6,amd64 go1.8 - // Power64spec reads the “Power ISA V2.07” Manual // to collect instruction encoding details and writes those details to standard output // in CSV format. diff --git a/ppc64/ppc64util/util.go b/ppc64/ppc64util/util.go index b2f19103..dcb8e428 100644 --- a/ppc64/ppc64util/util.go +++ b/ppc64/ppc64util/util.go @@ -3,7 +3,6 @@ // license that can be found in the LICENSE file. //go:build ignore -// +build ignore // Generate interesting test cases from ppc64 objdump via // go run util.go From a6bdeed4930798f0aa566beb7883ab0d88dc9646 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Wed, 11 Oct 2023 00:55:13 +0000 Subject: [PATCH 021/200] arm/armspec: remove obsolete comment I didn't realize it at the time, but this comment was referring to the build constraint that was removed in CL 534197. Updates golang/go#12840. Change-Id: I1e6694a6c05f4b2e6dbffe6488c69f9d530bdaf4 Reviewed-on: https://go-review.googlesource.com/c/arch/+/534221 Auto-Submit: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Cherry Mui --- arm/armspec/spec.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index f755579b..60579a05 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// ... 
see golang.org/issue/12840 - // Armspec reads the “ARM Architecture Reference Manual” // to collect instruction encoding details and writes those details to standard output // in JSON format. From a85057043824df19248fb9070bc44f3403f8876e Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 5 Dec 2023 18:00:47 -0500 Subject: [PATCH 022/200] x86avxgen/testdata/xedpath: replace "INTEL CONFIDENTIAL" files These files are not really confidential - they were released in github.com/intelxed/xed with incorrect copyright notices. The copyright notices were updated in https://github.com/intelxed/xed/commit/5c538047876feecf080d9441110f81d0e67b5de8 but the files had also changed a bit by then. Replace the two mislabeled files with the latest versions, bringing in the updated Apache license as well as assorted other changes. The tests still pass, so these changes must not matter too much. Fixes golang/go#64315. [git-generate] cd x86/x86avxgen/testdata/xedpath rm -rf _xed git clone https://github.com/intelxed/xed _xed cd _xed git checkout d41e876 # "2019 copyright" cd .. 
echo ' e all-dec-instructions.txt /^###FILE:.*avx512-foundation-isa.xed.txt/+1;/^###FILE/-3 d /^###FILE:.*avx512-foundation-isa.xed.txt/+1r _xed/datafiles/avx512f/avx512-foundation-isa.xed.txt /^###FILE:.*skx-isa.xed.txt/+1;/^###FILE/-3 d /^###FILE:.*skx-isa.xed.txt/+1r _xed/datafiles/avx512-skx/skx-isa.xed.txt ,s/ +$//g w q ' | sam -d Change-Id: I60fb4b9a420b8962fbbdd026cb6229d55144908d Reviewed-on: https://go-review.googlesource.com/c/arch/+/547775 LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- .../testdata/xedpath/all-dec-instructions.txt | 634 +++++++++++------- 1 file changed, 385 insertions(+), 249 deletions(-) diff --git a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt index 07cbc41b..aad0b816 100644 --- a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt +++ b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt @@ -11873,29 +11873,22 @@ IFORM: VPOPCNTQ_ZMMu64_MASKmskw_MEMu64_AVX512 ###FILE: ./datafiles/avx512f/avx512-foundation-isa.xed.txt - #BEGIN_LEGAL -#INTEL CONFIDENTIAL -# -#Copyright (c) 2017, Intel Corporation. All rights reserved. -# -#The source code contained or described herein and all documents -#related to the source code ("Material") are owned by Intel Corporation -#or its suppliers or licensors. Title to the Material remains with -#Intel Corporation or its suppliers and licensors. The Material -#contains trade secrets and proprietary and confidential information of -#Intel or its suppliers and licensors. The Material is protected by -#worldwide copyright and trade secret laws and treaty provisions. No -#part of the Material may be used, copied, reproduced, modified, -#published, uploaded, posted, transmitted, distributed, or disclosed in -#any way without Intel's prior express written permission. 
-# -#No license under any patent, copyright, trade secret or other -#intellectual property right is granted to or conferred upon you by -#disclosure or delivery of the Materials, either expressly, by -#implication, inducement, estoppel or otherwise. Any license under such -#intellectual property rights must be express and approved by Intel in -#writing. +# +#Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# #END_LEGAL # # @@ -13103,7 +13096,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13117,7 +13113,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: 
REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13131,7 +13130,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 +IFORM: VCVTSD2SI_GPR32i32_MEMf64_AVX512 +PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 IFORM: VCVTSD2SI_GPR32i32_MEMf64_AVX512 } @@ -13147,7 +13149,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13161,7 +13163,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=ROUNDC 
REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13175,7 +13177,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2D VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:q:f64 IFORM: VCVTSD2SI_GPR64i64_MEMf64_AVX512 } @@ -13235,7 +13237,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13249,7 +13254,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 +IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13263,7 +13271,10 @@ 
ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 +IFORM: VCVTSD2USI_GPR32u32_MEMf64_AVX512 +PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 IFORM: VCVTSD2USI_GPR32u32_MEMf64_AVX512 } @@ -13279,7 +13290,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -13293,7 +13304,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=ROUNDC REG1=XMM_B3():r:dq:f64 IFORM: VCVTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -13307,7 +13318,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x79 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x79 VF2 V0F 
MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:q:f64 IFORM: VCVTSD2USI_GPR64u64_MEMf64_AVX512 } @@ -13323,7 +13334,11 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR -PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +COMMENT: Ignores rounding controls: 32b-INT-to-FP64 does not need rounding +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:i32 +IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR32i32_AVX512 +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:i32 IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR32i32_AVX512 } @@ -13337,7 +13352,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() not64 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:i32 +IFORM: VCVTSI2SD_XMMf64_XMMf64_MEMi32_AVX512 +PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W0 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:i32 IFORM: VCVTSI2SD_XMMf64_XMMf64_MEMi32_AVX512 } @@ -13353,7 +13371,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=0 OPERANDS: 
REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR64i64_AVX512 } @@ -13367,7 +13385,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=1 FIX_ROUND_LEN128() AVX512_ROUND() OPERANDS: REG0=XMM_R3():w:dq:f64:TXT=ROUNDC REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SD_XMMf64_XMMf64_GPR64i64_AVX512 } @@ -13381,7 +13399,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W1 ZEROING=0 MASK=0 BCRC=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:q:i64 IFORM: VCVTSI2SD_XMMf64_XMMf64_MEMi64_AVX512 } @@ -13397,7 +13415,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 +IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 } @@ -13411,7 +13432,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] 
FIX_ROUND_LEN128() AVX512_ROUND() W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 +IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:i32 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR32i32_AVX512 } @@ -13425,7 +13449,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:i32 +IFORM: VCVTSI2SS_XMMf32_XMMf32_MEMi32_AVX512 +PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:i32 IFORM: VCVTSI2SS_XMMf32_XMMf32_MEMi32_AVX512 } @@ -13441,7 +13468,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR64i64_AVX512 } @@ -13455,7 +13482,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() 
W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x2A VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:i64 IFORM: VCVTSI2SS_XMMf32_XMMf32_GPR64i64_AVX512 } @@ -13469,12 +13496,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x2A VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:q:i64 IFORM: VCVTSI2SS_XMMf32_XMMf32_MEMi64_AVX512 } - - # EMITTING VCVTSS2SD (VCVTSS2SD-128-1) { ICLASS: VCVTSS2SD @@ -13529,7 +13554,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -13543,7 +13571,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 
0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -13557,7 +13588,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 +IFORM: VCVTSS2SI_GPR32i32_MEMf32_AVX512 +PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 IFORM: VCVTSS2SI_GPR32i32_MEMf32_AVX512 } @@ -13573,7 +13607,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -13587,7 +13621,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2D VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -13601,7 +13635,7 @@ ISA_SET: AVX512F_SCALAR 
EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2D VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:d:f32 IFORM: VCVTSS2SI_GPR64i64_MEMf32_AVX512 } @@ -13617,7 +13651,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -13631,7 +13668,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 +IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -13645,7 +13685,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR 
-PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 +IFORM: VCVTSS2USI_GPR32u32_MEMf32_AVX512 +PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 IFORM: VCVTSS2USI_GPR32u32_MEMf32_AVX512 } @@ -13661,7 +13704,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -13675,7 +13718,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x79 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=ROUNDC REG1=XMM_B3():r:dq:f32 IFORM: VCVTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -13689,7 +13732,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x79 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() 
NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:d:f32 IFORM: VCVTSS2USI_GPR64u64_MEMf32_AVX512 } @@ -13881,7 +13924,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13895,7 +13941,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR32i32_XMMf64_AVX512 } @@ -13909,7 +13958,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 +IFORM: VCVTTSD2SI_GPR32i32_MEMf64_AVX512 +PATTERN: EVV 0x2C 
VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:q:f64 IFORM: VCVTTSD2SI_GPR32i32_MEMf64_AVX512 } @@ -13925,7 +13977,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13939,7 +13991,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2SI_GPR64i64_XMMf64_AVX512 } @@ -13953,7 +14005,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x2C VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:q:f64 IFORM: VCVTTSD2SI_GPR64i64_MEMf64_AVX512 } @@ -13969,7 +14021,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 
+OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13983,7 +14038,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 +IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR32u32_XMMf64_AVX512 } @@ -13997,7 +14055,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 +IFORM: VCVTTSD2USI_GPR32u32_MEMf64_AVX512 +PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:q:f64 IFORM: VCVTTSD2USI_GPR32u32_MEMf64_AVX512 } @@ -14013,7 +14074,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 
NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -14027,7 +14088,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=SAESTR REG1=XMM_B3():r:dq:f64 IFORM: VCVTTSD2USI_GPR64u64_XMMf64_AVX512 } @@ -14041,7 +14102,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_WRITER_LDOP_Q -PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() +PATTERN: EVV 0x78 VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_WRITER_LDOP_Q() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:q:f64 IFORM: VCVTTSD2USI_GPR64u64_MEMf64_AVX512 } @@ -14057,7 +14118,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -14071,7 +14135,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR 
SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR32i32_XMMf32_AVX512 } @@ -14085,7 +14152,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 +IFORM: VCVTTSS2SI_GPR32i32_MEMf32_AVX512 +PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:i32 MEM0:r:d:f32 IFORM: VCVTTSS2SI_GPR32i32_MEMf32_AVX512 } @@ -14101,7 +14171,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -14115,7 +14185,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] 
FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x2C VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2SI_GPR64i64_XMMf32_AVX512 } @@ -14129,7 +14199,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x2C VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:i64 MEM0:r:d:f32 IFORM: VCVTTSS2SI_GPR64i64_MEMf32_AVX512 } @@ -14145,7 +14215,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -14159,7 +14232,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 +IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] 
RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W0 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR32u32_XMMf32_AVX512 } @@ -14173,7 +14249,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 +IFORM: VCVTTSS2USI_GPR32u32_MEMf32_AVX512 +PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR32_R():w:d:u32 MEM0:r:d:f32 IFORM: VCVTTSS2USI_GPR32u32_MEMf32_AVX512 } @@ -14189,7 +14268,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -14203,7 +14282,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() W1 mode64 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x78 VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() SAE() mode64 W1 NOEVSR ZEROING=0 MASK=0 EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64:TXT=SAESTR REG1=XMM_B3():r:dq:f32 IFORM: VCVTTSS2USI_GPR64u64_XMMf32_AVX512 } @@ -14217,7 +14296,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: 
DISP8_GPR_WRITER_LDOP_D MXCSR SIMD_SCALAR -PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() +PATTERN: EVV 0x78 VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_LDOP_D() EVEXRR_ONE OPERANDS: REG0=GPR64_R():w:q:u64 MEM0:r:d:f32 IFORM: VCVTTSS2USI_GPR64u64_MEMf32_AVX512 } @@ -14307,7 +14386,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR -PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:u32 +IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR32u32_AVX512 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR32_B():r:d:u32 IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR32u32_AVX512 } @@ -14321,7 +14403,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E10NF REAL_OPCODE: Y ATTRIBUTES: SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() not64 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:u32 +IFORM: VCVTUSI2SD_XMMf64_XMMf64_MEMu32_AVX512 +PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W0 ZEROING=0 MASK=0 BCRC=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:d:u32 IFORM: VCVTUSI2SD_XMMf64_XMMf64_MEMu32_AVX512 } @@ -14337,7 +14422,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 BCRC=0 
REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=0 OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR64u64_AVX512 } @@ -14351,7 +14436,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF2 V0F MOD[0b11] MOD=3 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 BCRC=1 FIX_ROUND_LEN128() AVX512_ROUND() OPERANDS: REG0=XMM_R3():w:dq:f64:TXT=ROUNDC REG1=XMM_N3():r:dq:f64 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SD_XMMf64_XMMf64_GPR64u64_AVX512 } @@ -14365,7 +14450,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF2 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() mode64 W1 ZEROING=0 MASK=0 BCRC=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:q:u64 IFORM: VCVTUSI2SD_XMMf64_XMMf64_MEMu64_AVX512 } @@ -14381,7 +14466,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 +IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 } @@ -14395,7 +14483,10 @@ ISA_SET: AVX512F_SCALAR 
EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W0 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() not64 ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 +IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W0 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR32_B():r:d:u32 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR32u32_AVX512 } @@ -14409,7 +14500,10 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() not64 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:u32 +IFORM: VCVTUSI2SS_XMMf32_XMMf32_MEMu32_AVX512 +PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W0 ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:d:u32 IFORM: VCVTUSI2SS_XMMf32_XMMf32_MEMu32_AVX512 } @@ -14425,7 +14519,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR64u64_AVX512 } @@ -14439,7 +14533,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF 
REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR -PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() W1 mode64 ZEROING=0 MASK=0 +PATTERN: EVV 0x7B VF3 V0F MOD[0b11] MOD=3 BCRC=1 REG[rrr] RM[nnn] FIX_ROUND_LEN128() AVX512_ROUND() mode64 W1 ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:f32:TXT=ROUNDC REG1=XMM_N3():r:dq:f32 REG2=GPR64_B():r:q:u64 IFORM: VCVTUSI2SS_XMMf32_XMMf32_GPR64u64_AVX512 } @@ -14453,7 +14547,7 @@ ISA_SET: AVX512F_SCALAR EXCEPTIONS: AVX512-E3NF REAL_OPCODE: Y ATTRIBUTES: MXCSR SIMD_SCALAR DISP8_GPR_READER -PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 mode64 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x7B VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() mode64 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=XMM_N3():r:dq:f32 MEM0:r:q:u64 IFORM: VCVTUSI2SS_XMMf32_XMMf32_MEMu64_AVX512 } @@ -18722,7 +18816,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x6E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x6E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=GPR32_B():r:d:u32 +IFORM: VMOVD_XMMu32_GPR32u32_AVX512 +PATTERN: EVV 0x6E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=GPR32_B():r:d:u32 IFORM: VMOVD_XMMu32_GPR32u32_AVX512 } @@ -18736,7 +18833,10 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_READER -PATTERN: EVV 0x6E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x6E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: 
REG0=XMM_R3():w:dq:u32 MEM0:r:d:u32 +IFORM: VMOVD_XMMu32_MEMu32_AVX512 +PATTERN: EVV 0x6E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:u32 MEM0:r:d:u32 IFORM: VMOVD_XMMu32_MEMu32_AVX512 } @@ -18751,7 +18851,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x7E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x7E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR ZEROING=0 MASK=0 +OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 +IFORM: VMOVD_GPR32u32_XMMu32_AVX512 +PATTERN: EVV 0x7E V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 IFORM: VMOVD_GPR32u32_XMMu32_AVX512 } @@ -18765,7 +18868,10 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_STORE -PATTERN: EVV 0x7E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +PATTERN: EVV 0x7E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 +IFORM: VMOVD_MEMu32_XMMu32_AVX512 +PATTERN: EVV 0x7E V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 IFORM: VMOVD_MEMu32_XMMu32_AVX512 } @@ -19074,7 +19180,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x16 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x16 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() 
NELEM_SCALAR() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:q:f64 MEM0:r:q:f64 IFORM: VMOVHPD_XMMf64_XMMf64_MEMf64_AVX512 } @@ -19090,7 +19196,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x17 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x17 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: MEM0:w:q:f64 REG0=XMM_R3():r:dq:f64 IFORM: VMOVHPD_MEMf64_XMMf64_AVX512 } @@ -19153,7 +19259,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x12 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x12 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=XMM_N3():r:dq:f64 MEM0:r:q:f64 IFORM: VMOVLPD_XMMf64_XMMf64_MEMf64_AVX512 } @@ -19169,7 +19275,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x13 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x13 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: MEM0:w:q:f64 REG0=XMM_R3():r:q:f64 IFORM: VMOVLPD_MEMf64_XMMf64_AVX512 } @@ -19338,7 +19444,7 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x7E VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0x7E VF3 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=XMM_B3():r:dq:u64 IFORM: VMOVQ_XMMu64_XMMu64_AVX512 } @@ -19352,7 +19458,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF 
REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0x7E VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0x7E VF3 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: REG0=XMM_R3():w:dq:u64 MEM0:r:q:u64 IFORM: VMOVQ_XMMu64_MEMu64_AVX512 } @@ -19367,7 +19473,7 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0xD6 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] W1 NOEVSR ZEROING=0 MASK=0 +PATTERN: EVV 0xD6 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 NOEVSR ZEROING=0 MASK=0 OPERANDS: REG0=XMM_B3():w:dq:u64 REG1=XMM_R3():r:dq:u64 IFORM: VMOVQ_XMMu64_XMMu64_AVX512 } @@ -19381,7 +19487,7 @@ ISA_SET: AVX512F_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_SCALAR -PATTERN: EVV 0xD6 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() +PATTERN: EVV 0xD6 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W1 NOEVSR ZEROING=0 MASK=0 ESIZE_64_BITS() NELEM_SCALAR() OPERANDS: MEM0:w:q:u64 REG0=XMM_R3():r:dq:u64 IFORM: VMOVQ_MEMu64_XMMu64_AVX512 } @@ -20217,7 +20323,10 @@ ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 not64 NOEVSR +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO16_32 +IFORM: VPBROADCASTD_ZMMu32_MASKmskw_GPR32u32_AVX512 +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 mode64 W0 NOEVSR OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO16_32 IFORM: VPBROADCASTD_ZMMu32_MASKmskw_GPR32u32_AVX512 } @@ -20265,7 +20374,7 @@ ISA_SET: AVX512F_512 
EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 mode64 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 mode64 W1 NOEVSR OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR64_B():r:q:u64 EMX_BROADCAST_1TO8_64 IFORM: VPBROADCASTQ_ZMMu64_MASKmskw_GPR64u64_AVX512 } @@ -22272,9 +22381,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi32 REG3=ZMM_B3():r:zi32 +OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi64 REG3=ZMM_B3():r:zi64 IFORM: VPMULDQ_ZMMi64_MASKmskw_ZMMi32_ZMMi32_AVX512 } @@ -22286,9 +22396,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi32 MEM0:r:vv:i32:TXT=BCASTSTR +OPERANDS: REG0=ZMM_R3():w:zi64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zi64 MEM0:r:vv:i64:TXT=BCASTSTR IFORM: VPMULDQ_ZMMi64_MASKmskw_ZMMi32_MEMi32_AVX512 } @@ -22332,9 +22443,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +OPERANDS: 
REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 IFORM: VPMULUDQ_ZMMu64_MASKmskw_ZMMu32_ZMMu32_AVX512 } @@ -22346,9 +22458,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_512 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR IFORM: VPMULUDQ_ZMMu64_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -25350,7 +25463,6 @@ IFORM: KXORW_MASKmskw_MASKmskw_MASKmskw_AVX512 - ###FILE: ./datafiles/avx512cd/vconflict-isa.xed.txt #BEGIN_LEGAL @@ -25533,29 +25645,22 @@ IFORM: VPLZCNTQ_ZMMu64_MASKmskw_MEMu64_AVX512CD ###FILE: ./datafiles/avx512-skx/skx-isa.xed.txt - #BEGIN_LEGAL -#INTEL CONFIDENTIAL -# -#Copyright (c) 2017, Intel Corporation. All rights reserved. -# -#The source code contained or described herein and all documents -#related to the source code ("Material") are owned by Intel Corporation -#or its suppliers or licensors. Title to the Material remains with -#Intel Corporation or its suppliers and licensors. The Material -#contains trade secrets and proprietary and confidential information of -#Intel or its suppliers and licensors. The Material is protected by -#worldwide copyright and trade secret laws and treaty provisions. No -#part of the Material may be used, copied, reproduced, modified, -#published, uploaded, posted, transmitted, distributed, or disclosed in -#any way without Intel's prior express written permission. 
-# -#No license under any patent, copyright, trade secret or other -#intellectual property right is granted to or conferred upon you by -#disclosure or delivery of the Materials, either expressly, by -#implication, inducement, estoppel or otherwise. Any license under such -#intellectual property rights must be express and approved by Intel in -#writing. +# +#Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# #END_LEGAL # # @@ -25818,8 +25923,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VANDNPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VANDNPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -25832,8 +25937,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDNPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: 
VANDNPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -25848,8 +25953,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VANDNPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 +IFORM: VANDNPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -25862,8 +25967,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDNPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDNPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ -25878,8 +25983,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VANDNPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 +IFORM: VANDNPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -25892,8 +25997,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: 
VANDNPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDNPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -25908,8 +26013,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: VANDNPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VANDNPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -25922,8 +26027,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDNPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDNPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -25938,8 +26043,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 -IFORM: VANDNPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VANDNPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -25952,8 +26057,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 
ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDNPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDNPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -25968,8 +26073,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x55 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VANDNPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VANDNPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -25982,8 +26087,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x55 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDNPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDNPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -25998,8 +26103,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VANDPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VANDPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -26012,8 +26117,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: 
MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -26028,8 +26133,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VANDPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 +IFORM: VANDPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -26042,8 +26147,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ -26058,8 +26163,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VANDPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 
REG3=ZMM_B3():r:zu64 +IFORM: VANDPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -26072,8 +26177,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VANDPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VANDPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -26088,8 +26193,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: VANDPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VANDPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -26102,8 +26207,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -26118,8 +26223,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 
-IFORM: VANDPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VANDPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -26132,8 +26237,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -26148,8 +26253,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x54 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VANDPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VANDPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -26162,8 +26267,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x54 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VANDPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VANDPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -34886,8 +34991,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] 
RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VORPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -34900,8 +35005,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VORPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VORPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -34916,8 +35021,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VORPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 +IFORM: VORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -34930,8 +35035,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VORPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VORPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ 
-34946,8 +35051,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VORPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 +IFORM: VORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -34960,8 +35065,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VORPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VORPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -34976,8 +35081,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: VORPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -34990,8 +35095,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VORPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 
REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VORPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -35006,8 +35111,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 -IFORM: VORPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -35020,8 +35125,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VORPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VORPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -35036,8 +35141,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x56 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VORPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -35050,8 +35155,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x56 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR 
REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VORPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VORPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -37365,7 +37470,10 @@ ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO4_32 +IFORM: VPBROADCASTD_XMMu32_MASKmskw_GPR32u32_AVX512 +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO4_32 IFORM: VPBROADCASTD_XMMu32_MASKmskw_GPR32u32_AVX512 } @@ -37413,7 +37521,10 @@ ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E7NM REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX -PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 NOEVSR +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 not64 NOEVSR +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO8_32 +IFORM: VPBROADCASTD_YMMu32_MASKmskw_GPR32u32_AVX512 +PATTERN: EVV 0x7C V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 mode64 W0 NOEVSR OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=GPR32_B():r:d:u32 EMX_BROADCAST_1TO8_32 IFORM: VPBROADCASTD_YMMu32_MASKmskw_GPR32u32_AVX512 } @@ -40497,7 +40608,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x16 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() +PATTERN: EVV 0x16 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 NOEVSR ZEROING=0 MASK=0 UIMM8() 
+OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 IMM0:r:b +IFORM: VPEXTRD_GPR32u32_XMMu32_IMM8_AVX512 +PATTERN: EVV 0x16 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() OPERANDS: REG0=GPR32_B():w:d:u32 REG1=XMM_R3():r:dq:u32 IMM0:r:b IFORM: VPEXTRD_GPR32u32_XMMu32_IMM8_AVX512 } @@ -40511,7 +40625,10 @@ ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_WRITER_STORE -PATTERN: EVV 0x16 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +PATTERN: EVV 0x16 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 NOEVSR ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() +OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 IMM0:r:b +IFORM: VPEXTRD_MEMu32_XMMu32_IMM8_AVX512 +PATTERN: EVV 0x16 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 NOEVSR ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_WRITER_STORE() OPERANDS: MEM0:w:d:u32 REG0=XMM_R3():r:dq:u32 IMM0:r:b IFORM: VPEXTRD_MEMu32_XMMu32_IMM8_AVX512 } @@ -40577,16 +40694,22 @@ IFORM: VPEXTRW_MEMu16_XMMu16_IMM8_AVX512 # EMITTING VPEXTRW (VPEXTRW-128-2) { -ICLASS: VPEXTRW +ICLASS: VPEXTRW_C5 +DISASM: vpextrw CPL: 3 CATEGORY: AVX512 EXTENSION: AVX512EVEX ISA_SET: AVX512BW_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0xC5 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 NOEVSR ZEROING=0 MASK=0 UIMM8() + +PATTERN: EVV 0xC5 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 NOEVSR ZEROING=0 MASK=0 UIMM8() not64 OPERANDS: REG0=GPR32_R():w:d:u16 REG1=XMM_B3():r:dq:u16 IMM0:r:b -IFORM: VPEXTRW_GPR32u16_XMMu16_IMM8_AVX512 +IFORM: VPEXTRW_GPR32u16_XMMu16_IMM8_AVX512_C5 + +PATTERN: EVV 0xC5 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 NOEVSR ZEROING=0 MASK=0 UIMM8() mode64 EVEXRR_ONE +OPERANDS: REG0=GPR32_R():w:d:u16 REG1=XMM_B3():r:dq:u16 IMM0:r:b +IFORM: 
VPEXTRW_GPR32u16_XMMu16_IMM8_AVX512_C5 } @@ -40756,7 +40879,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y -PATTERN: EVV 0x22 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 ZEROING=0 MASK=0 UIMM8() +PATTERN: EVV 0x22 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 not64 ZEROING=0 MASK=0 UIMM8() +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 REG2=GPR32_B():r:d:u32 IMM0:r:b +IFORM: VPINSRD_XMMu32_XMMu32_GPR32u32_IMM8_AVX512 +PATTERN: EVV 0x22 V66 V0F3A MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 mode64 W0 ZEROING=0 MASK=0 UIMM8() OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 REG2=GPR32_B():r:d:u32 IMM0:r:b IFORM: VPINSRD_XMMu32_XMMu32_GPR32u32_IMM8_AVX512 } @@ -40770,7 +40896,10 @@ ISA_SET: AVX512DQ_128N EXCEPTIONS: AVX512-E9NF REAL_OPCODE: Y ATTRIBUTES: DISP8_GPR_READER -PATTERN: EVV 0x22 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 W0 ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_READER() +PATTERN: EVV 0x22 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 not64 ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_READER() +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 MEM0:r:d:u32 IMM0:r:b +IFORM: VPINSRD_XMMu32_XMMu32_MEMu32_IMM8_AVX512 +PATTERN: EVV 0x22 V66 V0F3A MOD[mm] MOD!=3 REG[rrr] RM[nnn] BCRC=0 MODRM() VL128 mode64 W0 ZEROING=0 MASK=0 UIMM8() ESIZE_32_BITS() NELEM_GPR_READER() OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=XMM_N3():r:dq:u32 MEM0:r:d:u32 IMM0:r:b IFORM: VPINSRD_XMMu32_XMMu32_MEMu32_IMM8_AVX512 } @@ -44731,9 +44860,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i32 REG3=XMM_B3():r:dq:i32 +OPERANDS: REG0=XMM_R3():w:dq:i64 
REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i64 REG3=XMM_B3():r:dq:i64 IFORM: VPMULDQ_XMMi64_MASKmskw_XMMi32_XMMi32_AVX512 } @@ -44745,9 +44875,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i32 MEM0:r:vv:i32:TXT=BCASTSTR +OPERANDS: REG0=XMM_R3():w:dq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:i64 MEM0:r:vv:i64:TXT=BCASTSTR IFORM: VPMULDQ_XMMi64_MASKmskw_XMMi32_MEMi32_AVX512 } @@ -44761,9 +44892,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i32 REG3=YMM_B3():r:qq:i32 +OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i64 REG3=YMM_B3():r:qq:i64 IFORM: VPMULDQ_YMMi64_MASKmskw_YMMi32_YMMi32_AVX512 } @@ -44775,9 +44907,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0x28 V66 V0F38 MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i32 MEM0:r:vv:i32:TXT=BCASTSTR +OPERANDS: REG0=YMM_R3():w:qq:i64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:i64 MEM0:r:vv:i64:TXT=BCASTSTR IFORM: 
VPMULDQ_YMMi64_MASKmskw_YMMi32_MEMi32_AVX512 } @@ -45301,9 +45434,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 IFORM: VPMULUDQ_XMMu64_MASKmskw_XMMu32_XMMu32_AVX512 } @@ -45315,9 +45449,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_128 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR IFORM: VPMULUDQ_XMMu64_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -45331,9 +45466,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 IFORM: VPMULUDQ_YMMu64_MASKmskw_YMMu32_YMMu32_AVX512 } @@ -45345,9 +45481,10 @@ EXTENSION: AVX512EVEX ISA_SET: AVX512F_256 EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y +COMMENT: Strange instruction that uses 32b of each 64b input element 
ATTRIBUTES: MEMORY_FAULT_SUPPRESSION DOUBLE_WIDE_MEMOP DISP8_FULL BROADCAST_ENABLED MASKOP_EVEX PATTERN: EVV 0xF4 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR IFORM: VPMULUDQ_YMMu64_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -52592,8 +52729,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W1 -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 REG3=XMM_B3():r:dq:f64 -IFORM: VXORPD_XMMf64_MASKmskw_XMMf64_XMMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 REG3=XMM_B3():r:dq:u64 +IFORM: VXORPD_XMMu64_MASKmskw_XMMu64_XMMu64_AVX512 } { @@ -52606,8 +52743,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VXORPD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VXORPD_XMMu64_MASKmskw_XMMu64_MEMu64_AVX512 } @@ -52622,8 +52759,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W1 -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 REG3=YMM_B3():r:qq:f64 -IFORM: VXORPD_YMMf64_MASKmskw_YMMf64_YMMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 REG3=YMM_B3():r:qq:u64 
+IFORM: VXORPD_YMMu64_MASKmskw_YMMu64_YMMu64_AVX512 } { @@ -52636,8 +52773,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VXORPD_YMMf64_MASKmskw_YMMf64_MEMf64_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VXORPD_YMMu64_MASKmskw_YMMu64_MEMu64_AVX512 } @@ -52652,8 +52789,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 V66 V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL512 W1 -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 REG3=ZMM_B3():r:zf64 -IFORM: VXORPD_ZMMf64_MASKmskw_ZMMf64_ZMMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 REG3=ZMM_B3():r:zu64 +IFORM: VXORPD_ZMMu64_MASKmskw_ZMMu64_ZMMu64_AVX512 } { @@ -52666,8 +52803,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 V66 V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W1 ESIZE_64_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf64 MEM0:r:vv:f64:TXT=BCASTSTR -IFORM: VXORPD_ZMMf64_MASKmskw_ZMMf64_MEMf64_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu64 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu64 MEM0:r:vv:u64:TXT=BCASTSTR +IFORM: VXORPD_ZMMu64_MASKmskw_ZMMu64_MEMu64_AVX512 } @@ -52682,8 +52819,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL128 W0 -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 REG3=XMM_B3():r:dq:f32 -IFORM: 
VXORPS_XMMf32_MASKmskw_XMMf32_XMMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 REG3=XMM_B3():r:dq:u32 +IFORM: VXORPS_XMMu32_MASKmskw_XMMu32_XMMu32_AVX512 } { @@ -52696,8 +52833,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL128 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=XMM_R3():w:dq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VXORPS_XMMf32_MASKmskw_XMMf32_MEMf32_AVX512 +OPERANDS: REG0=XMM_R3():w:dq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=XMM_N3():r:dq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VXORPS_XMMu32_MASKmskw_XMMu32_MEMu32_AVX512 } @@ -52712,8 +52849,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 VNP V0F MOD[0b11] MOD=3 BCRC=0 REG[rrr] RM[nnn] VL256 W0 -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 REG3=YMM_B3():r:qq:f32 -IFORM: VXORPS_YMMf32_MASKmskw_YMMf32_YMMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 REG3=YMM_B3():r:qq:u32 +IFORM: VXORPS_YMMu32_MASKmskw_YMMu32_YMMu32_AVX512 } { @@ -52726,8 +52863,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL256 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=YMM_R3():w:qq:f32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:f32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VXORPS_YMMf32_MASKmskw_YMMf32_MEMf32_AVX512 +OPERANDS: REG0=YMM_R3():w:qq:u32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=YMM_N3():r:qq:u32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VXORPS_YMMu32_MASKmskw_YMMu32_MEMu32_AVX512 } @@ -52742,8 +52879,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MASKOP_EVEX PATTERN: EVV 0x57 VNP V0F MOD[0b11] MOD=3 BCRC=0 
REG[rrr] RM[nnn] VL512 W0 -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 REG3=ZMM_B3():r:zf32 -IFORM: VXORPS_ZMMf32_MASKmskw_ZMMf32_ZMMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 REG3=ZMM_B3():r:zu32 +IFORM: VXORPS_ZMMu32_MASKmskw_ZMMu32_ZMMu32_AVX512 } { @@ -52756,8 +52893,8 @@ EXCEPTIONS: AVX512-E4 REAL_OPCODE: Y ATTRIBUTES: MEMORY_FAULT_SUPPRESSION MASKOP_EVEX DISP8_FULL BROADCAST_ENABLED PATTERN: EVV 0x57 VNP V0F MOD[mm] MOD!=3 REG[rrr] RM[nnn] MODRM() VL512 W0 ESIZE_32_BITS() NELEM_FULL() -OPERANDS: REG0=ZMM_R3():w:zf32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zf32 MEM0:r:vv:f32:TXT=BCASTSTR -IFORM: VXORPS_ZMMf32_MASKmskw_ZMMf32_MEMf32_AVX512 +OPERANDS: REG0=ZMM_R3():w:zu32 REG1=MASK1():r:mskw:TXT=ZEROSTR REG2=ZMM_N3():r:zu32 MEM0:r:vv:u32:TXT=BCASTSTR +IFORM: VXORPS_ZMMu32_MASKmskw_ZMMu32_MEMu32_AVX512 } @@ -53606,7 +53743,6 @@ IFORM: KXORQ_MASKmskw_MASKmskw_MASKmskw_AVX512 - ###FILE: ./datafiles/avx512ifma/ifma-isa.xed.txt #BEGIN_LEGAL From 5d0b1102bee052ce00bf47f9cfef8f73d2ac6d46 Mon Sep 17 00:00:00 2001 From: cuishuang Date: Tue, 16 Apr 2024 18:07:52 +0800 Subject: [PATCH 023/200] x86/x86asm: fix function name in comment Change-Id: Ie70c842161c96948098082d3c0ff1b026bcfd8de Reviewed-on: https://go-review.googlesource.com/c/arch/+/579198 Reviewed-by: qiu laidongfeng2 <2645477756@qq.com> LUCI-TryBot-Result: Go LUCI Reviewed-by: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui Auto-Submit: Ian Lance Taylor Commit-Queue: Ian Lance Taylor Run-TryBot: shuang cui --- x86/x86asm/ext_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x86/x86asm/ext_test.go b/x86/x86asm/ext_test.go index e63f1138..2e31dd30 100644 --- a/x86/x86asm/ext_test.go +++ b/x86/x86asm/ext_test.go @@ -653,7 +653,7 @@ func enum8bit(try func([]byte)) { } } -// enum8bit generates all possible 2-byte sequences, followed by distinctive padding. 
+// enum16bit generates all possible 2-byte sequences, followed by distinctive padding. func enum16bit(try func([]byte)) { for i := 0; i < 1<<16; i++ { try([]byte{byte(i), byte(i >> 8), 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}) From b863392466ea228f6359643b2e2b4c658761ba39 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 16 Jul 2024 11:35:16 -0400 Subject: [PATCH 024/200] LICENSE: update per Google Legal Very minor tweaks: - Remove (c) pseudosymbol. - Remove "All Rights Reserved." - Change "Google Inc." (no longer exists) to "Google LLC". [git-generate] echo ' ,s/\(c\) // ,s/ All rights reserved.// ,s/Google Inc./Google LLC/ w q ' | sam -d LICENSE Change-Id: I44ceee02758453e6afee1e63518aa275f53429d8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/598518 Auto-Submit: Russ Cox LUCI-TryBot-Result: Go LUCI Reviewed-by: Ian Lance Taylor --- LICENSE | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index d29b3726..686d8a91 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2015 The Go Authors. All rights reserved. +Copyright 2015 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
From 9d90945922a772f53487baa5b1b03f061aebb164 Mon Sep 17 00:00:00 2001 From: Vishwanatha HD Date: Sat, 13 Jul 2024 19:52:41 +0000 Subject: [PATCH 025/200] s390x: add s390x disassembler support, GNU syntax Change-Id: Idd91cc89510ce117e49db541fd68b0fa113b92fa Reviewed-on: https://go-review.googlesource.com/c/arch/+/575675 Auto-Submit: Cherry Mui Reviewed-by: Cherry Mui Reviewed-by: David Chase Reviewed-by: Bill O'Farrell Reviewed-by: Srinivas Pokala LUCI-TryBot-Result: Go LUCI --- s390x/s390x.csv | 1277 +++++ s390x/s390xasm/Makefile | 2 + s390x/s390xasm/decode.go | 241 + s390x/s390xasm/decode_test.go | 88 + s390x/s390xasm/field.go | 98 + s390x/s390xasm/gnu.go | 1018 ++++ s390x/s390xasm/inst.go | 399 ++ s390x/s390xasm/tables.go | 5046 ++++++++++++++++++ s390x/s390xasm/testdata/decode_generated.txt | 1245 +++++ s390x/s390xmap/map.go | 636 +++ s390x/s390xspec/spec.go | 1059 ++++ s390x/s390xutil/hack.h | 56 + s390x/s390xutil/util.go | 90 + 13 files changed, 11255 insertions(+) create mode 100644 s390x/s390x.csv create mode 100644 s390x/s390xasm/Makefile create mode 100644 s390x/s390xasm/decode.go create mode 100644 s390x/s390xasm/decode_test.go create mode 100644 s390x/s390xasm/field.go create mode 100644 s390x/s390xasm/gnu.go create mode 100644 s390x/s390xasm/inst.go create mode 100644 s390x/s390xasm/tables.go create mode 100644 s390x/s390xasm/testdata/decode_generated.txt create mode 100644 s390x/s390xmap/map.go create mode 100644 s390x/s390xspec/spec.go create mode 100644 s390x/s390xutil/hack.h create mode 100644 s390x/s390xutil/util.go diff --git a/s390x/s390x.csv b/s390x/s390x.csv new file mode 100644 index 00000000..a53942d8 --- /dev/null +++ b/s390x/s390x.csv @@ -0,0 +1,1277 @@ +# Copyright 2024 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. +# +# This file is generated by the s390xspec program. 
+# +# Command to generate this file is: +# ./s390xspec > s390x.csv +# +# For eg: ./s390xspec z_Architecture_Principles_of_Operation.pdf > s390x.csv +# +# Specific Edition of the PDF manual used (Publication No): SA22-7832-13 +# Document link: https://www.ibm.com/docs/en/module_1678991624569/pdf/SA22-7832-13.pdf +# +# IBM Z-ISA Principles of Operation PDF instruction description. +# +# This file contains comment lines, each beginning with #, +# followed by entries in CSV format. +# +# Each line in the CSV section contains 4 fields: +# +# instruction mnemonic encoding isa-level +# +# The instruction is list of instructions picked from the Appendix-B "Lists of Instructions" heading. +# The mnemonic is the instruction mnemonics, separated by | characters. +# The encoding is the encoding, a sequence of name@startbit| describing each bit field in turn or +# a list of sequences of the form (,sequence)+. A leading comma is used to signify an +# instruction encoding requiring multiple instruction words. 
+# The fourth field represents instruction characteristics string +# + + +"ADD (32)","A R1,D2(X2,B2)","90@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"ADD (32)","AR R1,R2","26@0|R1@8|R2@12|??@16","1A" +"ADD (32)","ARK R1,R2,R3","47608@0|R3@16|//@20|R1@24|R2@28|??@32","B9F8" +"ADD (32)","AY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|90@40|??@48","B" +"ADD (64)","AG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|8@40|??@48","B" +"ADD (64)","AGR R1,R2","47368@0|//@16|R1@24|R2@28|??@32","B908" +"ADD (64)","AGRK R1,R2,R3","47592@0|R3@16|//@20|R1@24|R2@28|??@32","B9E8" +"ADD (64←32)","AGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|24@40|??@48","B" +"ADD (64←32)","AGFR R1,R2","47384@0|//@16|R1@24|R2@28|??@32","B918" +"ADD (extended BFP)","AXBR R1,R2","45898@0|//@16|R1@24|R2@28|??@32","SP Db" +"ADD (extended DFP)","AXTR R1,R2,R3","46042@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"ADD (extended DFP)","AXTRA R1,R2,R3,M4","46042@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"ADD (long BFP)","ADB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|26@40|??@48","Db" +"ADD (long BFP)","ADBR R1,R2","45850@0|//@16|R1@24|R2@28|??@32","Db" +"ADD (long DFP)","ADTR R1,R2,R3","46034@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"ADD (long DFP)","ADTRA R1,R2,R3,M4","46034@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"ADD (short BFP)","AEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|10@40|??@48","Db" +"ADD (short BFP)","AEBR R1,R2","45834@0|//@16|R1@24|R2@28|??@32","Db" +"ADD DECIMAL","AP D1(L1,B1),D2(L2,B2)","250@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"ADD HALFWORD (32←16)","AH R1,D2(X2,B2)","74@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"ADD HALFWORD (32←16)","AHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|122@40|??@48","B" +"ADD HALFWORD (64→16)","AGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|56@40|??@48","B" +"ADD HALFWORD IMMEDIATE (32←16)","AHI R1,I2","167@0|R1@8|10@12|I2@16|??@32","A7A" +"ADD HALFWORD IMMEDIATE (64←16)","AGHI R1,I2","167@0|R1@8|11@12|I2@16|??@32","A7B" +"ADD HIGH (32)","AHHHR 
R1,R2,R3","47560@0|R3@16|//@20|R1@24|R2@28|??@32","B9C8" +"ADD HIGH (32)","AHHLR R1,R2,R3","47576@0|R3@16|//@20|R1@24|R2@28|??@32","B9D8" +"ADD IMMEDIATE (32)","AFI R1,I2","194@0|R1@8|9@12|I2@16|??@48","C29" +"ADD IMMEDIATE (32←16)","AHIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|216@40|??@48","ECD8" +"ADD IMMEDIATE (32←8)","ASI D1(B1),I2","235@0|I2@8|B1@16|D1@20|106@40|??@48","ST" +"ADD IMMEDIATE (64←16)","AGHIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|217@40|??@48","ECD9" +"ADD IMMEDIATE (64←32)","AGFI R1,I2","194@0|R1@8|8@12|I2@16|??@48","C28" +"ADD IMMEDIATE (64←8)","AGSI D1(B1),I2","235@0|I2@8|B1@16|D1@20|122@40|??@48","ST" +"ADD IMMEDIATE HIGH (32)","AIH R1,I2","204@0|R1@8|8@12|I2@16|??@48","CC8" +"ADD LOGICAL (32)","AL R1,D2(X2,B2)","94@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"ADD LOGICAL (32)","ALR R1,R2","30@0|R1@8|R2@12|??@16","1E" +"ADD LOGICAL (32)","ALRK R1,R2,R3","47610@0|R3@16|//@20|R1@24|R2@28|??@32","B9FA" +"ADD LOGICAL (32)","ALY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|94@40|??@48","B" +"ADD LOGICAL (64)","ALG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|10@40|??@48","B" +"ADD LOGICAL (64)","ALGR R1,R2","47370@0|//@16|R1@24|R2@28|??@32","B90A" +"ADD LOGICAL (64)","ALGRK R1,R2,R3","47594@0|R3@16|//@20|R1@24|R2@28|??@32","B9EA" +"ADD LOGICAL (64←32)","ALGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|26@40|??@48","B" +"ADD LOGICAL (64←32)","ALGFR R1,R2","47386@0|//@16|R1@24|R2@28|??@32","B91A" +"ADD LOGICAL HIGH (32)","ALHHHR R1,R2,R3","47562@0|R3@16|//@20|R1@24|R2@28|??@32","B9CA" +"ADD LOGICAL HIGH (32)","ALHHLR R1,R2,R3","47578@0|R3@16|//@20|R1@24|R2@28|??@32","B9DA" +"ADD LOGICAL IMMEDIATE (32)","ALFI R1,I2","194@0|R1@8|11@12|I2@16|??@48","C2B" +"ADD LOGICAL IMMEDIATE (64←32)","ALGFI R1,I2","194@0|R1@8|10@12|I2@16|??@48","C2A" +"ADD LOGICAL WITH CARRY (32)","ALC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|152@40|??@48","B" +"ADD LOGICAL WITH CARRY (32)","ALCR R1,R2","47512@0|//@16|R1@24|R2@28|??@32","B998" +"ADD LOGICAL WITH CARRY (64)","ALCG 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|136@40|??@48","B" +"ADD LOGICAL WITH CARRY (64)","ALCGR R1,R2","47496@0|//@16|R1@24|R2@28|??@32","B988" +"ADD LOGICAL WITH SIGNED IMMEDIATE(32→16)","ALHSIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|218@40|??@48","ECDA 7-31" +"ADD LOGICAL WITH SIGNED IMMEDIATE (32←8)","ALSI D1(B1),I2","235@0|I2@8|B1@16|D1@20|110@40|??@48","ST" +"ADD LOGICAL WITH SIGNED IMMEDIATE(64→16)","ALGHSIK R1,R3,I2","236@0|R1@8|R3@12|I2@16|//@32|219@40|??@48","ECDB 7-31" +"ADD LOGICAL WITH SIGNED IMMEDIATE (64→8)","ALGSI D1(B1),I2","235@0|I2@8|B1@16|D1@20|126@40|??@48","ST" +"ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32)","ALSIH R1,I2","204@0|R1@8|10@12|I2@16|??@48","CCA" +"ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32)","ALSIHN R1,I2","204@0|R1@8|11@12|I2@16|??@48","CCB" +"ADD NORMALIZED (extended HFP)","AXR R1,R2","54@0|R1@8|R2@12|??@16","SP Da" +"ADD NORMALIZED (long HFP)","AD R1,D2(X2,B2)","106@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD NORMALIZED (long HFP)","ADR R1,R2","42@0|R1@8|R2@12|??@16","Da" +"ADD NORMALIZED (short HFP)","AE R1,D2(X2,B2)","122@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD NORMALIZED (short HFP)","AER R1,R2","58@0|R1@8|R2@12|??@16","Da" +"ADD UNNORMALIZED (long HFP)","AW R1,D2(X2,B2)","110@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD UNNORMALIZED (long HFP)","AWR R1,R2","46@0|R1@8|R2@12|??@16","Da" +"ADD UNNORMALIZED (short HFP)","AU R1,D2(X2,B2)","126@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"ADD UNNORMALIZED (short HFP)","AUR R1,R2","62@0|R1@8|R2@12|??@16","Da" +"AND (32)","N R1,D2(X2,B2)","84@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"AND (32)","NR R1,R2","20@0|R1@8|R2@12|??@16","14" +"AND (32)","NRK R1,R2,R3","47604@0|R3@16|//@20|R1@24|R2@28|??@32","B9F4" +"AND (32)","NY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|84@40|??@48","B" +"AND (64)","NG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|128@40|??@48","B" +"AND (64)","NGR R1,R2","47488@0|//@16|R1@24|R2@28|??@32","B980" +"AND (64)","NGRK 
R1,R2,R3","47588@0|R3@16|//@20|R1@24|R2@28|??@32","B9E4" +"AND (character)","NC D1(L1,B1),D2(B2)","212@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"AND (immediate)","NI D1(B1),I2","148@0|I2@8|B1@16|D1@20|??@32","ST" +"AND (immediate)","NIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|84@40|??@48","ST" +"AND IMMEDIATE (high high)","NIHH R1,I2","165@0|R1@8|4@12|I2@16|??@32","A54" +"AND IMMEDIATE (high low)","NIHL R1,I2","165@0|R1@8|5@12|I2@16|??@32","A55" +"AND IMMEDIATE (high)","NIHF R1,I2","192@0|R1@8|10@12|I2@16|??@48","C0A" +"AND IMMEDIATE (low high)","NILH R1,I2","165@0|R1@8|6@12|I2@16|??@32","A56" +"AND IMMEDIATE (low low)","NILL R1,I2","165@0|R1@8|7@12|I2@16|??@32","A57" +"AND IMMEDIATE (low)","NILF R1,I2","192@0|R1@8|11@12|I2@16|??@48","C0B" +"AND WITH COMPLEMENT(32)","NCRK R1,R2,R3","47605@0|R3@16|//@20|R1@24|R2@28|??@32","B9F5" +"AND WITH COMPLEMENT(64)","NCGRK R1,R2,R3","47589@0|R3@16|//@20|R1@24|R2@28|??@32","B9E5" +"BRANCH AND LINK","BAL R1,D2(X2,B2)","69@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH AND LINK","BALR R1,R2","5@0|R1@8|R2@12|??@16","B" +"BRANCH AND SAVE","BAS R1,D2(X2,B2)","77@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH AND SAVE","BASR R1,R2","13@0|R1@8|R2@12|??@16","B" +"BRANCH AND SAVE AND SET MODE","BASSM R1,R2","12@0|R1@8|R2@12|??@16","B" +"BRANCH AND SET AUTHORITY","BSA R1,R2","45658@0|//@16|R1@24|R2@28|??@32","SO" +"BRANCH AND SET MODE","BSM R1,R2","11@0|R1@8|R2@12|??@16","B" +"BRANCH AND STACK","BAKR R1,R2","45632@0|//@16|R1@24|R2@28|??@32","Z" +"BRANCH IN SUBSPACE GROUP","BSG R1,R2","45656@0|//@16|R1@24|R2@28|??@32","SO" +"BRANCH INDIRECT ON CONDITION","BIC M1,D2(X2,B2)","227@0|M1@8|X2@12|B2@16|D2@20|71@40|??@48","B" +"BRANCH ON CONDITION","BC M1,D2(X2,B2)","71@0|M1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH ON CONDITION","BCR M1,R2","7@0|M1@8|R2@12|??@16","B" +"BRANCH ON COUNT (32)","BCT R1,D2(X2,B2)","70@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"BRANCH ON COUNT (32)","BCTR R1,R2","6@0|R1@8|R2@12|??@16","B" +"BRANCH ON COUNT (64)","BCTG 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|70@40|??@48","B" +"BRANCH ON COUNT (64)","BCTGR R1,R2","47430@0|//@16|R1@24|R2@28|??@32","B" +"BRANCH ON INDEX HIGH (32)","BXH R1,R3,D2(B2)","134@0|R1@8|R3@12|B2@16|D2@20|??@32","B" +"BRANCH ON INDEX HIGH (64)","BXHG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|68@40|??@48","B" +"BRANCH ON INDEX LOW OR EQUAL (32)","BXLE R1,R3,D2(B2)","135@0|R1@8|R3@12|B2@16|D2@20|??@32","B" +"BRANCH ON INDEX LOW OR EQUAL (64)","BXLEG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|69@40|??@48","B" +"BRANCH PREDICTION PRELOAD","BPP M1,RI2,D3(B3)","199@0|M1@8|//@12|B3@16|D3@20|RI2@32|??@48","C7" +"BRANCH PREDICTION RELATIVE PRELOAD","BPRP M1,RI2,RI3","197@0|M1@8|RI2@12|RI3@24|??@48","C5" +"BRANCH RELATIVE AND SAVE","BRAS R1,RI2","167@0|R1@8|5@12|RI2@16|??@32","B" +"BRANCH RELATIVE AND SAVE LONG","BRASL R1,RI2","192@0|R1@8|5@12|RI2@16|??@48","B" +"BRANCH RELATIVE ON CONDITION","BRC M1,RI2","167@0|M1@8|4@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON CONDITION LONG","BRCL M1,RI2","192@0|M1@8|4@12|RI2@16|??@48","B" +"BRANCH RELATIVE ON COUNT (32)","BRCT R1,RI2","167@0|R1@8|6@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON COUNT (64)","BRCTG R1,RI2","167@0|R1@8|7@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON COUNT HIGH (32)","BRCTH R1,RI2","204@0|R1@8|6@12|RI2@16|??@48","B" +"BRANCH RELATIVE ON INDEX HIGH (32)","BRXH R1,R3,RI2","132@0|R1@8|R3@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON INDEX HIGH (64)","BRXHG R1,R3,RI2","236@0|R1@8|R3@12|RI2@16|//@32|68@40|??@48","B" +"BRANCH RELATIVE ON INDEX LOW OR EQ. (32)","BRXLE R1,R3,RI2","133@0|R1@8|R3@12|RI2@16|??@32","B" +"BRANCH RELATIVE ON INDEX LOW OR EQ. 
(64)","BRXLG R1,R3,RI2","236@0|R1@8|R3@12|RI2@16|//@32|69@40|??@48","B" +"CANCEL SUBCHANNEL","XSCH","45686@0|//@16|??@32","OP" +"CHECKSUM","CKSM R1,R2","45633@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE","KM R1,R2","47406@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH AUTHENTICATION","KMA R1,R3,R2","47401@0|R3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH CHAINING","KMC R1,R2","47407@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH CIPHER FEEDBACK","KMF R1,R2","47402@0|//@16|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH COUNTER","KMCTR R1,R3,R2","47405@0|R3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CIPHER MESSAGE WITH OUTPUT FEEDBACK","KMO R1,R2","47403@0|//@16|R1@24|R2@28|??@32","SP IC" +"CLEAR SUBCHANNEL","CSCH","45616@0|//@16|??@32","OP" +"COMPARE (32)","C R1,D2(X2,B2)","89@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"COMPARE (32)","CR R1,R2","25@0|R1@8|R2@12|??@16","19" +"COMPARE (32)","CY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|89@40|??@48","B" +"COMPARE (64)","CG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|32@40|??@48","B" +"COMPARE (64)","CGR R1,R2","47392@0|//@16|R1@24|R2@28|??@32","B920" +"COMPARE (64←32)","CGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|48@40|??@48","B" +"COMPARE (64←32)","CGFR R1,R2","47408@0|//@16|R1@24|R2@28|??@32","B930" +"COMPARE (extended BFP)","CXBR R1,R2","45897@0|//@16|R1@24|R2@28|??@32","SP Db" +"COMPARE (extended DFP)","CXTR R1,R2","46060@0|//@16|R1@24|R2@28|??@32","SP Dt" +"COMPARE (extended HFP)","CXR R1,R2","45929@0|//@16|R1@24|R2@28|??@32","SP Da" +"COMPARE (long BFP)","CDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|25@40|??@48","Db" +"COMPARE (long BFP)","CDBR R1,R2","45849@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE (long DFP)","CDTR R1,R2","46052@0|//@16|R1@24|R2@28|??@32","Dt" +"COMPARE (long HFP)","CD R1,D2(X2,B2)","105@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"COMPARE (long HFP)","CDR R1,R2","41@0|R1@8|R2@12|??@16","Da" +"COMPARE (short BFP)","CEB 
R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|9@40|??@48","Db" +"COMPARE (short BFP)","CEBR R1,R2","45833@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE (short HFP)","CE R1,D2(X2,B2)","121@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"COMPARE (short HFP)","CER R1,R2","57@0|R1@8|R2@12|??@16","Da" +"COMPARE AND BRANCH (32)","CRB R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|246@40|??@48","B" +"COMPARE AND BRANCH (64)","CGRB R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|228@40|??@48","B" +"COMPARE AND BRANCH RELATIVE (32)","CRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|118@40|??@48","B" +"COMPARE AND BRANCH RELATIVE (64)","CGRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|100@40|??@48","B" +"COMPARE AND FORM CODEWORD","CFC D2(B2)","45594@0|B2@16|D2@20|??@32","SP II" +"COMPARE AND REPLACE DAT TABLE ENTRY","CRDTE R1,R3,R2,M4","47503@0|R3@16|M4@20|R1@24|R2@28|??@32","SP" +"COMPARE AND SIGNAL (extended BFP)","KXBR R1,R2","45896@0|//@16|R1@24|R2@28|??@32","SP Db" +"COMPARE AND SIGNAL (extended DFP)","KXTR R1,R2","46056@0|//@16|R1@24|R2@28|??@32","SP Dt" +"COMPARE AND SIGNAL (long BFP)","KDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|24@40|??@48","Db" +"COMPARE AND SIGNAL (long BFP)","KDBR R1,R2","45848@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE AND SIGNAL (long DFP)","KDTR R1,R2","46048@0|//@16|R1@24|R2@28|??@32","Dt" +"COMPARE AND SIGNAL (short BFP)","KEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|8@40|??@48","Db" +"COMPARE AND SIGNAL (short BFP)","KEBR R1,R2","45832@0|//@16|R1@24|R2@28|??@32","Db" +"COMPARE AND SWAP (32)","CS R1,R3,D2(B2)","186@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"COMPARE AND SWAP (32)","CSY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|20@40|??@48","SP" +"COMPARE AND SWAP (64)","CSG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|48@40|??@48","SP" +"COMPARE AND SWAP AND PURGE (32)","CSP R1,R2","45648@0|//@16|R1@24|R2@28|??@32","SP" +"COMPARE AND SWAP AND PURGE (64)","CSPG 
R1,R2","47498@0|//@16|R1@24|R2@28|??@32","SP" +"COMPARE AND SWAP AND STORE","CSST D1(B1),D2(B2),R3","200@0|R3@8|2@12|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"COMPARE AND TRAP (32)","CRT R1,R2,M3","47474@0|M3@16|//@20|R1@24|R2@28|??@32","B972" +"COMPARE AND TRAP (64)","CGRT R1,R2,M3","47456@0|M3@16|//@20|R1@24|R2@28|??@32","B960" +"COMPARE BIASED EXPONENT (extended DFP)","CEXTR R1,R2","46076@0|//@16|R1@24|R2@28|??@32","SP Dt" +"COMPARE BIASED EXPONENT (long DFP)","CEDTR R1,R2","46068@0|//@16|R1@24|R2@28|??@32","Dt" +"COMPARE DECIMAL","CP D1(L1,B1),D2(L2,B2)","249@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"COMPARE DOUBLE AND SWAP (32)","CDS R1,R3,D2(B2)","187@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"COMPARE DOUBLE AND SWAP (32)","CDSY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|49@40|??@48","SP" +"COMPARE DOUBLE AND SWAP (64)","CDSG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|62@40|??@48","SP" +"COMPARE HALFWORD (32→16)","CH R1,D2(X2,B2)","73@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"COMPARE HALFWORD (32→16)","CHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|121@40|??@48","B" +"COMPARE HALFWORD (64←16)","CGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|52@40|??@48","B" +"COMPARE HALFWORD IMMEDIATE (16→16)","CHHSI D1(B1),I2","58708@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE HALFWORD IMMEDIATE (32←16)","CHI R1,I2","167@0|R1@8|14@12|I2@16|??@32","A7E" +"COMPARE HALFWORD IMMEDIATE (32←16)","CHSI D1(B1),I2","58716@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE HALFWORD IMMEDIATE (64←16)","CGHI R1,I2","167@0|R1@8|15@12|I2@16|??@32","A7F" +"COMPARE HALFWORD IMMEDIATE (64←16)","CGHSI D1(B1),I2","58712@0|B1@16|D1@20|I2@32|??@48","B" +"COMPAREHALFWORDRELATIVE LONG (32→16)","CHRL R1,RI2","198@0|R1@8|5@12|RI2@16|??@48","C65" +"COMPAREHALFWORDRELATIVE LONG (64←16)","CGHRL R1,RI2","198@0|R1@8|4@12|RI2@16|??@48","C64" +"COMPARE HIGH (32)","CHF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|205@40|??@48","B" +"COMPARE HIGH (32)","CHHR R1,R2","47565@0|//@16|R1@24|R2@28|??@32","B9CD" +"COMPARE HIGH 
(32)","CHLR R1,R2","47581@0|//@16|R1@24|R2@28|??@32","B9DD" +"COMPARE IMMEDIATE (32)","CFI R1,I2","194@0|R1@8|13@12|I2@16|??@48","C2D" +"COMPARE IMMEDIATE (64←32)","CGFI R1,I2","194@0|R1@8|12@12|I2@16|??@48","C2C" +"COMPARE IMMEDIATE AND BRANCH (32←8)","CIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|254@40|??@48","B" +"COMPARE IMMEDIATE AND BRANCH (64←8)","CGIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|252@40|??@48","B" +"COMPARE IMMEDIATE AND BRANCH RELATIVE(32→8)","CIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|126@40|??@48","B" +"COMPARE IMMEDIATE AND BRANCH RELATIVE(64→8)","CGIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|124@40|??@48","B" +"COMPARE IMMEDIATE AND TRAP (32→16)","CIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|114@40|??@48","EC72" +"COMPARE IMMEDIATE AND TRAP (64←16)","CGIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|112@40|??@48","EC70" +"COMPARE IMMEDIATE HIGH (32)","CIH R1,I2","204@0|R1@8|13@12|I2@16|??@48","CCD" +"COMPARE LOGICAL (32)","CL R1,D2(X2,B2)","85@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"COMPARE LOGICAL (32)","CLR R1,R2","21@0|R1@8|R2@12|??@16","15" +"COMPARE LOGICAL (32)","CLY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|85@40|??@48","B" +"COMPARE LOGICAL (64)","CLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|33@40|??@48","B" +"COMPARE LOGICAL (64)","CLGR R1,R2","47393@0|//@16|R1@24|R2@28|??@32","B921" +"COMPARE LOGICAL (64→32)","CLGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|49@40|??@48","B" +"COMPARE LOGICAL (64→32)","CLGFR R1,R2","47409@0|//@16|R1@24|R2@28|??@32","B931" +"COMPARE LOGICAL (character)","CLC D1(L1,B1),D2(B2)","213@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","B" +"COMPARE LOGICAL (immediate)","CLI D1(B1),I2","149@0|I2@8|B1@16|D1@20|??@32","B" +"COMPARE LOGICAL (immediate)","CLIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|85@40|??@48","B" +"COMPARE LOGICAL AND BRANCH (32)","CLRB R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|247@40|??@48","B" +"COMPARE LOGICAL AND BRANCH (64)","CLGRB 
R1,R2,M3,D4(B4)","236@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|229@40|??@48","B" +"COMPARE LOGICAL AND BRANCH RELATIVE(32)","CLRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|119@40|??@48","B" +"COMPARE LOGICAL AND BRANCH RELATIVE(64)","CLGRJ R1,R2,M3,RI4","236@0|R1@8|R2@12|RI4@16|M3@32|//@36|101@40|??@48","B" +"COMPARE LOGICAL AND TRAP (32)","CLRT R1,R2,M3","47475@0|M3@16|//@20|R1@24|R2@28|??@32","B973" +"COMPARE LOGICAL AND TRAP (32)","CLT R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|35@40|??@48","B" +"COMPARE LOGICAL AND TRAP (64)","CLGRT R1,R2,M3","47457@0|M3@16|//@20|R1@24|R2@28|??@32","B961" +"COMPARE LOGICAL AND TRAP (64)","CLGT R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|43@40|??@48","B" +"COMPARE LOGICAL CHAR. UNDER MASK (high)","CLMH R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|32@40|??@48","B" +"COMPARE LOGICAL CHAR. UNDER MASK (low)","CLM R1,M3,D2(B2)","189@0|R1@8|M3@12|B2@16|D2@20|??@32","B" +"COMPARE LOGICAL CHAR. UNDER MASK (low)","CLMY R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|33@40|??@48","B" +"COMPARE LOGICAL HIGH (32)","CLHF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|207@40|??@48","B" +"COMPARE LOGICAL HIGH (32)","CLHHR R1,R2","47567@0|//@16|R1@24|R2@28|??@32","B9CF" +"COMPARE LOGICAL HIGH (32)","CLHLR R1,R2","47583@0|//@16|R1@24|R2@28|??@32","B9DF" +"COMPARE LOGICAL IMMEDIATE (16←16)","CLHHSI D1(B1),I2","58709@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE LOGICAL IMMEDIATE (32)","CLFI R1,I2","194@0|R1@8|15@12|I2@16|??@48","C2F" +"COMPARE LOGICAL IMMEDIATE (32←16)","CLFHSI D1(B1),I2","58717@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE LOGICAL IMMEDIATE (64←16)","CLGHSI D1(B1),I2","58713@0|B1@16|D1@20|I2@32|??@48","B" +"COMPARE LOGICAL IMMEDIATE (64←32)","CLGFI R1,I2","194@0|R1@8|14@12|I2@16|??@48","C2E" +"COMPARE LOGICAL IMMEDIATE AND BRANCH(32←8)","CLIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|255@40|??@48","B" +"COMPARE LOGICAL IMMEDIATE AND BRANCH(64→8)","CLGIB R1,I2,M3,D4(B4)","236@0|R1@8|M3@12|B4@16|D4@20|I2@32|253@40|??@48","B" 
+"COMPARE LOGICAL IMMEDIATE AND BRANCH RELATIVE (32→8)","CLIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|127@40|??@48","B" +"COMPARE LOGICAL IMMEDIATE AND BRANCH RELATIVE (64→8)","CLGIJ R1,I2,M3,RI4","236@0|R1@8|M3@12|RI4@16|I2@32|125@40|??@48","B" +"COMPARE LOGICAL IMMEDIATE AND TRAP(32→16)","CLFIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|115@40|??@48","EC73" +"COMPARE LOGICAL IMMEDIATE AND TRAP(64←16)","CLGIT R1,I2,M3","236@0|R1@8|//@12|I2@16|M3@32|//@36|113@40|??@48","EC71" +"COMPARE LOGICAL IMMEDIATE HIGH (32)","CLIH R1,I2","204@0|R1@8|15@12|I2@16|??@48","CCF" +"COMPARE LOGICAL LONG","CLCL R1,R2","15@0|R1@8|R2@12|??@16","SP II" +"COMPARE LOGICAL LONG EXTENDED","CLCLE R1,R3,D2(B2)","169@0|R1@8|R3@12|B2@16|D2@20|??@32","SP IC" +"COMPARE LOGICAL LONG UNICODE","CLCLU R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|143@40|??@48","SP IC" +"COMPARE LOGICAL RELATIVE LONG (32)","CLRL R1,RI2","198@0|R1@8|15@12|RI2@16|??@48","SP" +"COMPARE LOGICAL RELATIVE LONG (32→16)","CLHRL R1,RI2","198@0|R1@8|7@12|RI2@16|??@48","C67" +"COMPARE LOGICAL RELATIVE LONG (64)","CLGRL R1,RI2","198@0|R1@8|10@12|RI2@16|??@48","SP" +"COMPARE LOGICAL RELATIVE LONG (64→16)","CLGHRL R1,RI2","198@0|R1@8|6@12|RI2@16|??@48","C66" +"COMPARE LOGICAL RELATIVE LONG (64→32)","CLGFRL R1,RI2","198@0|R1@8|14@12|RI2@16|??@48","SP" +"COMPARE LOGICAL STRING","CLST R1,R2","45661@0|//@16|R1@24|R2@28|??@32","SP IC" +"COMPARE RELATIVE LONG (32)","CRL R1,RI2","198@0|R1@8|13@12|RI2@16|??@48","SP" +"COMPARE RELATIVE LONG (64)","CGRL R1,RI2","198@0|R1@8|8@12|RI2@16|??@48","SP" +"COMPARE RELATIVE LONG (64←32)","CGFRL R1,RI2","198@0|R1@8|12@12|RI2@16|??@48","SP" +"COMPARE UNTIL SUBSTRING EQUAL","CUSE R1,R2","45655@0|//@16|R1@24|R2@28|??@32","SP II" +"COMPRESSION CALL","CMPSC R1,R2","45667@0|//@16|R1@24|R2@28|??@32","SP II" +"COMPUTE DIGITAL SIGNATURE AUTHENTICATION","KDSA R1,R2","47418@0|//@16|R1@24|R2@28|??@32","SP IC" +"COMPUTE INTERMEDIATE MESSAGE DIGEST","KIMD R1,R2","47422@0|//@16|R1@24|R2@28|??@32","SP IC" 
+"COMPUTE LAST MESSAGE DIGEST","KLMD R1,R2","47423@0|//@16|R1@24|R2@28|??@32","SP IC" +"COMPUTE MESSAGE AUTHENTICATION CODE","KMAC R1,R2","47390@0|//@16|R1@24|R2@28|??@32","SP IC" +"CONVERT BFP TO HFP (long)","THDR R1,R2","45913@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT BFP TO HFP (short to long)","THDER R1,R2","45912@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (32 to extended BFP)","CXFBR R1,R2","45974@0|//@16|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to extended BFP)","CXFBRA R1,M3,R2,M4","45974@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to extended DFP)","CXFTR R1,M3,R2,M4","47449@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM FIXED (32 to extended HFP)","CXFR R1,R2","46006@0|//@16|R1@24|R2@28|??@32","SP Da" +"CONVERT FROM FIXED (32 to long BFP)","CDFBR R1,R2","45973@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (32 to long BFP)","CDFBRA R1,M3,R2,M4","45973@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to long DFP)","CDFTR R1,M3,R2,M4","47441@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM FIXED (32 to long HFP)","CDFR R1,R2","46005@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (32 to short BFP)","CEFBR R1,R2","45972@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (32 to short BFP)","CEFBRA R1,M3,R2,M4","45972@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (32 to short HFP)","CEFR R1,R2","46004@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (64 to extended BFP)","CXGBR R1,R2","45990@0|//@16|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to extended BFP)","CXGBRA R1,M3,R2,M4","45990@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to extended DFP)","CXGTR R1,R2","46073@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM FIXED (64 to extended DFP)","CXGTRA R1,M3,R2,M4","46073@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM FIXED (64 to extended HFP)","CXGR R1,R2","46022@0|//@16|R1@24|R2@28|??@32","SP Da" +"CONVERT FROM 
FIXED (64 to long BFP)","CDGBR R1,R2","45989@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (64 to long BFP)","CDGBRA R1,M3,R2,M4","45989@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to long DFP)","CDGTR R1,R2","46065@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT FROM FIXED (64 to long DFP)","CDGTRA R1,M3,R2,M4","46065@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM FIXED (64 to long HFP)","CDGR R1,R2","46021@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM FIXED (64 to short BFP)","CEGBR R1,R2","45988@0|//@16|R1@24|R2@28|??@32","Db" +"CONVERT FROM FIXED (64 to short BFP)","CEGBRA R1,M3,R2,M4","45988@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM FIXED (64 to short HFP)","CEGR R1,R2","46020@0|//@16|R1@24|R2@28|??@32","Da" +"CONVERT FROM LOGICAL (32 to extended BFP)","CXLFBR R1,M3,R2,M4","45970@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (32 to extended DFP)","CXLFTR R1,M3,R2,M4","47451@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM LOGICAL (32 to long BFP)","CDLFBR R1,M3,R2,M4","45969@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (32 to long DFP)","CDLFTR R1,M3,R2,M4","47443@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM LOGICAL (32 to short BFP)","CELFBR R1,M3,R2,M4","45968@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (64 to extended BFP)","CXLGBR R1,M3,R2,M4","45986@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (64 to extended DFP)","CXLGTR R1,M3,R2,M4","47450@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM LOGICAL (64 to long BFP)","CDLGBR R1,M3,R2,M4","45985@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM LOGICAL (64 to long DFP)","CDLGTR R1,M3,R2,M4","47442@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT FROM LOGICAL (64 to short BFP)","CELGBR R1,M3,R2,M4","45984@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT FROM PACKED (to extended DFP)","CXPT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|175@40|??@48","SP 
Dt" +"CONVERT FROM PACKED (to long DFP)","CDPT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|174@40|??@48","SP Dt" +"CONVERT FROM SIGNED PACKED (128 to extended DFP)","CXSTR R1,R2","46075@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM SIGNED PACKED (64 to long DFP)","CDSTR R1,R2","46067@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT FROM UNSIGNED PACKED (128 to ext. DFP)","CXUTR R1,R2","46074@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT FROM UNSIGNED PACKED (64 to long DFP)","CDUTR R1,R2","46066@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT FROM ZONED (to extended DFP)","CXZT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|171@40|??@48","SP Dt" +"CONVERT FROM ZONED (to long DFP)","CDZT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|170@40|??@48","SP Dt" +"CONVERT HFP TO BFP (long to short)","TBEDR R1,M3,R2","45904@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT HFP TO BFP (long)","TBDR R1,M3,R2","45905@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO BINARY (32)","CVB R1,D2(X2,B2)","79@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"CONVERT TO BINARY (32)","CVBY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|6@40|??@48","B" +"CONVERT TO BINARY (64)","CVBG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|14@40|??@48","B" +"CONVERT TO DECIMAL (32)","CVD R1,D2(X2,B2)","78@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"CONVERT TO DECIMAL (32)","CVDY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|38@40|??@48","ST" +"CONVERT TO DECIMAL (64)","CVDG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|46@40|??@48","ST" +"CONVERT TO FIXED (extended BFP to 32)","CFXBR R1,M3,R2","45978@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended BFP to 32)","CFXBRA R1,M3,R2,M4","45978@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended BFP to 64)","CGXBR R1,M3,R2","45994@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended BFP to 64)","CGXBRA R1,M3,R2,M4","45994@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (extended DFP to 32)","CFXTR 
R1,M3,R2,M4","47433@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO FIXED (extended DFP to 64)","CGXTR R1,M3,R2","46057@0|M3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO FIXED (extended DFP to 64)","CGXTRA R1,M3,R2,M4","46057@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO FIXED (extended HFP to 32)","CFXR R1,M3,R2","46010@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (extended HFP to 64)","CGXR R1,M3,R2","46026@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (long BFP to 32)","CFDBR R1,M3,R2","45977@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long BFP to 32)","CFDBRA R1,M3,R2,M4","45977@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long BFP to 64)","CGDBR R1,M3,R2","45993@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long BFP to 64)","CGDBRA R1,M3,R2,M4","45993@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (long DFP to 32)","CFDTR R1,M3,R2,M4","47425@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO FIXED (long DFP to 64)","CGDTR R1,M3,R2","46049@0|M3@16|//@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO FIXED (long DFP to 64)","CGDTRA R1,M3,R2,M4","46049@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO FIXED (long HFP to 32)","CFDR R1,M3,R2","46009@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (long HFP to 64)","CGDR R1,M3,R2","46025@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (short BFP to 32)","CFEBR R1,M3,R2","45976@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short BFP to 32)","CFEBRA R1,M3,R2,M4","45976@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short BFP to 64)","CGEBR R1,M3,R2","45992@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short BFP to 64)","CGEBRA R1,M3,R2,M4","45992@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO FIXED (short HFP to 32)","CFER R1,M3,R2","46008@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO FIXED (short HFP to 64)","CGER 
R1,M3,R2","46024@0|M3@16|//@20|R1@24|R2@28|??@32","SP Da" +"CONVERT TO LOGICAL (extended BFP to 32)","CLFXBR R1,M3,R2,M4","45982@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (extended BFP to 64)","CLGXBR R1,M3,R2,M4","45998@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (extended DFP to 32)","CLFXTR R1,M3,R2,M4","47435@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO LOGICAL (extended DFP to 64)","CLGXTR R1,M3,R2,M4","47434@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO LOGICAL (long BFP to 32)","CLFDBR R1,M3,R2,M4","45981@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (long BFP to 64)","CLGDBR R1,M3,R2,M4","45997@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (long DFP to 32)","CLFDTR R1,M3,R2,M4","47427@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO LOGICAL (long DFP to 64)","CLGDTR R1,M3,R2,M4","47426@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERT TO LOGICAL (short BFP to 32)","CLFEBR R1,M3,R2,M4","45980@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO LOGICAL (short BFP to 64)","CLGEBR R1,M3,R2,M4","45996@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"CONVERT TO PACKED (from extended DFP)","CPXT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|173@40|??@48","SP Dt" +"CONVERT TO PACKED (from long DFP)","CPDT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|172@40|??@48","SP Dt" +"CONVERT TO SIGNED PACKED (extended DFP to 128)","CSXTR R1,R2,M4","46059@0|//@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO SIGNED PACKED (long DFP to 64)","CSDTR R1,R2,M4","46051@0|//@16|M4@20|R1@24|R2@28|??@32","Dt" +"CONVERTTOUNSIGNEDPACKED(extendedDFP to 128)","CUXTR R1,R2","46058@0|//@16|R1@24|R2@28|??@32","SP Dt" +"CONVERT TO UNSIGNED PACKED (long DFP to 64)","CUDTR R1,R2","46050@0|//@16|R1@24|R2@28|??@32","Dt" +"CONVERT TO ZONED (from extended DFP)","CZXT R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|169@40|??@48","SP" +"CONVERT TO ZONED (from long DFP)","CZDT 
R1,D2(L2,B2),M3","237@0|L2@8|B2@16|D2@20|R1@32|M3@36|168@40|??@48","SP" +"CONVERT UNICODE TO UTF-8","CUUTF R1,R2,M3","45734@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-16 TO UTF-32","CU24 R1,R2,M3","47537@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-16 TO UTF-8","CU21 R1,R2,M3","45734@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-8 TO UNICODE","CUTFU R1,R2,M3","45735@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-8 TO UTF-16","CU12 R1,R2,M3","45735@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-8 TO UTF-32","CU14 R1,R2,M3","47536@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-32 TO UTF-16","CU42 R1,R2","47539@0|//@16|R1@24|R2@28|??@32","SP IC" +"CONVERT UTF-32 TO UTF-8","CU41 R1,R2","47538@0|//@16|R1@24|R2@28|??@32","SP IC" +"COPY ACCESS","CPYA R1,R2","45645@0|//@16|R1@24|R2@28|??@32","U" +"COPY SIGN (long)","CPSDR R1,R3,R2","45938@0|R3@16|//@20|R1@24|R2@28|??@32","Da" +"DECIMAL SCALE AND CONVERT AND SPLIT TO HFP","VSCSHP V1,V2,V3","230@0|V1@8|V2@12|V3@16|//@20|RXB@36|124@40|??@48","Dv" +"DECIMAL SCALE AND CONVERT TO HFP","VSCHP V1,V2,V3,M4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|116@40|??@48","SP Dv" +"DEFLATE CONVERSION CALL","DFLTCC R1,R2,R3","47417@0|R3@16|//@20|R1@24|R2@28|??@32","SP IC" +"DIVIDE (32→64)","D R1,D2(X2,B2)","93@0|R1@8|X2@12|B2@16|D2@20|??@32","SP" +"DIVIDE (32←64)","DR R1,R2","29@0|R1@8|R2@12|??@16","SP" +"DIVIDE (extended BFP)","DXBR R1,R2","45901@0|//@16|R1@24|R2@28|??@32","SP Db" +"DIVIDE (extended DFP)","DXTR R1,R2,R3","46041@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"DIVIDE (extended DFP)","DXTRA R1,R2,R3,M4","46041@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"DIVIDE (extended HFP)","DXR R1,R2","45613@0|//@16|R1@24|R2@28|??@32","SP Da" +"DIVIDE (long BFP)","DDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|29@40|??@48","Db" +"DIVIDE (long BFP)","DDBR R1,R2","45853@0|//@16|R1@24|R2@28|??@32","Db" +"DIVIDE (long DFP)","DDTR 
R1,R2,R3","46033@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"DIVIDE (long DFP)","DDTRA R1,R2,R3,M4","46033@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"DIVIDE (long HFP)","DD R1,D2(X2,B2)","109@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"DIVIDE (long HFP)","DDR R1,R2","45@0|R1@8|R2@12|??@16","Da" +"DIVIDE (short BFP)","DEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|13@40|??@48","Db" +"DIVIDE (short BFP)","DEBR R1,R2","45837@0|//@16|R1@24|R2@28|??@32","Db" +"DIVIDE (short HFP)","DE R1,D2(X2,B2)","125@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"DIVIDE (short HFP)","DER R1,R2","61@0|R1@8|R2@12|??@16","Da" +"DIVIDE DECIMAL","DP D1(L1,B1),D2(L2,B2)","253@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","SP Dg" +"DIVIDE LOGICAL (32→64)","DL R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|151@40|??@48","SP" +"DIVIDE LOGICAL (32←64)","DLR R1,R2","47511@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE LOGICAL (64←128)","DLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|135@40|??@48","SP" +"DIVIDE LOGICAL (64→128)","DLGR R1,R2","47495@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE SINGLE (64)","DSG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|13@40|??@48","SP" +"DIVIDE SINGLE (64)","DSGR R1,R2","47373@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE SINGLE (64←32)","DSGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|29@40|??@48","SP" +"DIVIDE SINGLE (64→32)","DSGFR R1,R2","47389@0|//@16|R1@24|R2@28|??@32","SP" +"DIVIDE TO INTEGER (long BFP)","DIDBR R1,R3,R2,M4","45915@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"DIVIDE TO INTEGER (short BFP)","DIEBR R1,R3,R2,M4","45907@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"EDIT","ED D1(L1,B1),D2(B2)","222@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"EDIT AND MARK","EDMK D1(L1,B1),D2(B2)","223@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"EXCLUSIVE OR (32)","X R1,D2(X2,B2)","87@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"EXCLUSIVE OR (32)","XR R1,R2","23@0|R1@8|R2@12|??@16","17" +"EXCLUSIVE OR (32)","XRK R1,R2,R3","47607@0|R3@16|//@20|R1@24|R2@28|??@32","B9F7" +"EXCLUSIVE OR (32)","XY 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|87@40|??@48","B" +"EXCLUSIVE OR (64)","XG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|130@40|??@48","B" +"EXCLUSIVE OR (64)","XGR R1,R2","47490@0|//@16|R1@24|R2@28|??@32","B982" +"EXCLUSIVE OR (64)","XGRK R1,R2,R3","47591@0|R3@16|//@20|R1@24|R2@28|??@32","B9E7" +"EXCLUSIVE OR (character)","XC D1(L1,B1),D2(B2)","215@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"EXCLUSIVE OR (immediate)","XI D1(B1),I2","151@0|I2@8|B1@16|D1@20|??@32","ST" +"EXCLUSIVE OR (immediate)","XIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|87@40|??@48","ST" +"EXCLUSIVE OR IMMEDIATE (high)","XIHF R1,I2","192@0|R1@8|6@12|I2@16|??@48","C06" +"EXCLUSIVE OR IMMEDIATE (low)","XILF R1,I2","192@0|R1@8|7@12|I2@16|??@48","C07" +"EXECUTE","EX R1,D2(X2,B2)","68@0|R1@8|X2@12|B2@16|D2@20|??@32","SP" +"EXECUTE RELATIVE LONG","EXRL R1,RI2","198@0|R1@8|0@12|RI2@16|??@48","C60" +"EXTRACT ACCESS","EAR R1,R2","45647@0|//@16|R1@24|R2@28|??@32","U" +"EXTRACT AND SET EXTENDED AUTHORITY","ESEA R1","47517@0|//@16|R1@24|//@28|??@32","B99D" +"EXTRACT BIASED EXPONENT (extended DFP to 64)","EEXTR R1,R2","46061@0|//@16|R1@24|R2@28|??@32","SP Dt" +"EXTRACT BIASED EXPONENT (long DFP to 64)","EEDTR R1,R2","46053@0|//@16|R1@24|R2@28|??@32","Dt" +"EXTRACT CPU ATTRIBUTE","ECAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|76@40|??@48","EB4C" +"EXTRACT CPU TIME","ECTG D1(B1),D2(B2),R3","200@0|R3@8|1@12|B1@16|D1@20|B2@32|D2@36|??@48","R" +"EXTRACT FPC","EFPC R1","45964@0|//@16|R1@24|//@28|??@32","Db" +"EXTRACT PRIMARY ASN","EPAR R1","45606@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT PRIMARY ASN AND INSTANCE","EPAIR R1","47514@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT PSW","EPSW R1,R2","47501@0|//@16|R1@24|R2@28|??@32","B98D" +"EXTRACT SECONDARY ASN","ESAR R1","45607@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT SECONDARY ASN AND INSTANCE","ESAIR R1","47515@0|//@16|R1@24|//@28|??@32","SO" +"EXTRACT SIGNIFICANCE (extended DFP to 64)","ESXTR R1,R2","46063@0|//@16|R1@24|R2@28|??@32","SP Dt" +"EXTRACT 
SIGNIFICANCE (long DFP to 64)","ESDTR R1,R2","46055@0|//@16|R1@24|R2@28|??@32","Dt" +"EXTRACT STACKED REGISTERS (32)","EREG R1,R2","45641@0|//@16|R1@24|R2@28|??@32","SE" +"EXTRACT STACKED REGISTERS (64)","EREGG R1,R2","47374@0|//@16|R1@24|R2@28|??@32","SE" +"EXTRACT STACKED STATE","ESTA R1,R2","45642@0|//@16|R1@24|R2@28|??@32","B24A" +"EXTRACT TRANSACTION NESTING DEPTH","ETND R1","45804@0|//@16|R1@24|//@28|??@32","SO" +"FIND LEFTMOST ONE","FLOGR R1,R2","47491@0|//@16|R1@24|R2@28|??@32","SP" +"HALT SUBCHANNEL","HSCH","45617@0|//@16|??@32","OP" +"HALVE (long HFP)","HDR R1,R2","36@0|R1@8|R2@12|??@16","Da" +"HALVE (short HFP)","HER R1,R2","52@0|R1@8|R2@12|??@16","Da" +"INSERT ADDRESS SPACE CONTROL","IAC R1","45604@0|//@16|R1@24|//@28|??@32","SO" +"INSERT BIASED EXPONENT (64 to extended DFP)","IEXTR R1,R3,R2","46078@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"INSERT BIASED EXPONENT (64 to long DFP)","IEDTR R1,R3,R2","46070@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"INSERT CHARACTER","IC R1,D2(X2,B2)","67@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"INSERT CHARACTER","ICY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|115@40|??@48","B" +"INSERT CHARACTERS UNDER MASK (high)","ICMH R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|128@40|??@48","B" +"INSERT CHARACTERS UNDER MASK (low)","ICM R1,M3,D2(B2)","191@0|R1@8|M3@12|B2@16|D2@20|??@32","B" +"INSERT CHARACTERS UNDER MASK (low)","ICMY R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|129@40|??@48","B" +"INSERT IMMEDIATE (high high)","IIHH R1,I2","165@0|R1@8|0@12|I2@16|??@32","A50" +"INSERT IMMEDIATE (high low)","IIHL R1,I2","165@0|R1@8|1@12|I2@16|??@32","A51" +"INSERT IMMEDIATE (high)","IIHF R1,I2","192@0|R1@8|8@12|I2@16|??@48","C08" +"INSERT IMMEDIATE (low high)","IILH R1,I2","165@0|R1@8|2@12|I2@16|??@32","A52" +"INSERT IMMEDIATE (low low)","IILL R1,I2","165@0|R1@8|3@12|I2@16|??@32","A53" +"INSERT IMMEDIATE (low)","IILF R1,I2","192@0|R1@8|9@12|I2@16|??@48","C09" +"INSERT PROGRAM MASK","IPM R1","45602@0|//@16|R1@24|//@28|??@32","B222" +"INSERT 
PSW KEY","IPK","45579@0|//@16|??@32","G2" +"INSERT REFERENCE BITS MULTIPLE","IRBM R1,R2","47532@0|//@16|R1@24|R2@28|??@32","B9AC" +"INSERT STORAGE KEY EXTENDED","ISKE R1,R2","45609@0|//@16|R1@24|R2@28|??@32","SO" +"INSERT VIRTUAL STORAGE KEY","IVSK R1,R2","45603@0|//@16|R1@24|R2@28|??@32","SO" +"INVALIDATE DAT TABLE ENTRY","IDTE R1,R3,R2,M4","47502@0|R3@16|M4@20|R1@24|R2@28|??@32","SP" +"INVALIDATE PAGE TABLE ENTRY","IPTE R1,R2,R3,M4","45601@0|R3@16|M4@20|R1@24|R2@28|??@32","SP II" +"LOAD (32)","L R1,D2(X2,B2)","88@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"LOAD (32)","LR R1,R2","24@0|R1@8|R2@12|??@16","18" +"LOAD (32)","LY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|88@40|??@48","B" +"LOAD (64)","LG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|4@40|??@48","B" +"LOAD (64)","LGR R1,R2","47364@0|//@16|R1@24|R2@28|??@32","B904" +"LOAD (64←32)","LGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|20@40|??@48","B" +"LOAD (64←32)","LGFR R1,R2","47380@0|//@16|R1@24|R2@28|??@32","B914" +"LOAD (extended)","LXR R1,R2","45925@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD (long)","LD R1,D2(X2,B2)","104@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"LOAD (long)","LDR R1,R2","40@0|R1@8|R2@12|??@16","Da" +"LOAD (long)","LDY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|101@40|??@48","Da" +"LOAD (short)","LE R1,D2(X2,B2)","120@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"LOAD (short)","LER R1,R2","56@0|R1@8|R2@12|??@16","Da" +"LOAD (short)","LEY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|100@40|??@48","Da" +"LOAD ACCESS MULTIPLE 7-268","LAM R1,R3,D2(B2)","154@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"LOAD ACCESS MULTIPLE 7-268","LAMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|154@40|??@48","SP" +"LOAD ADDRESS","LA R1,D2(X2,B2)","65@0|R1@8|X2@12|B2@16|D2@20|??@32","41" +"LOAD ADDRESS","LAY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|113@40|??@48","E371" +"LOAD ADDRESS EXTENDED","LAE R1,D2(X2,B2)","81@0|R1@8|X2@12|B2@16|D2@20|??@32","U" +"LOAD ADDRESS EXTENDED","LAEY 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|117@40|??@48","U" +"LOAD ADDRESS RELATIVE LONG","LARL R1,RI2","192@0|R1@8|0@12|RI2@16|??@48","C00" +"LOAD ADDRESS SPACE PARAMETERS","LASP D1(B1),D2(B2)","58624@0|B1@16|D1@20|B2@32|D2@36|??@48","SP SO" +"LOAD AND ADD (32)","LAA R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|248@40|??@48","SP" +"LOAD AND ADD (64)","LAAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|232@40|??@48","SP" +"LOAD AND ADD LOGICAL (32)","LAAL R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|250@40|??@48","SP" +"LOAD AND ADD LOGICAL (64)","LAALG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|234@40|??@48","SP" +"LOAD AND AND (32)","LAN R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|244@40|??@48","SP" +"LOAD AND AND (64)","LANG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|228@40|??@48","SP" +"LOAD AND EXCLUSIVE OR (32)","LAX R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|247@40|??@48","SP" +"LOAD AND EXCLUSIVE OR (64)","LAXG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|231@40|??@48","SP" +"LOAD AND OR (32)","LAO R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|246@40|??@48","SP" +"LOAD AND OR (64)","LAOG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|230@40|??@48","SP" +"LOAD AND TEST (32)","LT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|18@40|??@48","B" +"LOAD AND TEST (32)","LTR R1,R2","18@0|R1@8|R2@12|??@16","12" +"LOAD AND TEST (64)","LTG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|2@40|??@48","B" +"LOAD AND TEST (64)","LTGR R1,R2","47362@0|//@16|R1@24|R2@28|??@32","B902" +"LOAD AND TEST (64→32)","LTGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|50@40|??@48","B" +"LOAD AND TEST (64→32)","LTGFR R1,R2","47378@0|//@16|R1@24|R2@28|??@32","B912" +"LOAD AND TEST (extended BFP)","LTXBR R1,R2","45890@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD AND TEST (extended DFP)","LTXTR R1,R2","46046@0|//@16|R1@24|R2@28|??@32","SP Dt" +"LOAD AND TEST (extended HFP)","LTXR R1,R2","45922@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD AND TEST (long BFP)","LTDBR 
R1,R2","45842@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD AND TEST (long DFP)","LTDTR R1,R2","46038@0|//@16|R1@24|R2@28|??@32","Dt" +"LOAD AND TEST (long HFP)","LTDR R1,R2","34@0|R1@8|R2@12|??@16","Da" +"LOAD AND TEST (short BFP)","LTEBR R1,R2","45826@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD AND TEST (short HFP)","LTER R1,R2","50@0|R1@8|R2@12|??@16","Da" +"LOAD AND TRAP (32L→32)","LAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|159@40|??@48","B" +"LOAD AND TRAP (64)","LGAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|133@40|??@48","B" +"LOAD AND ZERO RIGHTMOST BYTE (32)","LZRF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|59@40|??@48","B" +"LOAD AND ZERO RIGHTMOST BYTE (64)","LZRG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|42@40|??@48","B" +"LOAD BEAR","LBEAR D2(B2)","45568@0|B2@16|D2@20|??@32","SP" +"LOAD BYTE (32→8)","LB R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|118@40|??@48","E376" +"LOAD BYTE (32←8)","LBR R1,R2","47398@0|//@16|R1@24|R2@28|??@32","B926" +"LOAD BYTE (64→8)","LGB R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|119@40|??@48","E377" +"LOAD BYTE (64←8)","LGBR R1,R2","47366@0|//@16|R1@24|R2@28|??@32","B906" +"LOAD BYTE HIGH (32←8)","LBH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|192@40|??@48","B" +"LOAD COMPLEMENT (32)","LCR R1,R2","19@0|R1@8|R2@12|??@16","13" +"LOAD COMPLEMENT (64)","LCGR R1,R2","47363@0|//@16|R1@24|R2@28|??@32","B903" +"LOAD COMPLEMENT (64←32)","LCGFR R1,R2","47379@0|//@16|R1@24|R2@28|??@32","B913" +"LOAD COMPLEMENT (extended BFP)","LCXBR R1,R2","45891@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD COMPLEMENT (extended HFP)","LCXR R1,R2","45923@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD COMPLEMENT (long BFP)","LCDBR R1,R2","45843@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD COMPLEMENT (long HFP)","LCDR R1,R2","35@0|R1@8|R2@12|??@16","Da" +"LOAD COMPLEMENT (long)","LCDFR R1,R2","45939@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD COMPLEMENT (short BFP)","LCEBR R1,R2","45827@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD COMPLEMENT (short HFP)","LCER 
R1,R2","51@0|R1@8|R2@12|??@16","Da" +"LOAD CONTROL (32)","LCTL R1,R3,D2(B2)","183@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"LOAD CONTROL (64)","LCTLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|47@40|??@48","SP" +"LOAD COUNT TO BLOCK BOUNDARY","LCBB R1,D2(X2,B2),M3","231@0|R1@8|X2@12|B2@16|D2@20|M3@32|//@36|39@40|??@48","SP" +"LOAD FP INTEGER (extended BFP)","FIXBR R1,M3,R2","45895@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (extended BFP)","FIXBRA R1,M3,R2,M4","45895@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (extended DFP)","FIXTR R1,M3,R2,M4","46047@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"LOAD FP INTEGER (extended HFP)","FIXR R1,R2","45927@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD FP INTEGER (long BFP)","FIDBR R1,M3,R2","45919@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (long BFP)","FIDBRA R1,M3,R2,M4","45919@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (long DFP)","FIDTR R1,M3,R2,M4","46039@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"LOAD FP INTEGER (long HFP)","FIDR R1,R2","45951@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD FP INTEGER (short BFP)","FIEBR R1,M3,R2","45911@0|M3@16|//@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (short BFP)","FIEBRA R1,M3,R2,M4","45911@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD FP INTEGER (short HFP)","FIER R1,R2","45943@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD FPC","LFPC D2(B2)","45725@0|B2@16|D2@20|??@32","SP Db" +"LOAD FPC AND SIGNAL","LFAS D2(B2)","45757@0|B2@16|D2@20|??@32","SP Dt" +"LOAD FPR FROM GR (64 to long)","LDGR R1,R2","46017@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD GR FROM FPR (long to 64)","LGDR R1,R2","46029@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD GUARDED (64)","LGG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|76@40|??@48","SP" +"LOAD GUARDED STORAGE CONTROLS","LGSC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|77@40|??@48","SO" +"LOAD HALFWORD (32→16)","LH R1,D2(X2,B2)","72@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"LOAD HALFWORD (32←16)","LHR 
R1,R2","47399@0|//@16|R1@24|R2@28|??@32","B927" +"LOAD HALFWORD (32←16)","LHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|120@40|??@48","B" +"LOAD HALFWORD (64←16)","LGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|21@40|??@48","B" +"LOAD HALFWORD (64←16)","LGHR R1,R2","47367@0|//@16|R1@24|R2@28|??@32","B907" +"LOAD HALFWORD HIGH (32→16)","LHH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|196@40|??@48","B" +"LOAD HALFWORD HIGH IMMEDIATE ON","LOCHHI R1,I2,M3","236@0|R1@8|M3@12|I2@16|//@32|78@40|??@48","EC4E" +"CONDITION (32←16)LOAD HALFWORD IMMEDIATE (32)←16","LHI R1,I2","167@0|R1@8|8@12|I2@16|??@32","A78" +"LOAD HALFWORD IMMEDIATE (64→16)","LGHI R1,I2","167@0|R1@8|9@12|I2@16|??@32","A79" +"LOAD HALFWORD IMMEDIATE ON CONDITION(32←16)","LOCHI R1,I2,M3","236@0|R1@8|M3@12|I2@16|//@32|66@40|??@48","EC42" +"LOAD HALFWORD IMMEDIATE ON CONDITION(64→16)","LOCGHI R1,I2,M3","236@0|R1@8|M3@12|I2@16|//@32|70@40|??@48","EC46" +"LOAD HALFWORD RELATIVE LONG (32←16)","LHRL R1,RI2","196@0|R1@8|5@12|RI2@16|??@48","C45" +"LOAD HALFWORD RELATIVE LONG (64←16)","LGHRL R1,RI2","196@0|R1@8|4@12|RI2@16|??@48","C44" +"LOAD HIGH (32)","LFH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|202@40|??@48","B" +"LOAD HIGH AND TRAP (32H←32)","LFHAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|200@40|??@48","B" +"LOAD HIGH ON CONDITION (32)","LOCFH R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|224@40|??@48","B" +"LOAD HIGH ON CONDITION (32)","LOCFHR R1,R2,M3","47584@0|M3@16|//@20|R1@24|R2@28|??@32","B9E0" +"LOAD IMMEDIATE (64→32)","LGFI R1,I2","192@0|R1@8|1@12|I2@16|??@48","C01" +"LOAD LENGTHENED (long to extended BFP)","LXDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|5@40|??@48","SP Db" +"LOAD LENGTHENED (long to extended BFP)","LXDBR R1,R2","45829@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD LENGTHENED (long to extended DFP)","LXDTR R1,R2,M4","46044@0|//@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"LOAD LENGTHENED (long to extended HFP)","LXD R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|37@40|??@48","SP 
Da" +"LOAD LENGTHENED (long to extended HFP)","LXDR R1,R2","45861@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD LENGTHENED (short to extended BFP)","LXEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|6@40|??@48","SP Db" +"LOAD LENGTHENED (short to extended BFP)","LXEBR R1,R2","45830@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD LENGTHENED (short to extended HFP)","LXE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|38@40|??@48","SP Da" +"LOAD LENGTHENED (short to extended HFP)","LXER R1,R2","45862@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD LENGTHENED (short to long BFP)","LDEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|4@40|??@48","Db" +"LOAD LENGTHENED (short to long BFP)","LDEBR R1,R2","45828@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD LENGTHENED (short to long DFP)","LDETR R1,R2,M4","46036@0|//@16|M4@20|R1@24|R2@28|??@32","Dt" +"LOAD LENGTHENED (short to long HFP)","LDE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|36@40|??@48","Da" +"LOAD LENGTHENED (short to long HFP)","LDER R1,R2","45860@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD LOGICAL (64←32)","LLGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|22@40|??@48","B" +"LOAD LOGICAL (64←32)","LLGFR R1,R2","47382@0|//@16|R1@24|R2@28|??@32","B916" +"LOAD LOGICAL AND SHIFT GUARDED (64←32)","LLGFSG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|72@40|??@48","SP" +"LOAD LOGICAL AND TRAP (64→32)","LLGFAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|157@40|??@48","B" +"LOAD LOGICAL AND ZERO RIGHTMOST BYTE(64→32)","LLZRGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|58@40|??@48","B" +"LOAD LOGICAL CHARACTER (32→8)","LLC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|148@40|??@48","B" +"LOAD LOGICAL CHARACTER (32←8)","LLCR R1,R2","47508@0|//@16|R1@24|R2@28|??@32","B994" +"LOAD LOGICAL CHARACTER (64←8)","LLGC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|144@40|??@48","B" +"LOAD LOGICAL CHARACTER (64←8)","LLGCR R1,R2","47492@0|//@16|R1@24|R2@28|??@32","B984" +"LOAD LOGICAL CHARACTER HIGH (32←8)","LLCH 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|194@40|??@48","B" +"LOAD LOGICAL HALFWORD (32←16)","LLH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|149@40|??@48","B" +"LOAD LOGICAL HALFWORD (32←16)","LLHR R1,R2","47509@0|//@16|R1@24|R2@28|??@32","B995" +"LOAD LOGICAL HALFWORD (64→16)","LLGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|145@40|??@48","B" +"LOAD LOGICAL HALFWORD (64←16)","LLGHR R1,R2","47493@0|//@16|R1@24|R2@28|??@32","B985" +"LOAD LOGICAL HALFWORD HIGH (32→16)","LLHH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|198@40|??@48","B" +"LOAD LOGICAL HALFWORD RELATIVE LONG(32←16)","LLHRL R1,RI2","196@0|R1@8|2@12|RI2@16|??@48","C42" +"LOAD LOGICAL HALFWORD RELATIVE LONG(64→16)","LLGHRL R1,RI2","196@0|R1@8|6@12|RI2@16|??@48","C46" +"LOAD LOGICAL IMMEDIATE (high high)","LLIHH R1,I2","165@0|R1@8|12@12|I2@16|??@32","A5C" +"LOAD LOGICAL IMMEDIATE (high low)","LLIHL R1,I2","165@0|R1@8|13@12|I2@16|??@32","A5D" +"LOAD LOGICAL IMMEDIATE (high)","LLIHF R1,I2","192@0|R1@8|14@12|I2@16|??@48","C0E" +"LOAD LOGICAL IMMEDIATE (low high)","LLILH R1,I2","165@0|R1@8|14@12|I2@16|??@32","A5E" +"LOAD LOGICAL IMMEDIATE (low low)","LLILL R1,I2","165@0|R1@8|15@12|I2@16|??@32","A5F" +"LOAD LOGICAL IMMEDIATE (low)","LLILF R1,I2","192@0|R1@8|15@12|I2@16|??@48","C0F" +"LOAD LOGICAL RELATIVE LONG (64→32)","LLGFRL R1,RI2","196@0|R1@8|14@12|RI2@16|??@48","SP" +"LOAD LOGICAL THIRTY ONE BITS (64→31)","LLGT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|23@40|??@48","B" +"LOAD LOGICAL THIRTY ONE BITS (64→31)","LLGTR R1,R2","47383@0|//@16|R1@24|R2@28|??@32","B917" +"LOAD LOGICAL THIRTY ONE BITS AND TRAP(64←31)","LLGTAT R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|156@40|??@48","B" +"LOAD MULTIPLE (32)","LM R1,R3,D2(B2)","152@0|R1@8|R3@12|B2@16|D2@20|??@32","B" +"LOAD MULTIPLE (32)","LMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|152@40|??@48","B" +"LOAD MULTIPLE (64)","LMG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|4@40|??@48","B" +"LOAD MULTIPLE DISJOINT (64→32&32)","LMD 
R1,R3,D2(B2),D4(B4)","239@0|R1@8|R3@12|B2@16|D2@20|B4@32|D4@36|??@48","B" +"LOAD MULTIPLE HIGH (32)","LMH R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|150@40|??@48","B" +"LOAD NEGATIVE (32)","LNR R1,R2","17@0|R1@8|R2@12|??@16","11" +"LOAD NEGATIVE (64)","LNGR R1,R2","47361@0|//@16|R1@24|R2@28|??@32","B901" +"LOAD NEGATIVE (64→32)","LNGFR R1,R2","47377@0|//@16|R1@24|R2@28|??@32","B911" +"LOAD NEGATIVE (extended BFP)","LNXBR R1,R2","45889@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD NEGATIVE (extended HFP)","LNXR R1,R2","45921@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD NEGATIVE (long BFP)","LNDBR R1,R2","45841@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD NEGATIVE (long HFP)","LNDR R1,R2","33@0|R1@8|R2@12|??@16","Da" +"LOAD NEGATIVE (long)","LNDFR R1,R2","45937@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD NEGATIVE (short BFP)","LNEBR R1,R2","45825@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD NEGATIVE (short HFP)","LNER R1,R2","49@0|R1@8|R2@12|??@16","Da" +"LOAD ON CONDITION (32)","LOC R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|242@40|??@48","B" +"LOAD ON CONDITION (32)","LOCR R1,R2,M3","47602@0|M3@16|//@20|R1@24|R2@28|??@32","B9F2" +"LOAD ON CONDITION (64)","LOCG R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|226@40|??@48","B" +"LOAD ON CONDITION (64)","LOCGR R1,R2,M3","47586@0|M3@16|//@20|R1@24|R2@28|??@32","B9E2" +"LOAD PAGE TABLE ENTRY ADDRESS","LPTEA R1,R3,R2,M4","47530@0|R3@16|M4@20|R1@24|R2@28|??@32","R" +"LOAD PAIR DISJOINT (32)","LPD R3,D1(B1),D2(B2)","200@0|R3@8|4@12|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"LOAD PAIR DISJOINT (64)","LPDG R3,D1(B1),D2(B2)","200@0|R3@8|5@12|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"LOAD PAIR FROM QUADWORD (64&64←128)","LPQ R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|143@40|??@48","SP" +"LOAD POSITIVE (32)","LPR R1,R2","16@0|R1@8|R2@12|??@16","10" +"LOAD POSITIVE (64)","LPGR R1,R2","47360@0|//@16|R1@24|R2@28|??@32","B900" +"LOAD POSITIVE (64→32)","LPGFR R1,R2","47376@0|//@16|R1@24|R2@28|??@32","B910" +"LOAD POSITIVE (extended BFP)","LPXBR 
R1,R2","45888@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD POSITIVE (extended HFP)","LPXR R1,R2","45920@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD POSITIVE (long BFP)","LPDBR R1,R2","45840@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD POSITIVE (long HFP)","LPDR R1,R2","32@0|R1@8|R2@12|??@16","Da" +"LOAD POSITIVE (long)","LPDFR R1,R2","45936@0|//@16|R1@24|R2@28|??@32","Da" +"LOAD POSITIVE (short BFP)","LPEBR R1,R2","45824@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD POSITIVE (short HFP)","LPER R1,R2","48@0|R1@8|R2@12|??@16","Da" +"LOAD PSW","LPSW D1(B1)","130@0|I2@8|B1@16|D1@20|??@32","SP SO" +"LOAD PSW EXTENDED","LPSWE D2(B2)","45746@0|B2@16|D2@20|??@32","SP SO" +"LOAD PSW EXTENDED","LPSWEY D1(B1)","235@0|//@8|B1@16|D1@20|113@40|??@48","SP SO" +"LOAD REAL ADDRESS (32)","LRA R1,D2(X2,B2)","177@0|R1@8|X2@12|B2@16|D2@20|??@32","SO" +"LOAD REAL ADDRESS (32)","LRAY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|19@40|??@48","SO" +"LOAD REAL ADDRESS (64)","LRAG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|3@40|??@48","BP" +"LOAD RELATIVE LONG (32)","LRL R1,RI2","196@0|R1@8|13@12|RI2@16|??@48","SP" +"LOAD RELATIVE LONG (64)","LGRL R1,RI2","196@0|R1@8|8@12|RI2@16|??@48","SP" +"LOAD RELATIVE LONG (64→32)","LGFRL R1,RI2","196@0|R1@8|12@12|RI2@16|??@48","SP" +"LOAD REVERSED (16)","LRVH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|31@40|??@48","B" +"LOAD REVERSED (32)","LRV R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|30@40|??@48","B" +"LOAD REVERSED (32)","LRVR R1,R2","47391@0|//@16|R1@24|R2@28|??@32","B91F" +"LOAD REVERSED (64)","LRVG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|15@40|??@48","B" +"LOAD REVERSED (64)","LRVGR R1,R2","47375@0|//@16|R1@24|R2@28|??@32","B90F" +"LOAD ROUNDED (extended to long BFP)","LDXBR R1,R2","45893@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to long BFP)","LDXBRA R1,M3,R2,M4","45893@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to long DFP)","LDXTR R1,M3,R2,M4","46045@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"LOAD ROUNDED 
(extended to long HFP)","LDXR R1,R2","37@0|R1@8|R2@12|??@16","SP Da" +"LOAD ROUNDED (extended to long HFP)","LRDR R1,R2","37@0|R1@8|R2@12|??@16","SP Da" +"LOAD ROUNDED (extended to short BFP)","LEXBR R1,R2","45894@0|//@16|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to short BFP)","LEXBRA R1,M3,R2,M4","45894@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (extended to short HFP)","LEXR R1,R2","45926@0|//@16|R1@24|R2@28|??@32","SP Da" +"LOAD ROUNDED (long to short BFP)","LEDBR R1,R2","45892@0|//@16|R1@24|R2@28|??@32","Db" +"LOAD ROUNDED (long to short BFP)","LEDBRA R1,M3,R2,M4","45892@0|M3@16|M4@20|R1@24|R2@28|??@32","SP Db" +"LOAD ROUNDED (long to short DFP)","LEDTR R1,M3,R2,M4","46037@0|M3@16|M4@20|R1@24|R2@28|??@32","Dt" +"LOAD ROUNDED (long to short HFP)","LEDR R1,R2","53@0|R1@8|R2@12|??@16","Da" +"LOAD ROUNDED (long to short HFP)","LRER R1,R2","53@0|R1@8|R2@12|??@16","Da" +"LOAD USING REAL ADDRESS (32)","LURA R1,R2","45643@0|//@16|R1@24|R2@28|??@32","SP" +"LOAD USING REAL ADDRESS (64)","LURAG R1,R2","47365@0|//@16|R1@24|R2@28|??@32","SP" +"LOAD ZERO (extended)","LZXR R1","45942@0|//@16|R1@24|//@28|??@32","SP Da" +"LOAD ZERO (long)","LZDR R1","45941@0|//@16|R1@24|//@28|??@32","Da" +"LOAD ZERO (short)","LZER R1","45940@0|//@16|R1@24|//@28|??@32","Da" +"MODIFY STACKED STATE","MSTA R1","45639@0|//@16|R1@24|//@28|??@32","ST" +"MODIFY SUBCHANNEL","MSCH D2(B2)","45618@0|B2@16|D2@20|??@32","SP OP" +"MONITOR CALL","MC D1(B1),I2","175@0|I2@8|B1@16|D1@20|??@32","SP" +"MOVE (16←16)","MVHHI D1(B1),I2","58692@0|B1@16|D1@20|I2@32|??@48","ST" +"MOVE (32→16)","MVHI D1(B1),I2","58700@0|B1@16|D1@20|I2@32|??@48","ST" +"MOVE (64←16)","MVGHI D1(B1),I2","58696@0|B1@16|D1@20|I2@32|??@48","ST" +"MOVE (character)","MVC D1(L1,B1),D2(B2)","210@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE (immediate)","MVI D1(B1),I2","146@0|I2@8|B1@16|D1@20|??@32","ST" +"MOVE (immediate)","MVIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|82@40|??@48","ST" +"MOVE INVERSE","MVCIN 
D1(L1,B1),D2(B2)","232@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE LONG","MVCL R1,R2","14@0|R1@8|R2@12|??@16","SP II" +"MOVE LONG EXTENDED","MVCLE R1,R3,D2(B2)","168@0|R1@8|R3@12|B2@16|D2@20|??@32","SP IC" +"MOVE LONG UNICODE","MVCLU R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|142@40|??@48","SP IC" +"MOVE NUMERICS","MVN D1(L1,B1),D2(B2)","209@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE PAGE","MVPG R1,R2","45652@0|//@16|R1@24|R2@28|??@32","SP SO" +"MOVE RIGHT TO LEFT","MVCRL D1(B1),D2(B2)","58634@0|B1@16|D1@20|B2@32|D2@36|??@48","G0" +"MOVE STRING","MVST R1,R2","45653@0|//@16|R1@24|R2@28|??@32","SP IC" +"MOVE TO PRIMARY","MVCP D1(R1,B1),D2(B2),R3","218@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE TO SECONDARY","MVCS D1(R1,B1),D2(B2),R3","219@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH DESTINATION KEY","MVCDK D1(B1),D2(B2)","58639@0|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH KEY","MVCK D1(R1,B1),D2(B2),R3","217@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH OFFSET","MVO D1(L1,B1),D2(L2,B2)","241@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MOVE WITH OPTIONAL SPECIFICATIONS","MVCOS D1(B1),D2(B2),R3","200@0|R3@8|0@12|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE WITH SOURCE KEY","MVCSK D1(B1),D2(B2)","58638@0|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"MOVE ZONES","MVZ D1(L1,B1),D2(B2)","211@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"MULTIPLY (128←64)","MG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|132@40|??@48","SP" +"MULTIPLY (128←64)","MGRK R1,R2,R3","47596@0|R3@16|//@20|R1@24|R2@28|??@32","SP" +"MULTIPLY (64←32)","M R1,D2(X2,B2)","92@0|R1@8|X2@12|B2@16|D2@20|??@32","SP" +"MULTIPLY (64←32)","MFY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|92@40|??@48","SP" +"MULTIPLY (64←32)","MR R1,R2","28@0|R1@8|R2@12|??@16","SP" +"MULTIPLY (extended BFP)","MXBR R1,R2","45900@0|//@16|R1@24|R2@28|??@32","SP Db" +"MULTIPLY (extended DFP)","MXTR R1,R2,R3","46040@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"MULTIPLY (extended 
DFP)","MXTRA R1,R2,R3,M4","46040@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"MULTIPLY (extended HFP)","MXR R1,R2","38@0|R1@8|R2@12|??@16","SP Da" +"MULTIPLY (long BFP)","MDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|28@40|??@48","Db" +"MULTIPLY (long BFP)","MDBR R1,R2","45852@0|//@16|R1@24|R2@28|??@32","Db" +"MULTIPLY (long DFP)","MDTR R1,R2,R3","46032@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"MULTIPLY (long DFP)","MDTRA R1,R2,R3,M4","46032@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"MULTIPLY (long HFP)","MD R1,D2(X2,B2)","108@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"MULTIPLY (long HFP)","MDR R1,R2","44@0|R1@8|R2@12|??@16","Da" +"MULTIPLY (long to extended BFP)","MXDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|7@40|??@48","SP Db" +"MULTIPLY (long to extended BFP)","MXDBR R1,R2","45831@0|//@16|R1@24|R2@28|??@32","SP Db" +"MULTIPLY (long to extended HFP)","MXD R1,D2(X2,B2)","103@0|R1@8|X2@12|B2@16|D2@20|??@32","SP Da" +"MULTIPLY (long to extended HFP)","MXDR R1,R2","39@0|R1@8|R2@12|??@16","SP Da" +"MULTIPLY (short BFP)","MEEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|23@40|??@48","Db" +"MULTIPLY (short BFP)","MEEBR R1,R2","45847@0|//@16|R1@24|R2@28|??@32","Db" +"MULTIPLY (short HFP)","MEE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|55@40|??@48","Da" +"MULTIPLY (short HFP)","MEER R1,R2","45879@0|//@16|R1@24|R2@28|??@32","Da" +"MULTIPLY (short to long BFP)","MDEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|12@40|??@48","Db" +"MULTIPLY (short to long BFP)","MDEBR R1,R2","45836@0|//@16|R1@24|R2@28|??@32","Db" +"MULTIPLY (short to long HFP)","MDE R1,D2(X2,B2)","124@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"MULTIPLY (short to long HFP)","MDER R1,R2","60@0|R1@8|R2@12|??@16","Da" +"MULTIPLY (short to long HFP)","ME R1,D2(X2,B2)","124@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"MULTIPLY (short to long HFP)","MER R1,R2","60@0|R1@8|R2@12|??@16","Da" +"MULTIPLY & ADD UNNORMALIZED (long to ext. 
HFP)","MAY R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|58@40|??@48","Da" +"MULTIPLY & ADD UNNORMALIZED (long to ext. HFP)","MAYR R1,R3,R2","45882@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD (long BFP)","MADB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|30@40|??@48","Db" +"MULTIPLY AND ADD (long BFP)","MADBR R1,R3,R2","45854@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND ADD (long HFP)","MAD R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|62@40|??@48","Da" +"MULTIPLY AND ADD (long HFP)","MADR R1,R3,R2","45886@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD (short BFP)","MAEB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|14@40|??@48","Db" +"MULTIPLY AND ADD (short BFP)","MAEBR R1,R3,R2","45838@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND ADD (short HFP)","MAE R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|46@40|??@48","Da" +"MULTIPLY AND ADD (short HFP)","MAER R1,R3,R2","45870@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. high HFP)","MAYH R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|60@40|??@48","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. high HFP)","MAYHR R1,R3,R2","45884@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. low HFP)","MAYL R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|56@40|??@48","Da" +"MULTIPLY AND ADD UNNRM. (long to ext. 
low HFP)","MAYLR R1,R3,R2","45880@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND SUBTRACT (long BFP)","MSDB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|31@40|??@48","Db" +"MULTIPLY AND SUBTRACT (long BFP)","MSDBR R1,R3,R2","45855@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND SUBTRACT (long HFP)","MSD R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|63@40|??@48","Da" +"MULTIPLY AND SUBTRACT (long HFP)","MSDR R1,R3,R2","45887@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY AND SUBTRACT (short BFP)","MSEB R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|15@40|??@48","Db" +"MULTIPLY AND SUBTRACT (short BFP)","MSEBR R1,R3,R2","45839@0|R1@16|//@20|R3@24|R2@28|??@32","Db" +"MULTIPLY AND SUBTRACT (short HFP)","MSE R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|47@40|??@48","Da" +"MULTIPLY AND SUBTRACT (short HFP)","MSER R1,R3,R2","45871@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY DECIMAL","MP D1(L1,B1),D2(L2,B2)","252@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","SP Dg" +"MULTIPLY HALFWORD (32←16)","MH R1,D2(X2,B2)","76@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"MULTIPLY HALFWORD (32←16)","MHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|124@40|??@48","B" +"MULTIPLY HALFWORD (64→16)","MGH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|60@40|??@48","B" +"MULTIPLY HALFWORD IMMEDIATE (32→16)","MHI R1,I2","167@0|R1@8|12@12|I2@16|??@32","A7C" +"MULTIPLY HALFWORD IMMEDIATE (64→16)","MGHI R1,I2","167@0|R1@8|13@12|I2@16|??@32","A7D" +"MULTIPLY LOGICAL (128→64)","MLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|134@40|??@48","SP" +"MULTIPLY LOGICAL (128→64)","MLGR R1,R2","47494@0|//@16|R1@24|R2@28|??@32","SP" +"MULTIPLY LOGICAL (64←32)","ML R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|150@40|??@48","SP" +"MULTIPLY LOGICAL (64←32)","MLR R1,R2","47510@0|//@16|R1@24|R2@28|??@32","SP" +"MULTIPLY SINGLE (32)","MS R1,D2(X2,B2)","113@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"MULTIPLY SINGLE (32)","MSC 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|83@40|??@48","B" +"MULTIPLY SINGLE (32)","MSR R1,R2","45650@0|//@16|R1@24|R2@28|??@32","B252" +"MULTIPLY SINGLE (32)","MSRKC R1,R2,R3","47613@0|R3@16|//@20|R1@24|R2@28|??@32","B9FD" +"MULTIPLY SINGLE (32)","MSY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|81@40|??@48","B" +"MULTIPLY SINGLE (64)","MSG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|12@40|??@48","B" +"MULTIPLY SINGLE (64)","MSGC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|131@40|??@48","B" +"MULTIPLY SINGLE (64)","MSGR R1,R2","47372@0|//@16|R1@24|R2@28|??@32","B90C" +"MULTIPLY SINGLE (64)","MSGRKC R1,R2,R3","47597@0|R3@16|//@20|R1@24|R2@28|??@32","B9ED" +"MULTIPLY SINGLE (64←32)","MSGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|28@40|??@48","B" +"MULTIPLY SINGLE (64←32)","MSGFR R1,R2","47388@0|//@16|R1@24|R2@28|??@32","B91C" +"MULTIPLY SINGLE IMMEDIATE (32)","MSFI R1,I2","194@0|R1@8|1@12|I2@16|??@48","C21" +"MULTIPLY SINGLE IMMEDIATE (64←32)","MSGFI R1,I2","194@0|R1@8|0@12|I2@16|??@48","C20" +"MULTIPLY UNNORM. (long to ext. high HFP)","MYH R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|61@40|??@48","Da" +"MULTIPLY UNNORM. (long to ext. high HFP)","MYHR R1,R3,R2","45885@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY UNNORM. (long to ext. low HFP)","MYL R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|57@40|??@48","Da" +"MULTIPLY UNNORM. (long to ext. low HFP)","MYLR R1,R3,R2","45881@0|R1@16|//@20|R3@24|R2@28|??@32","Da" +"MULTIPLY UNNORMALIZED (long to ext. HFP)","MY R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|59@40|??@48","SP Da" +"MULTIPLY UNNORMALIZED (long to ext. 
HFP)","MYR R1,R3,R2","45883@0|R1@16|//@20|R3@24|R2@28|??@32","SP Da" +"NAND (32)","NNRK R1,R2,R3","47476@0|R3@16|//@20|R1@24|R2@28|??@32","B974" +"NAND (64)","NNGRK R1,R2,R3","47460@0|R3@16|//@20|R1@24|R2@28|??@32","B964" +"NEURAL NETWORK PROCESSING ASSIST","NNPA","47419@0|//@16|??@32","SP IC" +"NEXT INSTRUCTION ACCESS INTENT","NIAI I1,I2","45818@0|//@16|I1@24|I2@28|??@32","B2FA" +"NONTRANSACTIONAL STORE (64)","NTSTG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|37@40|??@48","SP" +"NOR (32)","NORK R1,R2,R3","47478@0|R3@16|//@20|R1@24|R2@28|??@32","B976" +"NOR (64)","NOGRK R1,R2,R3","47462@0|R3@16|//@20|R1@24|R2@28|??@32","B966" +"NOT EXCLUSIVE OR (32)","NXRK R1,R2,R3","47479@0|R3@16|//@20|R1@24|R2@28|??@32","B977" +"NOT EXCLUSIVE OR (64)","NXGRK R1,R2,R3","47463@0|R3@16|//@20|R1@24|R2@28|??@32","B967" +"OR (32)","O R1,D2(X2,B2)","86@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"OR (32)","OR R1,R2","22@0|R1@8|R2@12|??@16","16" +"OR (32)","ORK R1,R2,R3","47606@0|R3@16|//@20|R1@24|R2@28|??@32","B9F6" +"OR (32)","OY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|86@40|??@48","B" +"OR (64)","OG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|129@40|??@48","B" +"OR (64)","OGR R1,R2","47489@0|//@16|R1@24|R2@28|??@32","B981" +"OR (64)","OGRK R1,R2,R3","47590@0|R3@16|//@20|R1@24|R2@28|??@32","B9E6" +"OR (character)","OC D1(L1,B1),D2(B2)","214@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"OR (immediate)","OI D1(B1),I2","150@0|I2@8|B1@16|D1@20|??@32","ST" +"OR (immediate)","OIY D1(B1),I2","235@0|I2@8|B1@16|D1@20|86@40|??@48","ST" +"OR IMMEDIATE (high high)","OIHH R1,I2","165@0|R1@8|8@12|I2@16|??@32","A58" +"OR IMMEDIATE (high low)","OIHL R1,I2","165@0|R1@8|9@12|I2@16|??@32","A59" +"OR IMMEDIATE (high)","OIHF R1,I2","192@0|R1@8|12@12|I2@16|??@48","C0C" +"OR IMMEDIATE (low high)","OILH R1,I2","165@0|R1@8|10@12|I2@16|??@32","A5A" +"OR IMMEDIATE (low low)","OILL R1,I2","165@0|R1@8|11@12|I2@16|??@32","A5B" +"OR IMMEDIATE (low)","OILF R1,I2","192@0|R1@8|13@12|I2@16|??@48","C0D" +"OR WITH COMPLEMENT 
(32)","OCRK R1,R2,R3","47477@0|R3@16|//@20|R1@24|R2@28|??@32","B975" +"OR WITH COMPLEMENT (64)","OCGRK R1,R2,R3","47461@0|R3@16|//@20|R1@24|R2@28|??@32","B965" +"PACK","PACK D1(L1,B1),D2(L2,B2)","242@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"PACK ASCII","PKA D1(B1),D2(L2,B2)","233@0|L2@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"PACK UNICODE","PKU D1(B1),D2(L2,B2)","225@0|L2@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"PAGE IN","PGIN R1,R2","45614@0|//@16|R1@24|R2@28|??@32","B22E" +"PAGE OUT","PGOUT R1,R2","45615@0|//@16|R1@24|R2@28|??@32","B22F" +"PERFORM CRYPTOGRAPHIC COMPUTATION","PCC","47404@0|//@16|??@32","SP IC" +"PERFORM CRYPTOGRAPHIC KEY MGMT. OPERATIONS","PCKMO","47400@0|//@16|??@32","SP" +"PERFORM FLOATING-POINT OPERATION","PFPO","266@0|??@16","SP Da" +"PERFORM FRAME MANAGEMENT FUNCTION","PFMF R1,R2","47535@0|//@16|R1@24|R2@28|??@32","SP IS" +"PERFORM LOCKED OPERATION","PLO R1,D2(B2),R3,D4(B4)","238@0|R1@8|R3@12|B2@16|D2@20|B4@32|D4@36|??@48","SP" +"PERFORM PROCESSOR ASSIST","PPA R1,R2,M3","45800@0|M3@16|//@20|R1@24|R2@28|??@32","B2E8" +"PERFORM RANDOM NUMBER OPERATION","PPNO R1,R2","47420@0|//@16|R1@24|R2@28|??@32","SP IC" +"PERFORM RANDOM NUMBER OPERATION","PRNO R1,R2","47420@0|//@16|R1@24|R2@28|??@32","SP IC" +"PERFORM TIMING FACILITY FUNCTION","PTFF","260@0|??@16","SP" +"PERFORM TOPOLOGY FUNCTION","PTF R1","47522@0|//@16|R1@24|//@28|??@32","SP" +"POPULATION COUNT","POPCNT R1,R2,M3","47585@0|M3@16|//@20|R1@24|R2@28|??@32","B9E1" +"PREFETCH DATA","PFD M1,D2(X2,B2)","227@0|M1@8|X2@12|B2@16|D2@20|54@40|??@48","B" +"PREFETCH DATA RELATIVE LONG","PFDRL M1,RI2","198@0|M1@8|2@12|RI2@16|??@48","C62" +"PROGRAM CALL","PC D2(B2)","45592@0|B2@16|D2@20|??@32","Z" +"PROGRAM RETURN","PR","257@0|??@16","B ST" +"PROGRAM TRANSFER","PT R1,R2","45608@0|//@16|R1@24|R2@28|??@32","B" +"PROGRAM TRANSFER WITH INSTANCE","PTI R1,R2","47518@0|//@16|R1@24|R2@28|??@32","B" +"PURGE ALB","PALB","45640@0|//@16|??@32","B248" +"PURGE TLB","PTLB","45581@0|//@16|??@32","B20D" +"QUANTIZE 
(extended DFP)","QAXTR R1,R3,R2,M4","46077@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"QUANTIZE (long DFP)","QADTR R1,R3,R2,M4","46069@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"QUERY PROCESSOR ACTIVITY COUNTER INFORMATION","QPACI D2(B2)","45711@0|B2@16|D2@20|??@32","ST" +"REROUND (extended DFP)","RRXTR R1,R3,R2,M4","46079@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"REROUND (long DFP)","RRDTR R1,R3,R2,M4","46071@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"RESET CHANNEL PATH","RCHP","45627@0|//@16|??@32","B23B" +"RESET DAT PROTECTION","RDP R1,R3,R2,M4","47499@0|R3@16|M4@20|R1@24|R2@28|??@32","B98B" +"RESET REFERENCE BIT EXTENDED","RRBE R1,R2","45610@0|//@16|R1@24|R2@28|??@32","SO" +"RESET REFERENCE BITS MULTIPLE","RRBM R1,R2","47534@0|//@16|R1@24|R2@28|??@32","B9AE" +"RESUME PROGRAM","RP D2(B2)","45687@0|B2@16|D2@20|??@32","SP WE T" +"RESUME SUBCHANNEL","RSCH","45624@0|//@16|??@32","OP" +"ROTATE LEFT SINGLE LOGICAL (32)","RLL R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|29@40|??@48","EB1D" +"ROTATE LEFT SINGLE LOGICAL (64)","RLLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|28@40|??@48","EB1C" +"ROTATE THEN AND SELECTED BITS (64)","RNSBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|84@40|??@48","EC54" +"ROTATETHENEXCLUSIVEORSELECT.BITS(64)","RXSBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|87@40|??@48","EC57" +"ROTATE THEN INSERT SELECTED BITS (64)","RISBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|85@40|??@48","EC55" +"ROTATE THEN INSERT SELECTED BITS (64)","RISBGN R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|89@40|??@48","EC59" +"ROTATE THEN INSERT SELECTED BITS HIGH(64)","RISBHG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|93@40|??@48","EC5D" +"ROTATE THEN INSERT SELECTED BITS LOW (64)","RISBLG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|81@40|??@48","EC51" +"ROTATE THEN OR SELECTED BITS (64)","ROSBG R1,R2,I3,I4,I5","236@0|R1@8|R2@12|I3@16|I4@24|I5@32|86@40|??@48","EC56" +"SEARCH STRING","SRST 
R1,R2","45662@0|//@16|R1@24|R2@28|??@32","SP IC" +"SEARCH STRING UNICODE","SRSTU R1,R2","47550@0|//@16|R1@24|R2@28|??@32","SP IC" +"SELECT (32)","SELR R1,R2,R3,M4","47600@0|R3@16|M4@20|R1@24|R2@28|??@32","B9F0" +"SELECT (64)","SELGR R1,R2,R3,M4","47587@0|R3@16|M4@20|R1@24|R2@28|??@32","B9E3" +"SELECT HIGH (32)","SELFHR R1,R2,R3,M4","47552@0|R3@16|M4@20|R1@24|R2@28|??@32","B9C0" +"SET ACCESS","SAR R1,R2","45646@0|//@16|R1@24|R2@28|??@32","U" +"SET ADDRESS LIMIT","SAL","45623@0|//@16|??@32","OP" +"SET ADDRESS SPACE CONTROL","SAC D2(B2)","45593@0|B2@16|D2@20|??@32","SP SW" +"SET ADDRESS SPACE CONTROL FAST","SACF D2(B2)","45689@0|B2@16|D2@20|??@32","SP SW" +"SET ADDRESSING MODE (24)","SAM24","268@0|??@16","SP" +"SET ADDRESSING MODE (31)","SAM31","269@0|??@16","SP" +"SET ADDRESSING MODE (64)","SAM64","270@0|??@16","010E" +"SET BFP ROUNDING MODE (2 bit)","SRNM D2(B2)","45721@0|B2@16|D2@20|??@32","Db" +"SET BFP ROUNDING MODE (3 bit)","SRNMB D2(B2)","45752@0|B2@16|D2@20|??@32","SP Db" +"SET CHANNEL MONITOR","SCHM","45628@0|//@16|??@32","OP" +"SET CLOCK","SCK D2(B2)","45572@0|B2@16|D2@20|??@32","SP" +"SET CLOCK COMPARATOR","SCKC D2(B2)","45574@0|B2@16|D2@20|??@32","SP" +"SET CLOCK PROGRAMMABLE FIELD","SCKPF","263@0|??@16","SP" +"SET CPU TIMER","SPT D2(B2)","45576@0|B2@16|D2@20|??@32","SP" +"SET DFP ROUNDING MODE","SRNMT D2(B2)","45753@0|B2@16|D2@20|??@32","Dt" +"SET FPC","SFPC R1","45956@0|//@16|R1@24|//@28|??@32","SP Db" +"SET FPC AND SIGNAL","SFASR R1","45957@0|//@16|R1@24|//@28|??@32","SP Dt" +"SET PREFIX","SPX D2(B2)","45584@0|B2@16|D2@20|??@32","SP" +"SET PROGRAM MASK","SPM R1","4@0|R1@8|//@12|??@16","04" +"SET PSW KEY FROM ADDRESS","SPKA D2(B2)","45578@0|B2@16|D2@20|??@32","SO" +"SET SECONDARY ASN","SSAR R1","45605@0|//@16|R1@24|//@28|??@32","Z" +"SET SECONDARY ASN WITH INSTANCE","SSAIR R1","47519@0|//@16|R1@24|//@28|??@32","Z" +"SET STORAGE KEY EXTENDED","SSKE R1,R2,M3","45611@0|M3@16|//@20|R1@24|R2@28|??@32","IS" +"SET SYSTEM MASK","SSM 
D1(B1)","128@0|I2@8|B1@16|D1@20|??@32","SP SO" +"SHIFT AND ROUND DECIMAL","SRP D1(L1,B1),D2(B2),I3","240@0|L1@8|I3@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"SHIFT LEFT DOUBLE (64)","SLDA R1,D2(B2)","143@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT LEFT DOUBLE LOGICAL (64)","SLDL R1,D2(B2)","141@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT LEFT SINGLE (32)","SLA R1,D2(B2)","139@0|R1@8|//@12|B2@16|D2@20|??@32","8B" +"SHIFT LEFT SINGLE (32)","SLAK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|221@40|??@48","EBDD 7-383" +"SHIFT LEFT SINGLE (64)","SLAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|11@40|??@48","EB0B" +"SHIFT LEFT SINGLE LOGICAL (32)","SLL R1,D2(B2)","137@0|R1@8|//@12|B2@16|D2@20|??@32","89" +"SHIFT LEFT SINGLE LOGICAL (32)","SLLK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|223@40|??@48","EBDF" +"SHIFT LEFT SINGLE LOGICAL (64)","SLLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|13@40|??@48","EB0D" +"SHIFT RIGHT DOUBLE (64)","SRDA R1,D2(B2)","142@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT RIGHT DOUBLE LOGICAL (64)","SRDL R1,D2(B2)","140@0|R1@8|//@12|B2@16|D2@20|??@32","SP" +"SHIFT RIGHT SINGLE (32)","SRA R1,D2(B2)","138@0|R1@8|//@12|B2@16|D2@20|??@32","8A" +"SHIFT RIGHT SINGLE (32)","SRAK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|220@40|??@48","EBDC 7-386" +"SHIFT RIGHT SINGLE (64)","SRAG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|10@40|??@48","EB0A" +"SHIFT RIGHT SINGLE LOGICAL (32)","SRL R1,D2(B2)","136@0|R1@8|//@12|B2@16|D2@20|??@32","88" +"SHIFT RIGHT SINGLE LOGICAL (32)","SRLK R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|222@40|??@48","EBDE 7-387" +"SHIFT RIGHT SINGLE LOGICAL (64)","SRLG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|12@40|??@48","EB0C" +"SHIFT SIGNIFICAND LEFT (extended DFP)","SLXT R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|72@40|??@48","SP Dt" +"SHIFT SIGNIFICAND LEFT (long DFP)","SLDT R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|64@40|??@48","Dt" +"SHIFT SIGNIFICAND RIGHT (extended DFP)","SRXT 
R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|73@40|??@48","SP Dt" +"SHIFT SIGNIFICAND RIGHT (long DFP)","SRDT R1,R3,D2(X2,B2)","237@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|65@40|??@48","Dt" +"SIGNAL PROCESSOR","SIGP R1,R3,D2(B2)","174@0|R1@8|R3@12|B2@16|D2@20|??@32","AE" +"SORT LISTS","SORTL R1,R2","47416@0|//@16|R1@24|R2@28|??@32","SP IC" +"SQUARE ROOT (extended BFP)","SQXBR R1,R2","45846@0|//@16|R1@24|R2@28|??@32","SP Db" +"SQUARE ROOT (extended HFP)","SQXR R1,R2","45878@0|//@16|R1@24|R2@28|??@32","SP Da" +"SQUARE ROOT (long BFP)","SQDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|21@40|??@48","Db" +"SQUARE ROOT (long BFP)","SQDBR R1,R2","45845@0|//@16|R1@24|R2@28|??@32","Db" +"SQUARE ROOT (long HFP)","SQD R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|53@40|??@48","Da" +"SQUARE ROOT (long HFP)","SQDR R1,R2","45636@0|//@16|R1@24|R2@28|??@32","Da" +"SQUARE ROOT (short BFP)","SQEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|20@40|??@48","Db" +"SQUARE ROOT (short BFP)","SQEBR R1,R2","45844@0|//@16|R1@24|R2@28|??@32","Db" +"SQUARE ROOT (short HFP)","SQE R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|52@40|??@48","Da" +"SQUARE ROOT (short HFP)","SQER R1,R2","45637@0|//@16|R1@24|R2@28|??@32","Da" +"START SUBCHANNEL","SSCH D2(B2)","45619@0|B2@16|D2@20|??@32","SP OP" +"STORE (32)","ST R1,D2(X2,B2)","80@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"STORE (32)","STY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|80@40|??@48","ST" +"STORE (64)","STG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|36@40|??@48","ST" +"STORE (long)","STD R1,D2(X2,B2)","96@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"STORE (long)","STDY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|103@40|??@48","Da" +"STORE (short)","STE R1,D2(X2,B2)","112@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"STORE (short)","STEY R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|102@40|??@48","Da" +"STORE ACCESS MULTIPLE 7-389","STAM R1,R3,D2(B2)","155@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"STORE ACCESS MULTIPLE 
7-389","STAMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|155@40|??@48","SP" +"STORE BEAR","STBEAR D2(B2)","45569@0|B2@16|D2@20|??@32","SP" +"STORE CHANNEL PATH STATUS","STCPS D2(B2)","45626@0|B2@16|D2@20|??@32","SP" +"STORE CHANNEL REPORT WORD","STCRW D2(B2)","45625@0|B2@16|D2@20|??@32","SP" +"STORE CHARACTER","STC R1,D2(X2,B2)","66@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"STORE CHARACTER","STCY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|114@40|??@48","ST" +"STORE CHARACTER HIGH (8)","STCH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|195@40|??@48","ST" +"STORE CHARACTERS UNDER MASK (high)","STCMH R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|44@40|??@48","ST" +"STORE CHARACTERS UNDER MASK (low)","STCM R1,M3,D2(B2)","190@0|R1@8|M3@12|B2@16|D2@20|??@32","ST" +"STORE CHARACTERS UNDER MASK (low)","STCMY R1,M3,D2(B2)","235@0|R1@8|M3@12|B2@16|D2@20|45@40|??@48","ST" +"STORE CLOCK","STCK D2(B2)","45573@0|B2@16|D2@20|??@32","ST" +"STORE CLOCK COMPARATOR","STCKC D2(B2)","45575@0|B2@16|D2@20|??@32","SP" +"STORE CLOCK EXTENDED","STCKE D2(B2)","45688@0|B2@16|D2@20|??@32","ST" +"STORE CLOCK FAST","STCKF D2(B2)","45692@0|B2@16|D2@20|??@32","ST" +"STORE CONTROL (32)","STCTL R1,R3,D2(B2)","182@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"STORE CONTROL (64)","STCTG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|37@40|??@48","SP" +"STORE CPU ADDRESS","STAP D2(B2)","45586@0|B2@16|D2@20|??@32","SP" +"STORE CPU ID","STIDP D2(B2)","45570@0|B2@16|D2@20|??@32","SP" +"STORE CPU TIMER","STPT D2(B2)","45577@0|B2@16|D2@20|??@32","SP" +"STORE FACILITY LIST","STFL D2(B2)","45745@0|B2@16|D2@20|??@32","B2B1" +"STORE FACILITY LIST EXTENDED","STFLE D2(B2)","45744@0|B2@16|D2@20|??@32","SP" +"STORE FPC","STFPC D2(B2)","45724@0|B2@16|D2@20|??@32","Db" +"STORE GUARDED STORAGE CONTROLS","STGSC R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|73@40|??@48","SO" +"STORE HALFWORD (16)","STH R1,D2(X2,B2)","64@0|R1@8|X2@12|B2@16|D2@20|??@32","ST" +"STORE HALFWORD (16)","STHY 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|112@40|??@48","ST" +"STORE HALFWORD HIGH (16)","STHH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|199@40|??@48","ST" +"STORE HALFWORD RELATIVE LONG (16)","STHRL R1,RI2","196@0|R1@8|7@12|RI2@16|??@48","ST" +"STORE HIGH (32)","STFH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|203@40|??@48","ST" +"STORE HIGH ON CONDITION","STOCFH R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|225@40|??@48","ST" +"STORE MULTIPLE (32)","STM R1,R3,D2(B2)","144@0|R1@8|R3@12|B2@16|D2@20|??@32","ST" +"STORE MULTIPLE (32)","STMY R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|144@40|??@48","ST" +"STORE MULTIPLE (64)","STMG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|36@40|??@48","ST" +"STORE MULTIPLE HIGH (32)","STMH R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|38@40|??@48","ST" +"STORE ON CONDITION (32)","STOC R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|243@40|??@48","ST" +"STORE ON CONDITION (64)","STOCG R1,D2(B2),M3","235@0|R1@8|M3@12|B2@16|D2@20|227@40|??@48","ST" +"STORE PAIR TO QUADWORD","STPQ R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|142@40|??@48","SP" +"STORE PREFIX","STPX D2(B2)","45585@0|B2@16|D2@20|??@32","SP" +"STORE REAL ADDRESS","STRAG D1(B1),D2(B2)","58626@0|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"STORE RELATIVE LONG (32)","STRL R1,RI2","196@0|R1@8|15@12|RI2@16|??@48","SP" +"STORE RELATIVE LONG (64)","STGRL R1,RI2","196@0|R1@8|11@12|RI2@16|??@48","SP" +"STORE REVERSED (16)","STRVH R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|63@40|??@48","ST" +"STORE REVERSED (32)","STRV R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|62@40|??@48","ST" +"STORE REVERSED (64)","STRVG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|47@40|??@48","ST" +"STORE SUBCHANNEL","STSCH D2(B2)","45620@0|B2@16|D2@20|??@32","SP OP" +"STORE SYSTEM INFORMATION","STSI D2(B2)","45693@0|B2@16|D2@20|??@32","SP" +"STORE THEN AND SYSTEM MASK","STNSM D1(B1),I2","172@0|I2@8|B1@16|D1@20|??@32","ST" +"STORE THEN OR SYSTEM MASK","STOSM D1(B1),I2","173@0|I2@8|B1@16|D1@20|??@32","SP" +"STORE USING 
REAL ADDRESS (32)","STURA R1,R2","45638@0|//@16|R1@24|R2@28|??@32","SP" +"STORE USING REAL ADDRESS (64)","STURG R1,R2","47397@0|//@16|R1@24|R2@28|??@32","SP" +"SUBTRACT (32)","S R1,D2(X2,B2)","91@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"SUBTRACT (32)","SR R1,R2","27@0|R1@8|R2@12|??@16","1B" +"SUBTRACT (32)","SRK R1,R2,R3","47609@0|R3@16|//@20|R1@24|R2@28|??@32","B9F9" +"SUBTRACT (32)","SY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|91@40|??@48","B" +"SUBTRACT (64)","SG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|9@40|??@48","B" +"SUBTRACT (64)","SGR R1,R2","47369@0|//@16|R1@24|R2@28|??@32","B909" +"SUBTRACT (64)","SGRK R1,R2,R3","47593@0|R3@16|//@20|R1@24|R2@28|??@32","B9E9" +"SUBTRACT (64←32)","SGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|25@40|??@48","B" +"SUBTRACT (64→32)","SGFR R1,R2","47385@0|//@16|R1@24|R2@28|??@32","B919" +"SUBTRACT (extended BFP)","SXBR R1,R2","45899@0|//@16|R1@24|R2@28|??@32","SP Db" +"SUBTRACT (extended DFP)","SXTR R1,R2,R3","46043@0|R3@16|//@20|R1@24|R2@28|??@32","SP Dt" +"SUBTRACT (extended DFP)","SXTRA R1,R2,R3,M4","46043@0|R3@16|M4@20|R1@24|R2@28|??@32","SP Dt" +"SUBTRACT (long BFP)","SDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|27@40|??@48","Db" +"SUBTRACT (long BFP)","SDBR R1,R2","45851@0|//@16|R1@24|R2@28|??@32","Db" +"SUBTRACT (long DFP)","SDTR R1,R2,R3","46035@0|R3@16|//@20|R1@24|R2@28|??@32","Dt" +"SUBTRACT (long DFP)","SDTRA R1,R2,R3,M4","46035@0|R3@16|M4@20|R1@24|R2@28|??@32","Dt" +"SUBTRACT (short BFP)","SEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|11@40|??@48","Db" +"SUBTRACT (short BFP)","SEBR R1,R2","45835@0|//@16|R1@24|R2@28|??@32","Db" +"SUBTRACT DECIMAL","SP D1(L1,B1),D2(L2,B2)","251@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" +"SUBTRACT HALFWORD (32←16)","SH R1,D2(X2,B2)","75@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"SUBTRACT HALFWORD (32→16)","SHY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|123@40|??@48","B" +"SUBTRACT HALFWORD (64→16)","SGH 
R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|57@40|??@48","B" +"SUBTRACT HIGH (32)","SHHHR R1,R2,R3","47561@0|R3@16|//@20|R1@24|R2@28|??@32","B9C9" +"SUBTRACT HIGH (32)","SHHLR R1,R2,R3","47577@0|R3@16|//@20|R1@24|R2@28|??@32","B9D9" +"SUBTRACT LOGICAL (32)","SL R1,D2(X2,B2)","95@0|R1@8|X2@12|B2@16|D2@20|??@32","B" +"SUBTRACT LOGICAL (32)","SLR R1,R2","31@0|R1@8|R2@12|??@16","1F" +"SUBTRACT LOGICAL (32)","SLRK R1,R2,R3","47611@0|R3@16|//@20|R1@24|R2@28|??@32","B9FB" +"SUBTRACT LOGICAL (32)","SLY R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|95@40|??@48","B" +"SUBTRACT LOGICAL (64)","SLG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|11@40|??@48","B" +"SUBTRACT LOGICAL (64)","SLGR R1,R2","47371@0|//@16|R1@24|R2@28|??@32","B90B" +"SUBTRACT LOGICAL (64)","SLGRK R1,R2,R3","47595@0|R3@16|//@20|R1@24|R2@28|??@32","B9EB" +"SUBTRACT LOGICAL (64←32)","SLGF R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|27@40|??@48","B" +"SUBTRACT LOGICAL (64←32)","SLGFR R1,R2","47387@0|//@16|R1@24|R2@28|??@32","B91B" +"SUBTRACT LOGICAL HIGH (32)","SLHHHR R1,R2,R3","47563@0|R3@16|//@20|R1@24|R2@28|??@32","B9CB" +"SUBTRACT LOGICAL HIGH (32)","SLHHLR R1,R2,R3","47579@0|R3@16|//@20|R1@24|R2@28|??@32","B9DB" +"SUBTRACT LOGICAL IMMEDIATE (32)","SLFI R1,I2","194@0|R1@8|5@12|I2@16|??@48","C25" +"SUBTRACT LOGICAL IMMEDIATE (64→32)","SLGFI R1,I2","194@0|R1@8|4@12|I2@16|??@48","C24" +"SUBTRACT LOGICAL WITH BORROW (32)","SLB R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|153@40|??@48","B" +"SUBTRACT LOGICAL WITH BORROW (32)","SLBR R1,R2","47513@0|//@16|R1@24|R2@28|??@32","B999" +"SUBTRACT LOGICAL WITH BORROW (64)","SLBG R1,D2(X2,B2)","227@0|R1@8|X2@12|B2@16|D2@20|137@40|??@48","B" +"SUBTRACT LOGICAL WITH BORROW (64)","SLBGR R1,R2","47497@0|//@16|R1@24|R2@28|??@32","B989" +"SUBTRACT NORMALIZED (extended HFP)","SXR R1,R2","55@0|R1@8|R2@12|??@16","SP Da" +"SUBTRACT NORMALIZED (long HFP)","SD R1,D2(X2,B2)","107@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT NORMALIZED (long HFP)","SDR 
R1,R2","43@0|R1@8|R2@12|??@16","Da" +"SUBTRACT NORMALIZED (short HFP)","SE R1,D2(X2,B2)","123@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT NORMALIZED (short HFP)","SER R1,R2","59@0|R1@8|R2@12|??@16","Da" +"SUBTRACT UNNORMALIZED (long HFP)","SW R1,D2(X2,B2)","111@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT UNNORMALIZED (long HFP)","SWR R1,R2","47@0|R1@8|R2@12|??@16","Da" +"SUBTRACT UNNORMALIZED (short HFP)","SU R1,D2(X2,B2)","127@0|R1@8|X2@12|B2@16|D2@20|??@32","Da" +"SUBTRACT UNNORMALIZED (short HFP)","SUR R1,R2","63@0|R1@8|R2@12|??@16","Da" +"SUPERVISOR CALL","SVC I","10@0|I@8|??@16","0A" +"TEST ACCESS","TAR R1,R2","45644@0|//@16|R1@24|R2@28|??@32","U" +"TEST ADDRESSING MODE","TAM","267@0|??@16","010B" +"TEST AND SET","TS D1(B1)","147@0|I2@8|B1@16|D1@20|??@32","ST" +"TEST BLOCK","TB R1,R2","45612@0|//@16|R1@24|R2@28|??@32","II" +"TEST DATA CLASS (extended BFP)","TCXB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|18@40|??@48","SP Db" +"TEST DATA CLASS (extended DFP)","TDCXT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|88@40|??@48","SP Dt" +"TEST DATA CLASS (long BFP)","TCDB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|17@40|??@48","Db" +"TEST DATA CLASS (long DFP)","TDCDT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|84@40|??@48","Dt" +"TEST DATA CLASS (short BFP)","TCEB R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|16@40|??@48","Db" +"TEST DATA CLASS (short DFP)","TDCET R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|80@40|??@48","Dt" +"TEST DATA GROUP (extended DFP)","TDGXT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|89@40|??@48","SP Dt" +"TEST DATA GROUP (long DFP)","TDGDT R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|85@40|??@48","Dt" +"TEST DATA GROUP (short DFP)","TDGET R1,D2(X2,B2)","237@0|R1@8|X2@12|B2@16|D2@20|//@32|81@40|??@48","Dt" +"TEST DECIMAL","TP D1(L1,B1)","235@0|L1@8|//@12|B1@16|D1@20|//@32|192@40|??@48","B" +"TEST PENDING EXTERNAL INTERRUPTION","TPEI R1,R2","47521@0|//@16|R1@24|R2@28|??@32","B9A1" +"TEST 
PENDING INTERRUPTION","TPI D2(B2)","45622@0|B2@16|D2@20|??@32","ST" +"TEST PROTECTION","TPROT D1(B1),D2(B2)","58625@0|B1@16|D1@20|B2@32|D2@36|??@48","SO" +"TEST SUBCHANNEL","TSCH D2(B2)","45621@0|B2@16|D2@20|??@32","SP OP" +"TEST UNDER MASK","TM D1(B1),I2","145@0|I2@8|B1@16|D1@20|??@32","B" +"TEST UNDER MASK","TMY D1(B1),I2","235@0|I2@8|B1@16|D1@20|81@40|??@48","B" +"TEST UNDER MASK (high high)","TMHH R1,I2","167@0|R1@8|2@12|I2@16|??@32","A72" +"TEST UNDER MASK (high low)","TMHL R1,I2","167@0|R1@8|3@12|I2@16|??@32","A73" +"TEST UNDER MASK (low high)","TMLH R1,I2","167@0|R1@8|0@12|I2@16|??@32","A70" +"TEST UNDER MASK (low low)","TMLL R1,I2","167@0|R1@8|1@12|I2@16|??@32","A71" +"TEST UNDER MASK HIGH","TMH R1,I2","167@0|R1@8|0@12|I2@16|??@32","A70" +"TEST UNDER MASK LOW","TML R1,I2","167@0|R1@8|1@12|I2@16|??@32","A71" +"TRACE (32)","TRACE R1,R3,D2(B2)","153@0|R1@8|R3@12|B2@16|D2@20|??@32","SP" +"TRACE (64)","TRACG R1,R3,D2(B2)","235@0|R1@8|R3@12|B2@16|D2@20|15@40|??@48","SP" +"TRANSACTION ABORT","TABORT D2(B2)","45820@0|B2@16|D2@20|??@32","SP SO" +"TRANSACTION BEGIN (constrained)","TBEGINC D1(B1),I2","58721@0|B1@16|D1@20|I2@32|??@48","SP SO" +"TRANSACTION BEGIN (nonconstrained)","TBEGIN D1(B1),I2","58720@0|B1@16|D1@20|I2@32|??@48","SP SO" +"TRANSACTION END","TEND","45816@0|//@16|??@32","SO" +"TRANSLATE","TR D1(L1,B1),D2(B2)","220@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"TRANSLATE AND TEST","TRT D1(L1,B1),D2(B2)","221@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","B" +"TRANSLATE AND TEST EXTENDED","TRTE R1,R2,M3","47551@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE AND TEST REVERSE","TRTR D1(L1,B1),D2(B2)","208@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","B" +"TRANSLATE AND TEST REVERSE EXTENDED","TRTRE R1,R2,M3","47549@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE EXTENDED","TRE R1,R2","45733@0|//@16|R1@24|R2@28|??@32","SP IC" +"TRANSLATE ONE TO ONE","TROO R1,R2,M3","47507@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE ONE TO TWO","TROT 
R1,R2,M3","47506@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE TWO TO ONE","TRTO R1,R2,M3","47505@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRANSLATE TWO TO TWO","TRTT R1,R2,M3","47504@0|M3@16|//@20|R1@24|R2@28|??@32","SP IC" +"TRAP","TRAP2","511@0|??@16","SO" +"TRAP","TRAP4 D2(B2)","45823@0|B2@16|D2@20|??@32","SO" +"UNPACK","UNPK D1(L1,B1),D2(L2,B2)","243@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","ST" +"UNPACK ASCII","UNPKA D1(L1,B1),D2(B2)","234@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"UNPACK UNICODE","UNPKU D1(L1,B1),D2(B2)","226@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48","SP" +"UPDATE TREE","UPT","258@0|??@16","SP II" +"VECTOR ADD","VA V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|243@40|??@48","SP Dv" +"VECTOR ADD COMPUTE CARRY","VACC V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|241@40|??@48","SP Dv" +"VECTOR ADD DECIMAL","VAP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|113@40|??@48","SP Dv" +"VECTOR ADD WITH CARRY","VAC V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|187@40|??@48","SP Dv" +"VECTOR ADD WITH CARRY COMPUTE CARRY","VACCC V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|185@40|??@48","SP Dv" +"VECTOR AND","VN V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|104@40|??@48","Dv" +"VECTOR AND WITH COMPLEMENT","VNC V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|105@40|??@48","Dv" +"VECTOR AVERAGE","VAVG V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|242@40|??@48","SP Dv" +"VECTOR AVERAGE LOGICAL","VAVGL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|240@40|??@48","SP Dv" +"VECTOR BIT PERMUTE","VBPERM V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|133@40|??@48","Dv" +"VECTOR CHECKSUM","VCKSM V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|102@40|??@48","Dv" +"VECTOR COMPARE DECIMAL","VCP V1,V2,M3","230@0|//@8|V1@12|V2@16|//@20|M3@24|//@28|RXB@36|119@40|??@48","Dv" +"VECTOR COMPARE EQUAL","VCEQ 
V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|248@40|??@48","SP Dv" +"VECTOR COMPARE HIGH","VCH V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|251@40|??@48","SP Dv" +"VECTOR COMPARE HIGH LOGICAL","VCHL V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|249@40|??@48","SP Dv" +"VECTOR CONVERT HFP TO SCALED DECIMAL","VCSPH V1,V2,V3,M4","230@0|V1@8|V2@12|V3@16|//@20|M4@24|//@28|RXB@36|125@40|??@48","Dv" +"VECTOR CONVERT TO BINARY","VCVB R1,V2,M3,M4","230@0|R1@8|V2@12|//@16|M3@24|M4@28|//@32|RXB@36|80@40|??@48","Dv" +"VECTOR CONVERT TO BINARY","VCVBG R1,V2,M3,M4","230@0|R1@8|V2@12|//@16|M3@24|M4@28|//@32|RXB@36|82@40|??@48","Dv" +"VECTOR CONVERT TO DECIMAL","VCVD V1,R2,I3,M4","230@0|V1@8|R2@12|//@16|M4@24|I3@28|RXB@36|88@40|??@48","SP Dv" +"VECTOR CONVERT TO DECIMAL","VCVDG V1,R2,I3,M4","230@0|V1@8|R2@12|//@16|M4@24|I3@28|RXB@36|90@40|??@48","SP Dv" +"VECTOR COUNT LEADING ZERO DIGITS","VCLZDP V1,V2,M3","230@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|81@40|??@48","Dv" +"VECTOR COUNT LEADING ZEROS","VCLZ V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|83@40|??@48","SP Dv" +"VECTOR COUNT TRAILING ZEROS","VCTZ V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|82@40|??@48","SP Dv" +"VECTOR DIVIDE DECIMAL","VDP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|122@40|??@48","SP Dv" +"VECTOR ELEMENT COMPARE","VEC V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|219@40|??@48","SP Dv" +"VECTOR ELEMENT COMPARE LOGICAL","VECL V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|217@40|??@48","SP Dv" +"VECTORELEMENTROTATEANDINSERTUNDER MASK","VERIM V1,V2,V3,I4,M5","231@0|V1@8|V2@12|V3@16|//@20|I4@24|M5@32|RXB@36|114@40|??@48","SP Dv" +"VECTOR ELEMENT ROTATE LEFT LOGICAL","VERLL V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|51@40|??@48","SP Dv" +"VECTOR ELEMENT ROTATE LEFT LOGICAL","VERLLV V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|115@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT LEFT","VESLV 
V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|112@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT LEFT","VESL V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|48@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT ARITHMETIC","VESRA V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|58@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT ARITHMETIC","VESRAV V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|122@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT LOGICAL","VESRL V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|56@40|??@48","SP Dv" +"VECTOR ELEMENT SHIFT RIGHT LOGICAL","VESRLV V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|120@40|??@48","SP Dv" +"VECTOR EXCLUSIVE OR","VX V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|109@40|??@48","Dv" +"VECTOR FIND ANY ELEMENT EQUAL","VFAE V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|130@40|??@48","SP Dv" +"VECTOR FIND ELEMENT EQUAL","VFEE V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|128@40|??@48","SP Dv" +"VECTOR FIND ELEMENT NOT EQUAL","VFENE V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|129@40|??@48","SP Dv" +"VECTOR FP ADD","VFA V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|227@40|??@48","SP Dv" +"VECTOR FP COMPARE AND SIGNAL SCALAR","WFK V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|202@40|??@48","SP Dv" +"VECTOR FP COMPARE EQUAL","VFCE V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|232@40|??@48","SP Dv" +"VECTOR FP COMPARE HIGH","VFCH V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|235@40|??@48","SP Dv" +"VECTOR FP COMPARE HIGH OR EQUAL","VFCHE V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|234@40|??@48","SP Dv" +"VECTOR FP COMPARE SCALAR","WFC V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|203@40|??@48","SP Dv" +"VECTOR FP CONVERT AND LENGTHEN FROM NNP HIGH","VCLFNH 
V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|86@40|??@48","Dv" +"VECTOR FP CONVERT AND LENGTHEN FROM NNP LOW","VCLFNL V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|94@40|??@48","Dv" +"VECTOR FP CONVERT AND ROUND TO NNP","VCRNF V1,V2,V3,M4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|117@40|??@48","Dv" +"VECTOR FP CONVERT FROM FIXED","VCFPS V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|195@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM FIXED 64-BIT","VCDG V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|195@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM LOGICAL","VCFPL V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|193@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM LOGICAL 64-BIT","VCDLG V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|193@40|??@48","SP Dv" +"VECTOR FP CONVERT FROM NNP","VCFN V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|93@40|??@48","Dv" +"VECTOR FP CONVERT TO FIXED","VCSFP V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|194@40|??@48","SP Dv" +"VECTOR FP CONVERT TO FIXED 64-BIT","VCGD V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|194@40|??@48","SP Dv" +"VECTOR FP CONVERT TO LOGICAL","VCLFP V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|192@40|??@48","SP Dv" +"VECTOR FP CONVERT TO LOGICAL 64-BIT","VCLGD V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|192@40|??@48","SP Dv" +"VECTOR FP CONVERT TO NNP","VCNF V1,V2,M3,M4","230@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|85@40|??@48","Dv" +"VECTOR FP DIVIDE","VFD V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|229@40|??@48","SP Dv" +"VECTOR FP LOAD LENGTHENED","VFLL V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|196@40|??@48","SP Dv" +"VECTOR FP LOAD ROUNDED","VFLR V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|197@40|??@48","SP Dv" +"VECTOR FP MAXIMUM","VFMAX 
V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|239@40|??@48","SP Dv" +"VECTOR FP MINIMUM","VFMIN V1,V2,V3,M4,M5,M6","231@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|238@40|??@48","SP Dv" +"VECTOR FP MULTIPLY","VFM V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|231@40|??@48","SP Dv" +"VECTOR FP MULTIPLY AND ADD","VFMA V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|143@40|??@48","SP Dv" +"VECTOR FP MULTIPLY AND SUBTRACT","VFMS V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|142@40|??@48","SP Dv" +"VECTOR FP NEGATIVE MULTIPLY AND ADD","VFNMA V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|159@40|??@48","SP Dv" +"VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT","VFNMS V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|158@40|??@48","SP Dv" +"VECTOR FP PERFORM SIGN OPERATION","VFPSO V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|204@40|??@48","SP Dv" +"VECTOR FP SQUARE ROOT","VFSQ V1,V2,M3,M4","231@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|206@40|??@48","SP Dv" +"VECTOR FP SUBTRACT","VFS V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|226@40|??@48","SP Dv" +"VECTOR FP TEST DATA CLASS IMMEDIATE","VFTCI V1,V2,I3,M4,M5","231@0|V1@8|V2@12|I3@16|M5@28|M4@32|RXB@36|74@40|??@48","SP Dv" +"VECTOR GALOIS FIELD MULTIPLY SUM","VGFM V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|180@40|??@48","SP Dv" +"VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE","VGFMA V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|188@40|??@48","SP Dv" +"VECTOR GATHER ELEMENT (32)","VGEF V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|19@40|??@48","SP Dv" +"VECTOR GATHER ELEMENT (64)","VGEG V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|18@40|??@48","SP Dv" +"VECTOR GENERATE BYTE MASK","VGBM V1,I2","231@0|V1@8|//@12|I2@16|//@32|RXB@36|68@40|??@48","Dv" +"VECTOR GENERATE MASK","VGM 
V1,I2,I3,M4","231@0|V1@8|//@12|I2@16|I3@24|M4@32|RXB@36|70@40|??@48","SP Dv" +"VECTOR ISOLATE STRING","VISTR V1,V2,M3,M5","231@0|V1@8|V2@12|//@16|M5@24|//@28|M3@32|RXB@36|92@40|??@48","SP Dv" +"VECTOR LOAD","VL V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|6@40|??@48","Dv" +"VECTOR LOAD","VLR V1,V2","231@0|V1@8|V2@12|//@16|RXB@36|86@40|??@48","Dv" +"VECTOR LOAD AND REPLICATE","VLREP V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|5@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT (16)","VLEBRH V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|1@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT (32)","VLEBRF V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|3@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT (64)","VLEBRG V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|2@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE","VLBRREP V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|5@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO","VLLEBRZ V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|4@40|??@48","SP Dv" +"VECTOR LOAD BYTE REVERSED ELEMENTS","VLBR V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|6@40|??@48","SP Dv" +"VECTOR LOAD COMPLEMENT","VLC V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|222@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (16)","VLEH V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|1@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (32)","VLEF V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|3@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (64)","VLEG V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|2@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT (8)","VLEB V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|0@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (16)","VLEIH V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|65@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (32)","VLEIF 
V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|67@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (64)","VLEIG V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|66@40|??@48","SP Dv" +"VECTOR LOAD ELEMENT IMMEDIATE (8)","VLEIB V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|64@40|??@48","SP Dv" +"VECTOR LOAD ELEMENTS REVERSED","VLER V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|7@40|??@48","SP Dv" +"VECTOR LOAD FP INTEGER","VFI V1,V2,M3,M4,M5","231@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|199@40|??@48","SP Dv" +"VECTOR LOAD GR FROM VR ELEMENT","VLGV R1,V3,D2(B2),M4","231@0|R1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|33@40|??@48","SP Dv" +"VECTOR LOAD IMMEDIATE DECIMAL","VLIP V1,I2,I3","230@0|V1@8|//@12|I2@16|I3@32|RXB@36|73@40|??@48","Dv" +"VECTOR LOAD LOGICAL ELEMENT AND ZERO","VLLEZ V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|4@40|??@48","SP Dv" +"VECTOR LOAD MULTIPLE","VLM V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|54@40|??@48","SP Dv" +"VECTOR LOAD POSITIVE","VLP V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|223@40|??@48","SP Dv" +"VECTOR LOAD RIGHTMOST WITH LENGTH","VLRL V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|53@40|??@48","SP Dv" +"VECTOR LOAD RIGHTMOST WITH LENGTH","VLRLR V1,R3,D2(B2)","230@0|//@8|R3@12|B2@16|D2@20|V1@32|RXB@36|55@40|??@48","Dv" +"VECTOR LOAD TO BLOCK BOUNDARY","VLBB V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|7@40|??@48","SP Dv" +"VECTOR LOAD VR ELEMENT FROM GR","VLVG V1,R3,D2(B2),M4","231@0|V1@8|R3@12|B2@16|D2@20|M4@32|RXB@36|34@40|??@48","SP Dv" +"VECTOR LOAD VR FROM GRS DISJOINT","VLVGP V1,R2,R3","231@0|V1@8|R2@12|R3@16|//@20|RXB@36|98@40|??@48","Dv" +"VECTOR LOAD WITH LENGTH","VLL V1,R3,D2(B2)","231@0|V1@8|R3@12|B2@16|D2@20|//@32|RXB@36|55@40|??@48","Dv" +"VECTOR MAXIMUM","VMX V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|255@40|??@48","SP Dv" +"VECTOR MAXIMUM LOGICAL","VMXL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|253@40|??@48","SP Dv" +"VECTOR 
MERGE HIGH","VMRH V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|97@40|??@48","SP Dv" +"VECTOR MERGE LOW","VMRL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|96@40|??@48","SP Dv" +"VECTOR MINIMUM","VMN V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|254@40|??@48","SP Dv" +"VECTOR MINIMUM LOGICAL","VMNL V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|252@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD EVEN","VMAE V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|174@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD HIGH","VMAH V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|171@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOGICAL EVEN","VMALE V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|172@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOGICAL HIGH","VMALH V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|169@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOGICAL ODD","VMALO V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|173@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD LOW","VMAL V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|170@40|??@48","SP Dv" +"VECTOR MULTIPLY AND ADD ODD","VMAO V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|175@40|??@48","SP Dv" +"VECTOR MULTIPLY AND SHIFT DECIMAL","VMSP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|121@40|??@48","SP Dv" +"VECTOR MULTIPLY DECIMAL","VMP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|120@40|??@48","SP Dv" +"VECTOR MULTIPLY EVEN","VME V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|166@40|??@48","SP Dv" +"VECTOR MULTIPLY HIGH","VMH V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|163@40|??@48","SP Dv" +"VECTOR MULTIPLY LOGICAL EVEN","VMLE V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|164@40|??@48","SP Dv" +"VECTOR MULTIPLY LOGICAL HIGH","VMLH V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|161@40|??@48","SP 
Dv" +"VECTOR MULTIPLY LOGICAL ODD","VMLO V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|165@40|??@48","SP Dv" +"VECTOR MULTIPLY LOW","VML V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|162@40|??@48","SP Dv" +"VECTOR MULTIPLY ODD","VMO V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|167@40|??@48","SP Dv" +"VECTOR MULTIPLY SUM LOGICAL","VMSL V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|184@40|??@48","SP Dv" +"VECTOR NAND","VNN V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|110@40|??@48","DV" +"VECTOR NOR","VNO V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|107@40|??@48","Dv" +"VECTOR NOT EXCLUSIVE OR","VNX V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|108@40|??@48","Dv" +"VECTOR OR","VO V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|106@40|??@48","Dv" +"VECTOR OR WITH COMPLEMENT","VOC V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|111@40|??@48","Dv" +"VECTOR PACK","VPK V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|148@40|??@48","SP Dv" +"VECTOR PACK LOGICAL SATURATE","VPKLS V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|149@40|??@48","SP Dv" +"VECTOR PACK SATURATE","VPKS V1,V2,V3,M4,M5","231@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|151@40|??@48","SP Dv" +"VECTOR PACK ZONED","VPKZ V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|52@40|??@48","SP Dv" +"VECTOR PACK ZONED REGISTER","VPKZR V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|112@40|??@48","SP Dv" +"VECTOR PERFORM SIGN OPERATION DECIMAL","VPSOP V1,V2,I3,I4,M5","230@0|V1@8|V2@12|I4@16|M5@24|I3@28|RXB@36|91@40|??@48","SP Dv" +"VECTOR PERMUTE","VPERM V1,V2,V3,V4","231@0|V1@8|V2@12|V3@16|//@20|V4@32|RXB@36|140@40|??@48","Dv" +"VECTOR PERMUTE DOUBLEWORD IMMEDIATE","VPDI V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|132@40|??@48","Dv" +"VECTOR POPULATION COUNT","VPOPCT V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|80@40|??@48","SP Dv" +"VECTOR REMAINDER DECIMAL","VRP 
V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|123@40|??@48","SP Dv" +"VECTOR REPLICATE","VREP V1,V3,I2,M4","231@0|V1@8|V3@12|I2@16|M4@32|RXB@36|77@40|??@48","SP Dv" +"VECTOR REPLICATE IMMEDIATE","VREPI V1,I2,M3","231@0|V1@8|//@12|I2@16|M3@32|RXB@36|69@40|??@48","SP Dv" +"VECTOR SCATTER ELEMENT (32)","VSCEF V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|27@40|??@48","SP Dv" +"VECTOR SCATTER ELEMENT (64)","VSCEG V1,D2(V2,B2),M3","231@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|26@40|??@48","SP Dv" +"VECTOR SELECT","VSEL V1,V2,V3,V4","231@0|V1@8|V2@12|V3@16|//@20|V4@32|RXB@36|141@40|??@48","Dv" +"VECTOR SHIFT AND DIVIDE DECIMAL","VSDP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|126@40|??@48","SP Dv" +"VECTOR SHIFT AND ROUND DECIMAL","VSRP V1,V2,I3,I4,M5","230@0|V1@8|V2@12|I4@16|M5@24|I3@28|RXB@36|89@40|??@48","SP Dv" +"VECTOR SHIFT AND ROUND DECIMAL REGISTER","VSRPR V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|114@40|??@48","SP Dv" +"VECTOR SHIFT LEFT","VSL V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|116@40|??@48","Dv" +"VECTOR SHIFT LEFT BY BYTE","VSLB V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|117@40|??@48","Dv" +"VECTOR SHIFT LEFT DOUBLE BY BIT","VSLD V1,V2,V3,I4","231@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|134@40|??@48","SP Dv" +"VECTOR SHIFT LEFT DOUBLE BY BYTE","VSLDB V1,V2,V3,I4","231@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|119@40|??@48","Dv" +"VECTOR SHIFT RIGHT ARITHMETIC","VSRA V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|126@40|??@48","Dv" +"VECTOR SHIFT RIGHT ARITHMETIC BY BYTE","VSRAB V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|127@40|??@48","Dv" +"VECTOR SHIFT RIGHT DOUBLE BY BIT","VSRD V1,V2,V3,I4","231@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|135@40|??@48","SP Dv" +"VECTOR SHIFT RIGHT LOGICAL","VSRL V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|124@40|??@48","Dv" +"VECTOR SHIFT RIGHT LOGICAL BY BYTE","VSRLB 
V1,V2,V3","231@0|V1@8|V2@12|V3@16|//@20|RXB@36|125@40|??@48","Dv" +"VECTOR SIGN EXTEND TO DOUBLEWORD","VSEG V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|95@40|??@48","SP Dv" +"VECTOR STORE","VST V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|14@40|??@48","Dv" +"VECTOR STORE BYTE REVERSED ELEMENT(16)","VSTEBRH V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|9@40|??@48","SP Dv" +"VECTOR STORE BYTE REVERSED ELEMENT(32)","VSTEBRF V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|11@40|??@48","SP Dv" +"VECTOR STORE BYTE REVERSED ELEMENT(64)","VSTEBRG V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|10@40|??@48","SP Dv" +"VECTOR STORE BYTE REVERSED ELEMENTS","VSTBR V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|14@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (16)","VSTEH V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|9@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (32)","VSTEF V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|11@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (64)","VSTEG V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|10@40|??@48","SP Dv" +"VECTOR STORE ELEMENT (8)","VSTEB V1,D2(X2,B2),M3","231@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|8@40|??@48","SP Dv" +"VECTOR STORE ELEMENTS REVERSED","VSTER V1,D2(X2,B2),M3","230@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|15@40|??@48","SP Dv" +"VECTOR STORE MULTIPLE","VSTM V1,V3,D2(B2),M4","231@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|62@40|??@48","SP Dv" +"VECTOR STORE RIGHTMOST WITH LENGTH","VSTRL V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|61@40|??@48","SP Dv" +"VECTOR STORE RIGHTMOST WITH LENGTH","VSTRLR V1,R3,D2(B2)","230@0|//@8|R3@12|B2@16|D2@20|V1@32|RXB@36|63@40|??@48","Dv" +"VECTOR STORE WITH LENGTH","VSTL V1,R3,D2(B2)","231@0|V1@8|R3@12|B2@16|D2@20|//@32|RXB@36|63@40|??@48","Dv" +"VECTOR STRING RANGE COMPARE","VSTRC V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|138@40|??@48","SP Dv" +"VECTOR STRING 
SEARCH","VSTRS V1,V2,V3,V4,M5,M6","231@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|139@40|??@48","SP Dv" +"VECTOR SUBTRACT","VS V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|247@40|??@48","SP Dv" +"VECTOR SUBTRACT COMPUTE BORROW INDICATION","VSCBI V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|245@40|??@48","SP Dv" +"VECTOR SUBTRACT DECIMAL","VSP V1,V2,V3,I4,M5","230@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|115@40|??@48","SP Dv" +"VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION","VSBCBI V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|189@40|??@48","SP Dv" +"VECTOR SUBTRACT WITH BORROW INDICATION","VSBI V1,V2,V3,V4,M5","231@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|191@40|??@48","SP Dv" +"VECTOR SUM ACROSS DOUBLEWORD","VSUMG V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|101@40|??@48","SP Dv" +"VECTOR SUM ACROSS QUADWORD","VSUMQ V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|103@40|??@48","SP Dv" +"VECTOR SUM ACROSS WORD","VSUM V1,V2,V3,M4","231@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|100@40|??@48","SP Dv" +"VECTOR TEST DECIMAL","VTP V1","230@0|//@8|V1@12|//@16|RXB@36|95@40|??@48","Dv" +"VECTOR TEST UNDER MASK","VTM V1,V2","231@0|V1@8|V2@12|//@16|RXB@36|216@40|??@48","Dv" +"VECTOR UNPACK HIGH","VUPH V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|215@40|??@48","SP Dv" +"VECTOR UNPACK LOGICAL HIGH","VUPLH V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|213@40|??@48","SP Dv" +"VECTOR UNPACK LOGICAL LOW","VUPLL V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|212@40|??@48","SP Dv" +"VECTOR UNPACK LOW","VUPL V1,V2,M3","231@0|V1@8|V2@12|//@16|M3@32|RXB@36|214@40|??@48","SP Dv" +"VECTOR UNPACK ZONED","VUPKZ V1,D2(B2),I3","230@0|I3@8|B2@16|D2@20|V1@32|RXB@36|60@40|??@48","SP Dv" +"VECTOR UNPACK ZONED HIGH","VUPKZH V1,V2,M3","230@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|84@40|??@48","Dv" +"VECTOR UNPACK ZONED LOW","VUPKZL V1,V2,M3","230@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|92@40|??@48","Dv" +"ZERO AND ADD","ZAP 
D1(L1,B1),D2(L2,B2)","248@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48","Dg" diff --git a/s390x/s390xasm/Makefile b/s390x/s390xasm/Makefile new file mode 100644 index 00000000..6d02dac2 --- /dev/null +++ b/s390x/s390xasm/Makefile @@ -0,0 +1,2 @@ +tables.go: ../s390xmap/map.go ../s390x.csv + go run ../s390xmap/map.go -fmt=decoder ../s390x.csv >_tables.go && gofmt _tables.go >tables.go && rm _tables.go diff --git a/s390x/s390xasm/decode.go b/s390x/s390xasm/decode.go new file mode 100644 index 00000000..823fe591 --- /dev/null +++ b/s390x/s390xasm/decode.go @@ -0,0 +1,241 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390xasm + +import ( + "encoding/binary" + "fmt" +) + +// instFormat is a decoding rule for one specific instruction form. +// An instruction ins matches the rule if ins&Mask == Value. +// DontCare bits are mainly used for finding the same instruction +// name differing with the number of argument fields. +// The Args are stored in the same order as the instruction manual. +type instFormat struct { + Op Op + Mask uint64 + Value uint64 + DontCare uint64 + Args [8]*argField +} + +// argField indicate how to decode an argument to an instruction. +// First parse the value from the BitFields, shift it left by Shift +// bits to get the actual numerical value. +type argField struct { + Type ArgType + flags uint16 + BitField +} + +// Parse parses the Arg out from the given binary instruction i. 
+func (a argField) Parse(i uint64) Arg { + switch a.Type { + default: + return nil + case TypeUnknown: + return nil + case TypeReg: + return R0 + Reg(a.BitField.Parse(i)) + case TypeFPReg: + return F0 + Reg(a.BitField.Parse(i)) + case TypeCReg: + return C0 + Reg(a.BitField.Parse(i)) + case TypeACReg: + return A0 + Reg(a.BitField.Parse(i)) + case TypeBaseReg: + return B0 + Base(a.BitField.Parse(i)) + case TypeIndexReg: + return X0 + Index(a.BitField.Parse(i)) + case TypeDispUnsigned: + return Disp12(a.BitField.Parse(i)) + case TypeDispSigned20: + return Disp20(a.BitField.ParseSigned(i)) + case TypeVecReg: + m := i >> 24 // Handling RXB field(bits 36 to 39) + if ((m>>3)&0x1 == 1) && (a.BitField.Offs == 8) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else if ((m>>2)&0x1 == 1) && (a.BitField.Offs == 12) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else if ((m>>1)&0x1 == 1) && (a.BitField.Offs == 16) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else if ((m)&0x1 == 1) && (a.BitField.Offs == 32) { + return V0 + VReg(a.BitField.Parse(i)) + VReg(16) + } else { + return V0 + VReg(a.BitField.Parse(i)) + } + case TypeImmSigned8: + return Sign8(a.BitField.ParseSigned(i)) + case TypeImmSigned16: + return Sign16(a.BitField.ParseSigned(i)) + case TypeImmSigned32: + return Sign32(a.BitField.ParseSigned(i)) + case TypeImmUnsigned: + return Imm(a.BitField.Parse(i)) + case TypeRegImSigned12: + return RegIm12(a.BitField.ParseSigned(i)) + case TypeRegImSigned16: + return RegIm16(a.BitField.ParseSigned(i)) + case TypeRegImSigned24: + return RegIm24(a.BitField.ParseSigned(i)) + case TypeRegImSigned32: + return RegIm32(a.BitField.ParseSigned(i)) + case TypeMask: + return Mask(a.BitField.Parse(i)) + case TypeLen: + return Len(a.BitField.Parse(i)) + } +} + +type ArgType int8 + +const ( + TypeUnknown ArgType = iota + TypeReg // integer register + TypeFPReg // floating point register + TypeACReg // access register + TypeCReg // control register + TypeVecReg 
// vector register
+	TypeImmUnsigned   // unsigned immediate/flag/mask, this is the catch-all type
+	TypeImmSigned8    // Signed 8-bit Immediate
+	TypeImmSigned16   // Signed 16-bit Immediate
+	TypeImmSigned32   // Signed 32-bit Immediate
+	TypeBaseReg       // Base Register for accessing memory
+	TypeIndexReg      // Index Register
+	TypeDispUnsigned  // Displacement 12-bit unsigned for memory address
+	TypeDispSigned20  // Displacement 20-bit signed for memory address
+	TypeRegImSigned12 // RegisterImmediate 12-bit signed data
+	TypeRegImSigned16 // RegisterImmediate 16-bit signed data
+	TypeRegImSigned24 // RegisterImmediate 24-bit signed data
+	TypeRegImSigned32 // RegisterImmediate 32-bit signed data
+	TypeMask          // 4-bit Mask
+	TypeLen           // Length of Memory Operand
+	TypeLast
+)
+
+func (t ArgType) String() string {
+	switch t {
+	default:
+		return fmt.Sprintf("ArgType(%d)", int(t))
+	case TypeUnknown:
+		return "Unknown"
+	case TypeReg:
+		return "Reg"
+	case TypeFPReg:
+		return "FPReg"
+	case TypeACReg:
+		return "ACReg"
+	case TypeCReg:
+		return "CReg"
+	case TypeDispUnsigned:
+		return "DispUnsigned"
+	case TypeDispSigned20:
+		return "DispSigned20"
+	case TypeBaseReg:
+		return "BaseReg"
+	case TypeIndexReg:
+		return "IndexReg"
+	case TypeVecReg:
+		return "VecReg"
+	case TypeImmSigned8:
+		return "ImmSigned8"
+	case TypeImmSigned16:
+		return "ImmSigned16"
+	case TypeImmSigned32:
+		return "ImmSigned32"
+	case TypeImmUnsigned:
+		return "ImmUnsigned"
+	case TypeRegImSigned12:
+		return "RegImSigned12"
+	case TypeRegImSigned16:
+		return "RegImSigned16"
+	case TypeRegImSigned24:
+		return "RegImSigned24"
+	case TypeRegImSigned32:
+		return "RegImSigned32"
+	case TypeMask:
+		return "Mask"
+	case TypeLen:
+		return "Len"
+	}
+}
+
+func (t ArgType) GoString() string {
+	s := t.String()
+	if t > 0 && t < TypeLast {
+		return "Type" + s
+	}
+	return s
+}
+
+var (
+	// Errors
+	errShort   = fmt.Errorf("truncated instruction")
+	errUnknown = fmt.Errorf("unknown instruction")
+)
+
+var decoderCover
[]bool + +// Decode decodes the leading bytes in src as a single instruction using +// byte order ord. +func Decode(src []byte) (inst Inst, err error) { + if len(src) < 2 { + return inst, errShort + } + if decoderCover == nil { + decoderCover = make([]bool, len(instFormats)) + } + bit_check := binary.BigEndian.Uint16(src[:2]) + bit_check = bit_check >> 14 + l := int(0) + if (bit_check & 0x03) == 0 { + l = 2 + } else if bit_check&0x03 == 3 { + l = 6 + } else if (bit_check&0x01 == 1) || (bit_check&0x02 == 2) { + l = 4 + } + inst.Len = l + ui_extn := uint64(0) + switch l { + case 2: + ui_extn = uint64(binary.BigEndian.Uint16(src[:inst.Len])) + inst.Enc = ui_extn + ui_extn = ui_extn << 48 + case 4: + ui_extn = uint64(binary.BigEndian.Uint32(src[:inst.Len])) + inst.Enc = ui_extn + ui_extn = ui_extn << 32 + case 6: + u1 := binary.BigEndian.Uint32(src[:(inst.Len - 2)]) + u2 := binary.BigEndian.Uint16(src[(inst.Len - 2):inst.Len]) + ui_extn = uint64(u1)<<16 | uint64(u2) + ui_extn = ui_extn << 16 + inst.Enc = ui_extn + default: + return inst, errShort + } + for _, iform := range instFormats { + if ui_extn&iform.Mask != iform.Value { + continue + } + if (iform.DontCare & ^(ui_extn)) != iform.DontCare { + continue + } + for j, argfield := range iform.Args { + if argfield == nil { + break + } + inst.Args[j] = argfield.Parse(ui_extn) + } + inst.Op = iform.Op + break + } + if inst.Op == 0 && inst.Enc != 0 { + return inst, errUnknown + } + return inst, nil +} diff --git a/s390x/s390xasm/decode_test.go b/s390x/s390xasm/decode_test.go new file mode 100644 index 00000000..5ca0b741 --- /dev/null +++ b/s390x/s390xasm/decode_test.go @@ -0,0 +1,88 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package s390xasm + +import ( + "encoding/hex" + "io/ioutil" + "path" + "strings" + "testing" +) + +func TestDecode(t *testing.T) { + files, err := ioutil.ReadDir("testdata") + if err != nil { + t.Fatal(err) + } + for _, f := range files { + if !strings.HasPrefix(f.Name(), "decode") { + continue + } + filename := path.Join("testdata", f.Name()) + data, err := ioutil.ReadFile(filename) + if err != nil { + t.Fatal(err) + } + decode(data, t, filename) + } +} + +// Provide a fake symbol to verify PCrel argument decoding. +func symlookup(pc uint64) (string, uint64) { + foopc := uint64(0x100000) + if pc >= foopc && pc < foopc+0x10 { + return "foo", foopc + } + return "", 0 +} + +func decode(data []byte, t *testing.T, filename string) { + all := string(data) + // Simulate PC based on number of instructions found in the test file. + pc := uint64(0) + for strings.Contains(all, "\t\t") { + all = strings.Replace(all, "\t\t", "\t", -1) + } + for _, line := range strings.Split(all, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.SplitN(line, "\t", 3) + i := strings.Index(f[0], "|") + if i < 0 { + t.Errorf("%s: parsing %q: missing | separator", filename, f[0]) + continue + } + if i%2 != 0 { + t.Errorf("%s: parsing %q: misaligned | separator", filename, f[0]) + } + size := i / 2 + code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) + if err != nil { + t.Errorf("%s: parsing %q: %v", filename, f[0], err) + continue + } + syntax, asm := f[1], f[2] + inst, err := Decode(code) + var out string + if err != nil { + out = "error: " + err.Error() + } else { + switch syntax { + case "gnu": + out = GNUSyntax(inst, pc) + default: + t.Errorf("unknown syntax %q", syntax) + continue + } + } + pc += uint64(size) + if out != asm || inst.Len != size { + t.Errorf("%s: Decode(%s) [%s] = %s want %s", filename, f[0], syntax, out, asm) + } + } +} diff --git a/s390x/s390xasm/field.go b/s390x/s390xasm/field.go new file mode 
100644 index 00000000..e00415fc
--- /dev/null
+++ b/s390x/s390xasm/field.go
@@ -0,0 +1,98 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package s390xasm
+
+import (
+	"fmt"
+	"strings"
+)
+
+// A BitField is a bit-field in a 64-bit double word.
+// Bits are counted from 0 from the MSB to 63 as the LSB.
+type BitField struct {
+	Offs uint8 // the offset of the left-most bit.
+	Bits uint8 // length in bits.
+}
+
+func (b BitField) String() string {
+	if b.Bits > 1 {
+		return fmt.Sprintf("[%d:%d]", b.Offs, int(b.Offs+b.Bits)-1)
+	} else if b.Bits == 1 {
+		return fmt.Sprintf("[%d]", b.Offs)
+	} else {
+		return fmt.Sprintf("[%d, len=0]", b.Offs)
+	}
+}
+
+// Parse extracts the bitfield b from i, and return it as an unsigned integer.
+// Parse will panic if b is invalid.
+func (b BitField) Parse(i uint64) uint64 {
+	if b.Bits > 64 || b.Bits == 0 || b.Offs > 63 || b.Offs+b.Bits > 64 {
+		panic(fmt.Sprintf("invalid bitfield %v", b))
+	}
+	if b.Bits == 20 {
+		return ((((i >> (64 - b.Offs - b.Bits)) & ((1 << 8) - 1)) << 12) | ((i >> (64 - b.Offs - b.Bits + 8)) & 0xFFF))
+
+	} else {
+		return (i >> (64 - b.Offs - b.Bits)) & ((1 << b.Bits) - 1)
+	}
+}
+
+// ParseSigned extracts the bitfield b from i, and return it as a signed integer.
+// ParseSigned will panic if b is invalid.
+func (b BitField) ParseSigned(i uint64) int64 {
+	u := int64(b.Parse(i))
+	return u << (64 - b.Bits) >> (64 - b.Bits)
+}
+
+// BitFields is a series of BitFields representing a single number.
+type BitFields []BitField + +func (bs BitFields) String() string { + ss := make([]string, len(bs)) + for i, bf := range bs { + ss[i] = bf.String() + } + return fmt.Sprintf("<%s>", strings.Join(ss, "|")) +} + +func (bs *BitFields) Append(b BitField) { + *bs = append(*bs, b) +} + +// parse extracts the bitfields from i, concatenate them and return the result +// as an unsigned integer and the total length of all the bitfields. +// parse will panic if any bitfield in b is invalid, but it doesn't check if +// the sequence of bitfields is reasonable. +func (bs BitFields) parse(i uint64) (u uint64, Bits uint8) { + for _, b := range bs { + u = (u << b.Bits) | uint64(b.Parse(i)) + Bits += b.Bits + } + return u, Bits +} + +// Parse extracts the bitfields from i, concatenate them and return the result +// as an unsigned integer. Parse will panic if any bitfield in b is invalid. +func (bs BitFields) Parse(i uint64) uint64 { + u, _ := bs.parse(i) + return u +} + +// ParseSigned extracts the bitfields from i, concatenate them and return the result +// as a signed integer. Parse will panic if any bitfield in b is invalid. +func (bs BitFields) ParseSigned(i uint64) int64 { + u, l := bs.parse(i) + return int64(u) << (64 - l) >> (64 - l) +} + +// Count the number of bits in the aggregate BitFields +func (bs BitFields) NumBits() int { + num := 0 + for _, b := range bs { + num += int(b.Bits) + } + return num +} diff --git a/s390x/s390xasm/gnu.go b/s390x/s390xasm/gnu.go new file mode 100644 index 00000000..5755b354 --- /dev/null +++ b/s390x/s390xasm/gnu.go @@ -0,0 +1,1018 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390xasm + +// Instructions with extended mnemonics fall under various categories. +// To handle each of them in one single function, various different +// structure types are defined as below. 
Corresponding instruction +// structures are created with the help of these base structures. +// Different instruction types are as below: + +// Typ1 - Instructions having different base and extended mnemonic strings. +// +// These instructions have single M-field value and single offset. +type typ1ExtndMnics struct { + BaseOpStr string + Value uint8 + Offset uint8 + ExtnOpStr string +} + +// Typ2 - Instructions having couple of extra strings added to the base mnemonic string, +// +// depending on the condition code evaluation. +// These instructions have single M-field value and single offset. +type typ2ExtndMnics struct { + Value uint8 + Offset uint8 + ExtnOpStr string +} + +// Typ3 - Instructions having couple of extra strings added to the base mnemonic string, +// +// depending on the condition code evaluation. +// These instructions have two M-field values and two offsets. +type typ3ExtndMnics struct { + Value1 uint8 + Value2 uint8 + Offset1 uint8 + Offset2 uint8 + ExtnOpStr string +} + +// Typ4 - Instructions having different base and extended mnemonic strings. +// +// These instructions have two M-field values and two offsets. +type typ4ExtndMnics struct { + BaseOpStr string + Value1 uint8 + Value2 uint8 + Offset1 uint8 + Offset2 uint8 + ExtnOpStr string +} + +// Typ5 - Instructions having different base and extended mnemonic strings. +// +// These instructions have three M-field values and three offsets. +type typ5ExtndMnics struct { + BaseOpStr string + Value1 uint8 + Value2 uint8 + Value3 uint8 + Offset1 uint8 + Offset2 uint8 + Offset3 uint8 + ExtnOpStr string +} + +// "func Handleextndmnemonic" - This is the function where the extended mnemonic logic +// is implemented. This function defines various structures to keep a list of base +// instructions and their extended mnemonic strings. These structure will also have +// M-field values and offset values defined, based on their type. +// HandleExtndMnemonic takes "inst" structure as the input variable. 
+// Inst structure will have all the details related to an instruction. Based on the +// opcode base string, a switch-case statement is executed. In that, based on the +// M-field value and the offset value of that particular M-field, extended mnemonic +// string is either searched or constructed by adding couple of extra strings to the base +// opcode string from one of the structure defined below. +func HandleExtndMnemonic(inst *Inst) string { + + brnchInstrExtndMnics := []typ1ExtndMnics{ + //BIC - BRANCH INDIRECT ON CONDITION instruction + typ1ExtndMnics{BaseOpStr: "bic", Value: 1, Offset: 0, ExtnOpStr: "bio"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 2, Offset: 0, ExtnOpStr: "bih"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 4, Offset: 0, ExtnOpStr: "bil"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 7, Offset: 0, ExtnOpStr: "bine"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 8, Offset: 0, ExtnOpStr: "bie"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 11, Offset: 0, ExtnOpStr: "binl"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 13, Offset: 0, ExtnOpStr: "binh"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 14, Offset: 0, ExtnOpStr: "bino"}, + typ1ExtndMnics{BaseOpStr: "bic", Value: 15, Offset: 0, ExtnOpStr: "bi"}, + + //BCR - BRANCH ON CONDITION instruction + typ1ExtndMnics{BaseOpStr: "bcr", Value: 0, Offset: 0, ExtnOpStr: "nopr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 1, Offset: 0, ExtnOpStr: "bor"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 2, Offset: 0, ExtnOpStr: "bhr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 4, Offset: 0, ExtnOpStr: "blr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 7, Offset: 0, ExtnOpStr: "bner"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 8, Offset: 0, ExtnOpStr: "ber"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 11, Offset: 0, ExtnOpStr: "bnlr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 13, Offset: 0, ExtnOpStr: "bnhr"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 14, Offset: 0, ExtnOpStr: "bnor"}, + typ1ExtndMnics{BaseOpStr: "bcr", Value: 
15, Offset: 0, ExtnOpStr: "br"},
+
+		//BC - BRANCH ON CONDITION instruction
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 0, Offset: 0, ExtnOpStr: "nop"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 1, Offset: 0, ExtnOpStr: "bo"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 2, Offset: 0, ExtnOpStr: "bh"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 4, Offset: 0, ExtnOpStr: "bl"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 7, Offset: 0, ExtnOpStr: "bne"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 8, Offset: 0, ExtnOpStr: "be"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 11, Offset: 0, ExtnOpStr: "bnl"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 13, Offset: 0, ExtnOpStr: "bnh"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 14, Offset: 0, ExtnOpStr: "bno"},
+		typ1ExtndMnics{BaseOpStr: "bc", Value: 15, Offset: 0, ExtnOpStr: "b"},
+
+		//BRC - BRANCH RELATIVE ON CONDITION instruction
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 0, Offset: 0, ExtnOpStr: "jnop"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 1, Offset: 0, ExtnOpStr: "jo"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 2, Offset: 0, ExtnOpStr: "jh"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 4, Offset: 0, ExtnOpStr: "jl"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 7, Offset: 0, ExtnOpStr: "jne"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 8, Offset: 0, ExtnOpStr: "je"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 11, Offset: 0, ExtnOpStr: "jnl"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 13, Offset: 0, ExtnOpStr: "jnh"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 14, Offset: 0, ExtnOpStr: "jno"},
+		typ1ExtndMnics{BaseOpStr: "brc", Value: 15, Offset: 0, ExtnOpStr: "j"},
+
+		//BRCL - BRANCH RELATIVE ON CONDITION LONG instruction
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 0, Offset: 0, ExtnOpStr: "jgnop"},
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 1, Offset: 0, ExtnOpStr: "jgo"},
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 2, Offset: 0, ExtnOpStr: "jgh"},
+		typ1ExtndMnics{BaseOpStr: "brcl", Value: 4, Offset: 0, ExtnOpStr: "jgl"},
+
typ1ExtndMnics{BaseOpStr: "brcl", Value: 7, Offset: 0, ExtnOpStr: "jgne"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 8, Offset: 0, ExtnOpStr: "jge"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 11, Offset: 0, ExtnOpStr: "jgnl"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 13, Offset: 0, ExtnOpStr: "jgnh"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 14, Offset: 0, ExtnOpStr: "jgno"}, + typ1ExtndMnics{BaseOpStr: "brcl", Value: 15, Offset: 0, ExtnOpStr: "jg"}, + } + + //Compare instructions + cmpInstrExtndMnics := []typ2ExtndMnics{ + typ2ExtndMnics{Value: 2, Offset: 2, ExtnOpStr: "h"}, + typ2ExtndMnics{Value: 4, Offset: 2, ExtnOpStr: "l"}, + typ2ExtndMnics{Value: 6, Offset: 2, ExtnOpStr: "ne"}, + typ2ExtndMnics{Value: 8, Offset: 2, ExtnOpStr: "e"}, + typ2ExtndMnics{Value: 10, Offset: 2, ExtnOpStr: "nl"}, + typ2ExtndMnics{Value: 12, Offset: 2, ExtnOpStr: "nh"}, + } + + //Load and Store instructions + ldSt_InstrExtndMnics := []typ2ExtndMnics{ + typ2ExtndMnics{Value: 1, Offset: 2, ExtnOpStr: "o"}, + typ2ExtndMnics{Value: 2, Offset: 2, ExtnOpStr: "h"}, + typ2ExtndMnics{Value: 3, Offset: 2, ExtnOpStr: "nle"}, + typ2ExtndMnics{Value: 4, Offset: 2, ExtnOpStr: "l"}, + typ2ExtndMnics{Value: 5, Offset: 2, ExtnOpStr: "nhe"}, + typ2ExtndMnics{Value: 6, Offset: 2, ExtnOpStr: "lh"}, + typ2ExtndMnics{Value: 7, Offset: 2, ExtnOpStr: "ne"}, + typ2ExtndMnics{Value: 8, Offset: 2, ExtnOpStr: "e"}, + typ2ExtndMnics{Value: 9, Offset: 2, ExtnOpStr: "nlh"}, + typ2ExtndMnics{Value: 10, Offset: 2, ExtnOpStr: "he"}, + typ2ExtndMnics{Value: 11, Offset: 2, ExtnOpStr: "nl"}, + typ2ExtndMnics{Value: 12, Offset: 2, ExtnOpStr: "le"}, + typ2ExtndMnics{Value: 13, Offset: 2, ExtnOpStr: "nh"}, + typ2ExtndMnics{Value: 14, Offset: 2, ExtnOpStr: "no"}, + } + + vecInstrExtndMnics := []typ2ExtndMnics{ + typ2ExtndMnics{Value: 0, Offset: 3, ExtnOpStr: "b"}, + typ2ExtndMnics{Value: 1, Offset: 3, ExtnOpStr: "h"}, + typ2ExtndMnics{Value: 2, Offset: 3, ExtnOpStr: "f"}, + typ2ExtndMnics{Value: 3, Offset: 3, 
ExtnOpStr: "g"}, + typ2ExtndMnics{Value: 4, Offset: 3, ExtnOpStr: "q"}, + typ2ExtndMnics{Value: 6, Offset: 3, ExtnOpStr: "lf"}, + } + + //VCEQ, VCH, VCHL + vec2InstrExtndMnics := []typ3ExtndMnics{ + typ3ExtndMnics{Value1: 0, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "b"}, + typ3ExtndMnics{Value1: 1, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "h"}, + typ3ExtndMnics{Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "f"}, + typ3ExtndMnics{Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "g"}, + typ3ExtndMnics{Value1: 0, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "bs"}, + typ3ExtndMnics{Value1: 1, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "hs"}, + typ3ExtndMnics{Value1: 2, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "fs"}, + typ3ExtndMnics{Value1: 3, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "gs"}, + } + + //VFAE, VFEE, VFENE + vec21InstrExtndMnics := []typ3ExtndMnics{ + typ3ExtndMnics{Value1: 0, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "b"}, + typ3ExtndMnics{Value1: 1, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "h"}, + typ3ExtndMnics{Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "f"}, + typ3ExtndMnics{Value1: 0, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "bs"}, + typ3ExtndMnics{Value1: 1, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "hs"}, + typ3ExtndMnics{Value1: 2, Value2: 1, Offset1: 3, Offset2: 4, ExtnOpStr: "fs"}, + typ3ExtndMnics{Value1: 0, Value2: 2, Offset1: 3, Offset2: 4, ExtnOpStr: "zb"}, + typ3ExtndMnics{Value1: 1, Value2: 2, Offset1: 3, Offset2: 4, ExtnOpStr: "zh"}, + typ3ExtndMnics{Value1: 2, Value2: 2, Offset1: 3, Offset2: 4, ExtnOpStr: "zf"}, + typ3ExtndMnics{Value1: 0, Value2: 3, Offset1: 3, Offset2: 4, ExtnOpStr: "zbs"}, + typ3ExtndMnics{Value1: 1, Value2: 3, Offset1: 3, Offset2: 4, ExtnOpStr: "zhs"}, + typ3ExtndMnics{Value1: 2, Value2: 3, Offset1: 3, Offset2: 4, ExtnOpStr: "zfs"}, + } + + vec3InstrExtndMnics := []typ3ExtndMnics{ + typ3ExtndMnics{Value1: 2, Value2: 0, Offset1: 2, Offset2: 3, 
ExtnOpStr: "sb"}, + typ3ExtndMnics{Value1: 3, Value2: 0, Offset1: 2, Offset2: 3, ExtnOpStr: "db"}, + typ3ExtndMnics{Value1: 4, Value2: 0, Offset1: 2, Offset2: 3, ExtnOpStr: "xb"}, + } + + vec4InstrExtndMnics := []typ4ExtndMnics{ + // VFA - VECTOR FP ADD + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfasb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfadb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfasb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfadb"}, + typ4ExtndMnics{BaseOpStr: "vfa", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfaxb"}, + + // VFD - VECTOR FP DIVIDE + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfdsb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfddb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfdsb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfddb"}, + typ4ExtndMnics{BaseOpStr: "vfd", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfdxb"}, + + // VFLL - VECTOR FP LOAD LENGTHENED + typ4ExtndMnics{BaseOpStr: "vfll", Value1: 2, Value2: 0, Offset1: 2, Offset2: 3, ExtnOpStr: "vflfs"}, + typ4ExtndMnics{BaseOpStr: "vfll", Value1: 2, Value2: 8, Offset1: 2, Offset2: 3, ExtnOpStr: "wflls"}, + typ4ExtndMnics{BaseOpStr: "vfll", Value1: 3, Value2: 8, Offset1: 2, Offset2: 3, ExtnOpStr: "wflld"}, + + // VFMAX - VECTOR FP MAXIMUM + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmaxsb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmaxdb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: 
"wfmaxsb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmaxdb"}, + typ4ExtndMnics{BaseOpStr: "vfmax", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmaxxb"}, + + // VFMIN - VECTOR FP MINIMUM + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfminsb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmindb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfminsb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmindb"}, + typ4ExtndMnics{BaseOpStr: "vfmin", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfminxb"}, + + // VFM - VECTOR FP MULTIPLY + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmsb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfmdb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmsb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmdb"}, + typ4ExtndMnics{BaseOpStr: "vfm", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfmxb"}, + + // VFSQ - VECTOR FP SQUARE ROOT + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfsqsb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfsqdb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsqsb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsqdb"}, + typ4ExtndMnics{BaseOpStr: "vfsq", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsqxb"}, + + // VFS - VECTOR FP SUBTRACT + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, 
ExtnOpStr: "vfssb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vfsdb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfssb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsdb"}, + typ4ExtndMnics{BaseOpStr: "vfs", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wfsxb"}, + + // VFTCI - VECTOR FP TEST DATA CLASS IMMEDIATE + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 2, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vftcisb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 3, Value2: 0, Offset1: 3, Offset2: 4, ExtnOpStr: "vftcidb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 2, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wftcisb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 3, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wftcidb"}, + typ4ExtndMnics{BaseOpStr: "vftci", Value1: 4, Value2: 8, Offset1: 3, Offset2: 4, ExtnOpStr: "wftcixb"}, + } + + vec6InstrExtndMnics := []typ5ExtndMnics{ + // VFCE - VECTOR FP COMPARE EQUAL + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcedbs"}, + 
typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcexb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcexbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkedbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkesb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkesbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkedbs"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkexb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 4, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkexbs"}, + + // VFCH - VECTOR FP COMPARE HIGH + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, 
Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchxb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchxbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhsb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 2, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhsbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhdb"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 3, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhdbs"}, + typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhxb"}, + 
typ5ExtndMnics{BaseOpStr: "vfch", Value1: 4, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhxbs"}, + + // VFCHE - VECTOR FP COMPARE HIGH OR EQUAL + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfchedbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchedbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchexb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfchexbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 4, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 4, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfkhedbs"}, + 
typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhesb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 2, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhesbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhedb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 3, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhedbs"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 12, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhexb"}, + typ5ExtndMnics{BaseOpStr: "vfche", Value1: 4, Value2: 12, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfkhexbs"}, + + // VFPSO - VECTOR FP PERFORM SIGN OPERATION + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 0, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflcsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 8, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflcsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 0, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflnsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 8, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflnsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 0, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflpsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 2, Value2: 8, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflpsb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 0, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflcdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 8, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflcdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 0, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflndb"}, + 
typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 8, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflndb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 0, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "vflpdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 3, Value2: 8, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflpdb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 4, Value2: 8, Value3: 0, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflcxb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 4, Value2: 8, Value3: 1, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflnxb"}, + typ5ExtndMnics{BaseOpStr: "vfpso", Value1: 4, Value2: 8, Value3: 2, Offset1: 2, Offset2: 3, Offset3: 4, ExtnOpStr: "wflpxb"}, + } + + vec7InstrExtndMnics := []typ4ExtndMnics{ + // VFMA - VECTOR FP MULTIPLY AND ADD + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmasb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmadb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmasb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmadb"}, + typ4ExtndMnics{BaseOpStr: "vfma", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmaxb"}, + + // VFMS - VECTOR FP MULTIPLY AND SUBTRACT + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmssb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmssb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfms", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfmsxb"}, + + // VFNMA - VECTOR FP NEGATIVE MULTIPLY AND ADD + 
typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmasb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmadb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmasb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmadb"}, + typ4ExtndMnics{BaseOpStr: "vfnma", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmaxb"}, + + // VFNMS - VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 0, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmssb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 0, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "vfnmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 8, Value2: 2, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmssb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 8, Value2: 3, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmsdb"}, + typ4ExtndMnics{BaseOpStr: "vfnms", Value1: 8, Value2: 4, Offset1: 4, Offset2: 5, ExtnOpStr: "wfnmsxb"}, + } + + opString := inst.Op.String() + newOpStr := opString + + if inst.Enc == 0 { + return ".long 0x0" + } else if inst.Op == 0 { + return "error: unknown instruction" + } + + switch opString { + // Case to handle all "branch" instructions with one M-field operand + case "bic", "bcr", "bc", "brc", "brcl": + + for i := 0; i < len(brnchInstrExtndMnics); i++ { + if opString == brnchInstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[brnchInstrExtndMnics[i].Offset].(Mask)) == brnchInstrExtndMnics[i].Value { + newOpStr = brnchInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(brnchInstrExtndMnics[i].Offset)) + break + } + } + + // Case to handle all "compare" instructions with one M-field operand + case "crb", "cgrb", "crj", "cgrj", "crt", "cgrt", "cib", "cgib", "cij", "cgij", "cit", "cgit", "clrb", "clgrb", + "clrj", "clgrj", "clrt", "clgrt", "clt", "clgt", "clib", "clgib", 
"clij", "clgij", "clfit", "clgit": + + for i := 0; i < len(cmpInstrExtndMnics); i++ { + //For CLT and CLGT instructions, M-value is the second operand. + //Hence, set the offset to "1" + if opString == "clt" || opString == "clgt" { + cmpInstrExtndMnics[i].Offset = 1 + } + + if uint8(inst.Args[cmpInstrExtndMnics[i].Offset].(Mask)) == cmpInstrExtndMnics[i].Value { + newOpStr = opString + cmpInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(cmpInstrExtndMnics[i].Offset)) + break + } + } + + // Case to handle all "load" and "store" instructions with one M-field operand + case "lochhi", "lochi", "locghi", "locfhr", "locfh", "locr", "locgr", "loc", + "locg", "selr", "selgr", "selfhr", "stocfh", "stoc", "stocg": + + for i := 0; i < len(ldSt_InstrExtndMnics); i++ { + + //For LOCFH, LOC, LOCG, SELR, SELGR, SELFHR, STOCFH, STOC, STOCG instructions, + //M-value is the forth operand. Hence, set the offset to "3" + if opString == "locfh" || opString == "loc" || opString == "locg" || opString == "selr" || opString == "selgr" || + opString == "selfhr" || opString == "stocfh" || opString == "stoc" || opString == "stocg" { + ldSt_InstrExtndMnics[i].Offset = 3 + } + + if uint8(inst.Args[ldSt_InstrExtndMnics[i].Offset].(Mask)) == ldSt_InstrExtndMnics[i].Value { + newOpStr = opString + ldSt_InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(ldSt_InstrExtndMnics[i].Offset)) + break + } + } + + // Case to handle all "vector" instructions with one M-field operand + case "vavg", "vavgl", "verllv", "veslv", "vesrav", "vesrlv", "vgfm", "vgm", "vmx", "vmxl", "vmrh", "vmrl", "vmn", "vmnl", "vrep", + "vclz", "vctz", "vec", "vecl", "vlc", "vlp", "vpopct", "vrepi", "verim", "verll", "vesl", "vesra", "vesrl", "vgfma", "vlrep", + "vlgv", "vlvg", "vlbrrep", "vler", "vlbr", "vstbr", "vster", "vpk", "vme", "vmh", "vmle", "vmlh", "vmlo", "vml", "vmo", "vmae", + "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao", "vmph", "vmplh", "vupl", "vupll", "vscbi", "vs", "vsum", "vsumg", "vsumq", + "va", 
"vacc": + + switch opString { + + case "vavg", "vavgl", "verllv", "veslv", "vesrav", "vesrlv", "vgfm", "vgm", "vmx", "vmxl", "vmrh", "vmrl", "vmn", "vmnl", "vrep": + //M-field is 3rd arg for all these instructions. Hence, set the offset to "2" + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { // 0,1,2,3 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vclz", "vctz", "vec", "vecl", "vlc", "vlp", "vpopct", "vrepi": + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { //0,1,2,3 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset-1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset-1)) + break + } + } + + case "verim", "verll", "vesl", "vesra", "vesrl", "vgfma", "vlrep": + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { //0,1,2,3 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vlgv", "vlvg": + for i := 0; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vlbrrep", "vler", "vster": + for i := 1; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vpk": + for i := 1; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value 
{ + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vlbr", "vstbr": + for i := 1; i < len(vecInstrExtndMnics)-1; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + case "vme", "vmh", "vmle", "vmlh", "vmlo", "vmo": + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { //0,1,2 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vml": + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { //0,1,2 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == 1 { + newOpStr = opString + string("hw") + } else { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + } + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + + case "vmae", "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao": + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { //0,1,2 + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vmph", "vmplh", "vupl", "vupll": //0,1,2 + for i := 0; i < len(vecInstrExtndMnics)-3; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset-1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset-1)) + break + } + } + + case "vscbi", "vs", "va", "vacc": // 0,1,2,3,4 + for i := 0; i < len(vecInstrExtndMnics)-1; i++ { + if 
uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + case "vsum", "vsumg": + for i := 1; i < len(vecInstrExtndMnics)-4; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + case "vsumq": + for i := 2; i < len(vecInstrExtndMnics)-2; i++ { + if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) + break + } + } + } + + case "vllez": + for i := 0; i < len(vecInstrExtndMnics); i++ { + if i == 4 { + continue + } + if uint8(inst.Args[vecInstrExtndMnics[i].Offset+1].(Mask)) == vecInstrExtndMnics[i].Value { + newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vecInstrExtndMnics[i].Offset+1)) + break + } + } + + case "vgbm": + if uint16(inst.Args[1].(Imm)) == uint16(0) { + newOpStr = "vzeo" + removeArg(inst, int8(1)) + } else if uint16(inst.Args[1].(Imm)) == uint16(0xFFFF) { + newOpStr = "vone" + removeArg(inst, int8(1)) + } + case "vno": + if uint8(inst.Args[1].(VReg)) == uint8(inst.Args[2].(VReg)) { //Bitwise Not instruction(VNOT) if V2 equal to v3 + newOpStr = opString + "t" + removeArg(inst, int8(2)) + } + + case "vmsl": + if uint8(inst.Args[4].(Mask)) == uint8(3) { + newOpStr = opString + "g" + removeArg(inst, int8(4)) + } + + case "vflr": + if uint8(inst.Args[2].(Mask)) == uint8(3) && ((inst.Args[3].(Mask)>>3)&0x1 == 0x1) { + inst.Args[3] = (inst.Args[3].(Mask) ^ 0x8) + newOpStr = "wflrd" + removeArg(inst, int8(2)) + } else if uint8(inst.Args[2].(Mask)) == uint8(4) && ((inst.Args[3].(Mask)>>3)&0x1 == 0x1) { + inst.Args[3] = (inst.Args[3].(Mask) ^ 0x8) + newOpStr = "wflrx" 
+ removeArg(inst, int8(2)) + } else if uint8(inst.Args[2].(Mask)) == uint8(3) { + newOpStr = "vflrd" + removeArg(inst, int8(2)) + } + + case "vllebrz": + if uint8(inst.Args[4].(Mask)) == uint8(1) { + newOpStr = opString + "h" + removeArg(inst, int8(4)) + } else if uint8(inst.Args[4].(Mask)) == uint8(2) { + newOpStr = opString + "f" + removeArg(inst, int8(4)) + } else if uint8(inst.Args[4].(Mask)) == uint8(3) { + newOpStr = "ldrv" + removeArg(inst, int8(4)) + } else if uint8(inst.Args[4].(Mask)) == uint8(6) { + newOpStr = "lerv" + removeArg(inst, int8(4)) + } + + case "vschp": + if uint8(inst.Args[3].(Mask)) == uint8(2) { + newOpStr = "vschsp" + removeArg(inst, int8(3)) + } else if uint8(inst.Args[3].(Mask)) == uint8(3) { + newOpStr = "vschdp" + removeArg(inst, int8(3)) + } else if uint8(inst.Args[3].(Mask)) == uint8(4) { + newOpStr = "vschxp" + removeArg(inst, int8(3)) + } + + case "vsbcbi", "vsbi": + if uint8(inst.Args[4].(Mask)) == uint8(4) { + newOpStr = opString + vecInstrExtndMnics[4].ExtnOpStr + removeArg(inst, int8(4)) + } + + case "vac", "vaccc": + if uint8(inst.Args[4].(Mask)) == uint8(4) { + newOpStr = opString + vecInstrExtndMnics[3].ExtnOpStr + removeArg(inst, int8(3)) + } + + case "vceq", "vch", "vchl": + for i := 0; i < len(vec2InstrExtndMnics)-6; i++ { + if uint8(inst.Args[vec2InstrExtndMnics[i].Offset1].(Mask)) == vec2InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec2InstrExtndMnics[i].Offset2].(Mask)) == vec2InstrExtndMnics[i].Value2 { + newOpStr = opString + vec2InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset2-1)) + break + } + } + + case "vpks", "vpkls": + for i := 1; i < len(vec2InstrExtndMnics)-6; i++ { + if i == 4 { + continue + } + if uint8(inst.Args[vec2InstrExtndMnics[i].Offset1].(Mask)) == vec2InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec2InstrExtndMnics[i].Offset2].(Mask)) == vec2InstrExtndMnics[i].Value2 { + newOpStr = opString + 
vec2InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec2InstrExtndMnics[i].Offset2-1)) + break + } + } + case "vfee", "vfene": + var check bool + for i := 0; i < len(vec21InstrExtndMnics); i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2-1)) + check = true + break + } + } + if !check { + if uint8(inst.Args[3].(Mask)) == 0 && (uint8(inst.Args[4].(Mask)) != uint8(0)) { + newOpStr = opString + vec21InstrExtndMnics[0].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[0].Offset1)) + } else if uint8(inst.Args[3].(Mask)) == 1 && (uint8(inst.Args[4].(Mask)) != uint8(0)) { + newOpStr = opString + vec21InstrExtndMnics[1].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[1].Offset1)) + } else if uint8(inst.Args[3].(Mask)) == 2 && (uint8(inst.Args[4].(Mask)) != uint8(0)) { + newOpStr = opString + vec21InstrExtndMnics[2].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[2].Offset1)) + } else if uint8(inst.Args[4].(Mask)) == 0 { + removeArg(inst, int8(vec21InstrExtndMnics[2].Offset2)) + } + } + + case "vfae", "vstrc": + off := uint8(0) + var check bool + if opString == "vstrc" { + off = uint8(1) + } + for i := 0; i < len(vec21InstrExtndMnics)-9; i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+off].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2+off-1)) + check = true + break + } + } + + for i := 0; !(check) && (i < 
len(vec21InstrExtndMnics)-9); i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+off].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2+off-1)) + check = true + break + } + } + //for i := 3; !(check) && (i < len(vec21InstrExtndMnics)); i++ { + for i := len(vec21InstrExtndMnics) - 1; !(check) && (i > 2); i-- { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+off].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask))&(vec21InstrExtndMnics[i].Value2) == vec21InstrExtndMnics[i].Value2 { + x := uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+off].(Mask)) ^ (vec21InstrExtndMnics[i].Value2) + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + if x != 0 { + inst.Args[vec21InstrExtndMnics[i].Offset2+off] = Mask(x) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + check = true + break + } else { + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+off)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2+off-1)) + check = true + break + } + } + } + if !check && inst.Args[4+off].(Mask) == Mask(0) { + removeArg(inst, int8(4+off)) + break + } + + case "vstrs": + var check bool + for i := 0; i < len(vec21InstrExtndMnics)-3; i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+1].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+1)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2)) + check = true + break + } + if i == 2 { + i = i + 3 + } + } + + for i := 0; !(check) && (i < len(vec21InstrExtndMnics)-9); i++ { + if 
uint8(inst.Args[vec21InstrExtndMnics[i].Offset1+1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2+1].(Mask)) != 0 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1+1)) + break + } + } + + case "vistr": + var check bool + for i := 0; i < len(vec21InstrExtndMnics)-6; i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1-1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2-1].(Mask)) == vec21InstrExtndMnics[i].Value2 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1-1)) + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset2-2)) + check = true + break + } + } + + for i := 0; !(check) && (i < len(vec21InstrExtndMnics)-9); i++ { + if uint8(inst.Args[vec21InstrExtndMnics[i].Offset1-1].(Mask)) == vec21InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec21InstrExtndMnics[i].Offset2-1].(Mask)) != 0 { + newOpStr = opString + vec21InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec21InstrExtndMnics[i].Offset1-1)) + break + } + } + + if uint8(inst.Args[3].(Mask)) == 0 { + removeArg(inst, int8(3)) + break + } + + case "vcfps": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcefb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcdgb" + removeArg(inst, int8(2)) + break + } else if uint8(inst.Args[2].(Mask)) == uint8(2) { + newOpStr = "vcefb" + removeArg(inst, int8(2)) + break + } else if uint8(inst.Args[2].(Mask)) == uint8(3) { + newOpStr = "vcdgb" + removeArg(inst, int8(2)) + break + } + + case "vcfpl": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = 
Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcelfb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcdlgb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vcelfb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vcdlgb" + removeArg(inst, int8(2)) + break + } + + case "vcsfp": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wcgdb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vcfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vcgdb" + removeArg(inst, int8(2)) + break + } + + case "vclfp": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wclfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + inst.Args[3] = Mask((inst.Args[3].(Mask)) ^ (0x8)) + newOpStr = "wclgdb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vclfeb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vclgdb" + removeArg(inst, int8(2)) + break + } + + case "vfi": + if inst.Args[2].(Mask) == Mask(2) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + newOpStr = "wfisb" + removeArg(inst, int8(2)) + inst.Args[2] = Mask((inst.Args[2].(Mask)) ^ (0x8)) + break + } else if inst.Args[2].(Mask) == Mask(3) && ((inst.Args[3].(Mask)>>3)&(0x3) == 
1) { + newOpStr = "wfidb" + removeArg(inst, int8(2)) + inst.Args[2] = Mask((inst.Args[2].(Mask)) ^ (0x8)) + break + } else if inst.Args[2].(Mask) == Mask(4) && ((inst.Args[3].(Mask)>>3)&(0x1) == 1) { + newOpStr = "wfixb" + removeArg(inst, int8(2)) + inst.Args[2] = Mask((inst.Args[2].(Mask)) ^ (0x8)) + break + } else if inst.Args[2].(Mask) == Mask(2) { + newOpStr = "vfisb" + removeArg(inst, int8(2)) + break + } else if inst.Args[2].(Mask) == Mask(3) { + newOpStr = "vfidb" + removeArg(inst, int8(2)) + break + } + + // Case to handle few vector instructions with 2 M-field operands + case "vfa", "vfd", "vfll", "vfmax", "vfmin", "vfm": + for i := 0; i < len(vec4InstrExtndMnics); i++ { + if opString == vec4InstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[vec4InstrExtndMnics[i].Offset1].(Mask)) == vec4InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec4InstrExtndMnics[i].Offset2].(Mask)) == vec4InstrExtndMnics[i].Value2 { + newOpStr = vec4InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec4InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec4InstrExtndMnics[i].Offset2-1)) + break + } + } + + // Case to handle few special "vector" instructions with 2 M-field operands + case "wfc", "wfk": + for i := 0; i < len(vec3InstrExtndMnics); i++ { + if uint8(inst.Args[vec3InstrExtndMnics[i].Offset1].(Mask)) == vec3InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec3InstrExtndMnics[i].Offset2].(Mask)) == vec3InstrExtndMnics[i].Value2 { + newOpStr = opString + vec3InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec3InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec3InstrExtndMnics[i].Offset2-1)) + break + } + } + + // Case to handle few vector instructions with 2 M-field operands + case "vfma", "vfms", "vfnma", "vfnms": + for i := 0; i < len(vec7InstrExtndMnics); i++ { + if opString == vec7InstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[vec7InstrExtndMnics[i].Offset1].(Mask)) == vec7InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec7InstrExtndMnics[i].Offset2].(Mask)) 
== vec7InstrExtndMnics[i].Value2 { + newOpStr = vec7InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec7InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec7InstrExtndMnics[i].Offset2-1)) + break + } + } + + // List of instructions with 3 M-field operands. + case "vfce", "vfch", "vfche", "vfpso": + for i := 0; i < len(vec6InstrExtndMnics); i++ { + if opString == vec6InstrExtndMnics[i].BaseOpStr && + uint8(inst.Args[vec6InstrExtndMnics[i].Offset1].(Mask)) == vec6InstrExtndMnics[i].Value1 && + uint8(inst.Args[vec6InstrExtndMnics[i].Offset2].(Mask)) == vec6InstrExtndMnics[i].Value2 && + uint8(inst.Args[vec6InstrExtndMnics[i].Offset3].(Mask)) == vec6InstrExtndMnics[i].Value3 { + newOpStr = vec6InstrExtndMnics[i].ExtnOpStr + removeArg(inst, int8(vec6InstrExtndMnics[i].Offset1)) + removeArg(inst, int8(vec6InstrExtndMnics[i].Offset2-1)) + removeArg(inst, int8(vec6InstrExtndMnics[i].Offset3-2)) + break + } + } + + default: + return opString + } + return newOpStr +} + +// This is the function that is called to print the disassembled instruction +// in the GNU (AT&T) syntax form. +func GNUSyntax(inst Inst, pc uint64) string { + if inst.Enc == 0 { + return ".long 0x0" + } else if inst.Op == 0 { + return "error: unknown instruction" + } + return inst.String(pc) +} + +// removeArg removes the arg in inst.Args[index]. +func removeArg(inst *Inst, index int8) { + for i := int(index); i < len(inst.Args); i++ { + if i+1 < len(inst.Args) { + inst.Args[i] = inst.Args[i+1] + } else { + inst.Args[i] = nil + } + } +} diff --git a/s390x/s390xasm/inst.go b/s390x/s390xasm/inst.go new file mode 100644 index 00000000..19d70156 --- /dev/null +++ b/s390x/s390xasm/inst.go @@ -0,0 +1,399 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package s390xasm + +import ( + "bytes" + "fmt" + "strings" +) + +type Inst struct { + Op Op // Opcode mnemonic + Enc uint64 // Raw encoding bits (if Len == 8, this is the prefix word) + Len int // Length of encoding in bytes. + Args Args // Instruction arguments, in Power ISA manual order. +} + +func (i Inst) String(pc uint64) string { + var buf bytes.Buffer + var rxb_check bool + m := i.Op.String() + if strings.HasPrefix(m, "v") || strings.Contains(m, "wfc") || strings.Contains(m, "wfk") { + rxb_check = true + } + mnemonic := HandleExtndMnemonic(&i) + buf.WriteString(fmt.Sprintf("%s", mnemonic)) + for j, arg := range i.Args { + if arg == nil { + break + } + if j == 0 { + buf.WriteString(" ") + } else { + switch arg.(type) { + case VReg, Reg: + if _, ok := i.Args[j-1].(Disp12); ok { + buf.WriteString("") + } else if _, ok := i.Args[j-1].(Disp20); ok { + buf.WriteString("") + } else { + buf.WriteString(",") + } + case Base: + if _, ok := i.Args[j-1].(VReg); ok { + buf.WriteString(",") + } else if _, ok := i.Args[j-1].(Reg); ok { + buf.WriteString(",") + } + case Index, Len: + default: + buf.WriteString(",") + } + } + buf.WriteString(arg.String(pc)) + if rxb_check && i.Args[j+2] == nil { + break + } + } + return buf.String() +} + +// An Op is an instruction operation. +type Op uint16 + +func (o Op) String() string { + if int(o) >= len(opstr) || opstr[o] == "" { + return fmt.Sprintf("Op(%d)", int(o)) + } + return opstr[o] +} + +// An Arg is a single instruction argument. +// One of these types: Reg, Base, Index, Disp20, Disp12, Len, Mask, Sign8, Sign16, Sign32, RegIm12, RegIm16, RegIm24, RegIm32. +type Arg interface { + IsArg() + String(pc uint64) string +} + +// An Args holds the instruction arguments. +// If an instruction has fewer than 6 arguments, +// the final elements in the array are nil. 
+type Args [8]Arg + +// Base represents an 4-bit Base Register field +type Base uint8 + +const ( + B0 Base = iota + B1 + B2 + B3 + B4 + B5 + B6 + B7 + B8 + B9 + B10 + B11 + B12 + B13 + B14 + B15 +) + +func (Base) IsArg() {} +func (r Base) String(pc uint64) string { + switch { + case B1 <= r && r <= B15: + s := "%" + return fmt.Sprintf("%sr%d)", s, int(r-B0)) + case B0 == r: + return fmt.Sprintf("") + default: + return fmt.Sprintf("Base(%d)", int(r)) + } +} + +// Index represents an 4-bit Index Register field +type Index uint8 + +const ( + X0 Index = iota + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 + X14 + X15 +) + +func (Index) IsArg() {} +func (r Index) String(pc uint64) string { + switch { + case X1 <= r && r <= X15: + s := "%" + return fmt.Sprintf("%sr%d,", s, int(r-X0)) + case X0 == r: + return fmt.Sprintf("") + default: + return fmt.Sprintf("Base(%d)", int(r)) + } +} + +// Disp20 represents an 20-bit Unsigned Displacement +type Disp20 uint32 + +func (Disp20) IsArg() {} +func (r Disp20) String(pc uint64) string { + if (r>>19)&0x01 == 1 { + return fmt.Sprintf("%d(", int32(r|0xfff<<20)) + } else { + return fmt.Sprintf("%d(", int32(r)) + } +} + +// Disp12 represents an 12-bit Unsigned Displacement +type Disp12 uint16 + +func (Disp12) IsArg() {} +func (r Disp12) String(pc uint64) string { + return fmt.Sprintf("%d(", r) +} + +// RegIm12 represents an 12-bit Register immediate number. +type RegIm12 uint16 + +func (RegIm12) IsArg() {} +func (r RegIm12) String(pc uint64) string { + if (r>>11)&0x01 == 1 { + return fmt.Sprintf("%#x", pc+(2*uint64(int16(r|0xf<<12)))) + } else { + return fmt.Sprintf("%#x", pc+(2*uint64(int16(r)))) + } +} + +// RegIm16 represents an 16-bit Register immediate number. +type RegIm16 uint16 + +func (RegIm16) IsArg() {} +func (r RegIm16) String(pc uint64) string { + return fmt.Sprintf("%#x", pc+(2*uint64(int16(r)))) +} + +// RegIm24 represents an 24-bit Register immediate number. 
+type RegIm24 uint32 + +func (RegIm24) IsArg() {} +func (r RegIm24) String(pc uint64) string { + if (r>>23)&0x01 == 1 { + return fmt.Sprintf("%#x", pc+(2*uint64(int32(r|0xff<<24)))) + } else { + return fmt.Sprintf("%#x", pc+(2*uint64(int32(r)))) + } +} + +// RegIm32 represents an 32-bit Register immediate number. +type RegIm32 uint32 + +func (RegIm32) IsArg() {} +func (r RegIm32) String(pc uint64) string { + return fmt.Sprintf("%#x", pc+(2*uint64(int32(r)))) +} + +// A Reg is a single register. The zero value means R0, not the absence of a register. +// It also includes special registers. +type Reg uint16 + +const ( + R0 Reg = iota + R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + F0 + F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + F12 + F13 + F14 + F15 + A0 + A1 + A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + A10 + A11 + A12 + A13 + A14 + A15 + C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7 + C8 + C9 + C10 + C11 + C12 + C13 + C14 + C15 +) + +func (Reg) IsArg() {} +func (r Reg) String(pc uint64) string { + s := "%" + switch { + case R0 <= r && r <= R15: + return fmt.Sprintf("%sr%d", s, int(r-R0)) + case F0 <= r && r <= F15: + return fmt.Sprintf("%sf%d", s, int(r-F0)) + case A0 <= r && r <= A15: + return fmt.Sprintf("%sa%d", s, int(r-A0)) + case C0 <= r && r <= C15: + return fmt.Sprintf("%sc%d", s, int(r-C0)) + default: + return fmt.Sprintf("Reg(%d)", int(r)) + } +} + +// VReg is a vector register. The zero value means V0, not the absence of a register. + +type VReg uint8 + +const ( + V0 VReg = iota + V1 + V2 + V3 + V4 + V5 + V6 + V7 + V8 + V9 + V10 + V11 + V12 + V13 + V14 + V15 + V16 + V17 + V18 + V19 + V20 + V21 + V22 + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31 +) + +func (VReg) IsArg() {} +func (r VReg) String(pc uint64) string { + s := "%" + if V0 <= r && r <= V31 { + return fmt.Sprintf("%sv%d", s, int(r-V0)) + } else { + return fmt.Sprintf("VReg(%d)", int(r)) + } +} + +// Imm represents an immediate number. 
+type Imm uint32 + +func (Imm) IsArg() {} +func (i Imm) String(pc uint64) string { + return fmt.Sprintf("%d", uint32(i)) +} + +// Sign8 represents an 8-bit signed immediate number. +type Sign8 int8 + +func (Sign8) IsArg() {} +func (i Sign8) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Sign16 represents an 16-bit signed immediate number. +type Sign16 int16 + +func (Sign16) IsArg() {} +func (i Sign16) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Sign32 represents an 32-bit signed immediate number. +type Sign32 int32 + +func (Sign32) IsArg() {} +func (i Sign32) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Mask represents an 4-bit mask value +type Mask uint8 + +func (Mask) IsArg() {} +func (i Mask) String(pc uint64) string { + return fmt.Sprintf("%d", i) +} + +// Len represents an 8-bit type holds 4/8-bit Len argument +type Len uint8 + +func (Len) IsArg() {} +func (i Len) String(pc uint64) string { + return fmt.Sprintf("%d,", uint16(i)+1) +} diff --git a/s390x/s390xasm/tables.go b/s390x/s390xasm/tables.go new file mode 100644 index 00000000..f0db5e90 --- /dev/null +++ b/s390x/s390xasm/tables.go @@ -0,0 +1,5046 @@ +// Code generated by s390xmap -fmt=decoder ../s390x.csv DO NOT EDIT. 
+ +package s390xasm + +const ( + _ Op = iota + A + AR + ARK + AY + AG + AGR + AGRK + AGF + AGFR + AXBR + AXTR + AXTRA + ADB + ADBR + ADTR + ADTRA + AEB + AEBR + AP + AH + AHY + AGH + AHI + AGHI + AHHHR + AHHLR + AFI + AHIK + ASI + AGHIK + AGFI + AGSI + AIH + AL + ALR + ALRK + ALY + ALG + ALGR + ALGRK + ALGF + ALGFR + ALHHHR + ALHHLR + ALFI + ALGFI + ALC + ALCR + ALCG + ALCGR + ALHSIK + ALSI + ALGHSIK + ALGSI + ALSIH + ALSIHN + AXR + AD + ADR + AE + AER + AW + AWR + AU + AUR + N + NR + NRK + NY + NG + NGR + NGRK + NC + NI + NIY + NIHH + NIHL + NIHF + NILH + NILL + NILF + NCRK + NCGRK + BAL + BALR + BAS + BASR + BASSM + BSA + BSM + BAKR + BSG + BIC + BC + BCR + BCT + BCTR + BCTG + BCTGR + BXH + BXHG + BXLE + BXLEG + BPP + BPRP + BRAS + BRASL + BRC + BRCL + BRCT + BRCTG + BRCTH + BRXH + BRXHG + BRXLE + BRXLG + XSCH + CKSM + KM + KMA + KMC + KMF + KMCTR + KMO + CSCH + C + CR + CY + CG + CGR + CGF + CGFR + CXBR + CXTR + CXR + CDB + CDBR + CDTR + CD + CDR + CEB + CEBR + CE + CER + CRB + CGRB + CRJ + CGRJ + CFC + CRDTE + KXBR + KXTR + KDB + KDBR + KDTR + KEB + KEBR + CS + CSY + CSG + CSP + CSPG + CSST + CRT + CGRT + CEXTR + CEDTR + CP + CDS + CDSY + CDSG + CH + CHY + CGH + CHHSI + CHI + CHSI + CGHI + CGHSI + CHRL + CGHRL + CHF + CHHR + CHLR + CFI + CGFI + CIB + CGIB + CIJ + CGIJ + CIT + CGIT + CIH + CL + CLR + CLY + CLG + CLGR + CLGF + CLGFR + CLC + CLI + CLIY + CLRB + CLGRB + CLRJ + CLGRJ + CLRT + CLT + CLGRT + CLGT + CLMH + CLM + CLMY + CLHF + CLHHR + CLHLR + CLHHSI + CLFI + CLFHSI + CLGHSI + CLGFI + CLIB + CLGIB + CLIJ + CLGIJ + CLFIT + CLGIT + CLIH + CLCL + CLCLE + CLCLU + CLRL + CLHRL + CLGRL + CLGHRL + CLGFRL + CLST + CRL + CGRL + CGFRL + CUSE + CMPSC + KDSA + KIMD + KLMD + KMAC + THDR + THDER + CXFBR + CXFBRA + CXFTR + CXFR + CDFBR + CDFBRA + CDFTR + CDFR + CEFBR + CEFBRA + CEFR + CXGBR + CXGBRA + CXGTR + CXGTRA + CXGR + CDGBR + CDGBRA + CDGTR + CDGTRA + CDGR + CEGBR + CEGBRA + CEGR + CXLFBR + CXLFTR + CDLFBR + CDLFTR + CELFBR + CXLGBR + CXLGTR + CDLGBR + CDLGTR + 
CELGBR + CXPT + CDPT + CXSTR + CDSTR + CXUTR + CDUTR + CXZT + CDZT + TBEDR + TBDR + CVB + CVBY + CVBG + CVD + CVDY + CVDG + CFXBR + CFXBRA + CGXBR + CGXBRA + CFXTR + CGXTR + CGXTRA + CFXR + CGXR + CFDBR + CFDBRA + CGDBR + CGDBRA + CFDTR + CGDTR + CGDTRA + CFDR + CGDR + CFEBR + CFEBRA + CGEBR + CGEBRA + CFER + CGER + CLFXBR + CLGXBR + CLFXTR + CLGXTR + CLFDBR + CLGDBR + CLFDTR + CLGDTR + CLFEBR + CLGEBR + CPXT + CPDT + CSXTR + CSDTR + CUXTR + CUDTR + CZXT + CZDT + CU24 + CU21 + CU12 + CU14 + CU42 + CU41 + CPYA + CPSDR + VSCSHP + VSCHP + DFLTCC + D + DR + DXBR + DXTR + DXTRA + DXR + DDB + DDBR + DDTR + DDTRA + DD + DDR + DEB + DEBR + DE + DER + DP + DL + DLR + DLG + DLGR + DSG + DSGR + DSGF + DSGFR + DIDBR + DIEBR + ED + EDMK + X + XR + XRK + XY + XG + XGR + XGRK + XC + XI + XIY + XIHF + XILF + EX + EXRL + EAR + ESEA + EEXTR + EEDTR + ECAG + ECTG + EFPC + EPAR + EPAIR + EPSW + ESAR + ESAIR + ESXTR + ESDTR + EREG + EREGG + ESTA + ETND + FLOGR + HSCH + HDR + HER + IAC + IEXTR + IEDTR + IC + ICY + ICMH + ICM + ICMY + IIHH + IIHL + IIHF + IILH + IILL + IILF + IPM + IPK + IRBM + ISKE + IVSK + IDTE + IPTE + L + LR + LY + LG + LGR + LGF + LGFR + LXR + LD + LDR + LDY + LE + LER + LEY + LAM + LAMY + LA + LAY + LAE + LAEY + LARL + LASP + LAA + LAAG + LAAL + LAALG + LAN + LANG + LAX + LAXG + LAO + LAOG + LT + LTR + LTG + LTGR + LTGF + LTGFR + LTXBR + LTXTR + LTXR + LTDBR + LTDTR + LTDR + LTEBR + LTER + LAT + LGAT + LZRF + LZRG + LBEAR + LB + LBR + LGB + LGBR + LBH + LCR + LCGR + LCGFR + LCXBR + LCXR + LCDBR + LCDR + LCDFR + LCEBR + LCER + LCTL + LCTLG + LCBB + FIXBR + FIXBRA + FIXTR + FIXR + FIDBR + FIDBRA + FIDTR + FIDR + FIEBR + FIEBRA + FIER + LFPC + LFAS + LDGR + LGDR + LGG + LGSC + LH + LHR + LHY + LGH + LGHR + LHH + LOCHHI + LHI + LGHI + LOCHI + LOCGHI + LHRL + LGHRL + LFH + LFHAT + LOCFH + LOCFHR + LGFI + LXDB + LXDBR + LXDTR + LXD + LXDR + LXEB + LXEBR + LXE + LXER + LDEB + LDEBR + LDETR + LDE + LDER + LLGF + LLGFR + LLGFSG + LLGFAT + LLZRGF + LLC + LLCR + LLGC + LLGCR 
+ LLCH + LLH + LLHR + LLGH + LLGHR + LLHH + LLHRL + LLGHRL + LLIHH + LLIHL + LLIHF + LLILH + LLILL + LLILF + LLGFRL + LLGT + LLGTR + LLGTAT + LM + LMY + LMG + LMD + LMH + LNR + LNGR + LNGFR + LNXBR + LNXR + LNDBR + LNDR + LNDFR + LNEBR + LNER + LOC + LOCR + LOCG + LOCGR + LPTEA + LPD + LPDG + LPQ + LPR + LPGR + LPGFR + LPXBR + LPXR + LPDBR + LPDR + LPDFR + LPEBR + LPER + LPSW + LPSWE + LPSWEY + LRA + LRAY + LRAG + LRL + LGRL + LGFRL + LRVH + LRV + LRVR + LRVG + LRVGR + LDXBR + LDXBRA + LDXTR + LDXR + LRDR + LEXBR + LEXBRA + LEXR + LEDBR + LEDBRA + LEDTR + LEDR + LRER + LURA + LURAG + LZXR + LZDR + LZER + MSTA + MSCH + MC + MVHHI + MVHI + MVGHI + MVC + MVI + MVIY + MVCIN + MVCL + MVCLE + MVCLU + MVN + MVPG + MVCRL + MVST + MVCP + MVCS + MVCDK + MVCK + MVO + MVCOS + MVCSK + MVZ + MG + MGRK + M + MFY + MR + MXBR + MXTR + MXTRA + MXR + MDB + MDBR + MDTR + MDTRA + MD + MDR + MXDB + MXDBR + MXD + MXDR + MEEB + MEEBR + MEE + MEER + MDEB + MDEBR + MDE + MDER + ME + MER + MAY + MAYR + MADB + MADBR + MAD + MADR + MAEB + MAEBR + MAE + MAER + MAYH + MAYHR + MAYL + MAYLR + MSDB + MSDBR + MSD + MSDR + MSEB + MSEBR + MSE + MSER + MP + MH + MHY + MGH + MHI + MGHI + MLG + MLGR + ML + MLR + MS + MSC + MSR + MSRKC + MSY + MSG + MSGC + MSGR + MSGRKC + MSGF + MSGFR + MSFI + MSGFI + MYH + MYHR + MYL + MYLR + MY + MYR + NNRK + NNGRK + NNPA + NIAI + NTSTG + NORK + NOGRK + NXRK + NXGRK + O + OR + ORK + OY + OG + OGR + OGRK + OC + OI + OIY + OIHH + OIHL + OIHF + OILH + OILL + OILF + OCRK + OCGRK + PACK + PKA + PKU + PGIN + PGOUT + PCC + PCKMO + PFPO + PFMF + PLO + PPA + PRNO + PTFF + PTF + POPCNT + PFD + PFDRL + PC + PR + PT + PTI + PALB + PTLB + QAXTR + QADTR + QPACI + RRXTR + RRDTR + RCHP + RDP + RRBE + RRBM + RP + RSCH + RLL + RLLG + RNSBG + RXSBG + RISBG + RISBGN + RISBHG + RISBLG + ROSBG + SRST + SRSTU + SELR + SELGR + SELFHR + SAR + SAL + SAC + SACF + SAM24 + SAM31 + SAM64 + SRNM + SRNMB + SCHM + SCK + SCKC + SCKPF + SPT + SRNMT + SFPC + SFASR + SPX + SPM + SPKA + SSAR + SSAIR + SSKE 
+ SSM + SRP + SLDA + SLDL + SLA + SLAK + SLAG + SLL + SLLK + SLLG + SRDA + SRDL + SRA + SRAK + SRAG + SRL + SRLK + SRLG + SLXT + SLDT + SRXT + SRDT + SIGP + SORTL + SQXBR + SQXR + SQDB + SQDBR + SQD + SQDR + SQEB + SQEBR + SQE + SQER + SSCH + ST + STY + STG + STD + STDY + STE + STEY + STAM + STAMY + STBEAR + STCPS + STCRW + STC + STCY + STCH + STCMH + STCM + STCMY + STCK + STCKC + STCKE + STCKF + STCTL + STCTG + STAP + STIDP + STPT + STFL + STFLE + STFPC + STGSC + STH + STHY + STHH + STHRL + STFH + STOCFH + STM + STMY + STMG + STMH + STOC + STOCG + STPQ + STPX + STRAG + STRL + STGRL + STRVH + STRV + STRVG + STSCH + STSI + STNSM + STOSM + STURA + STURG + S + SR + SRK + SY + SG + SGR + SGRK + SGF + SGFR + SXBR + SXTR + SXTRA + SDB + SDBR + SDTR + SDTRA + SEB + SEBR + SP + SH + SHY + SGH + SHHHR + SHHLR + SL + SLR + SLRK + SLY + SLG + SLGR + SLGRK + SLGF + SLGFR + SLHHHR + SLHHLR + SLFI + SLGFI + SLB + SLBR + SLBG + SLBGR + SXR + SD + SDR + SE + SER + SW + SWR + SU + SUR + SVC + TAR + TAM + TS + TB + TCXB + TDCXT + TCDB + TDCDT + TCEB + TDCET + TDGXT + TDGDT + TDGET + TP + TPEI + TPI + TPROT + TSCH + TM + TMY + TMHH + TMHL + TMLH + TMLL + TMH + TML + TRACE + TRACG + TABORT + TBEGINC + TBEGIN + TEND + TR + TRT + TRTE + TRTR + TRTRE + TRE + TROO + TROT + TRTO + TRTT + TRAP2 + TRAP4 + UNPK + UNPKA + UNPKU + UPT + VA + VACC + VAP + VAC + VACCC + VN + VNC + VAVG + VAVGL + VBPERM + VCKSM + VCP + VCEQ + VCH + VCHL + VCSPH + VCVB + VCVBG + VCVD + VCVDG + VCLZDP + VCLZ + VCTZ + VDP + VEC + VECL + VERIM + VERLL + VERLLV + VESLV + VESL + VESRA + VESRAV + VESRL + VESRLV + VX + VFAE + VFEE + VFENE + VFA + WFK + VFCE + VFCH + VFCHE + WFC + VCLFNH + VCLFNL + VCRNF + VCFPS + VCDG + VCFPL + VCDLG + VCFN + VCSFP + VCGD + VCLFP + VCLGD + VCNF + VFD + VFLL + VFLR + VFMAX + VFMIN + VFM + VFMA + VFMS + VFNMA + VFNMS + VFPSO + VFSQ + VFS + VFTCI + VGFM + VGFMA + VGEF + VGEG + VGBM + VGM + VISTR + VL + VLR + VLREP + VLEBRH + VLEBRF + VLEBRG + VLBRREP + VLLEBRZ + VLBR + VLC + VLEH + VLEF + 
VLEG + VLEB + VLEIH + VLEIF + VLEIG + VLEIB + VLER + VFI + VLGV + VLIP + VLLEZ + VLM + VLP + VLRL + VLRLR + VLBB + VLVG + VLVGP + VLL + VMX + VMXL + VMRH + VMRL + VMN + VMNL + VMAE + VMAH + VMALE + VMALH + VMALO + VMAL + VMAO + VMSP + VMP + VME + VMH + VMLE + VMLH + VMLO + VML + VMO + VMSL + VNN + VNO + VNX + VO + VOC + VPK + VPKLS + VPKS + VPKZ + VPKZR + VPSOP + VPERM + VPDI + VPOPCT + VRP + VREP + VREPI + VSCEF + VSCEG + VSEL + VSDP + VSRP + VSRPR + VSL + VSLB + VSLD + VSLDB + VSRA + VSRAB + VSRD + VSRL + VSRLB + VSEG + VST + VSTEBRH + VSTEBRF + VSTEBRG + VSTBR + VSTEH + VSTEF + VSTEG + VSTEB + VSTER + VSTM + VSTRL + VSTRLR + VSTL + VSTRC + VSTRS + VS + VSCBI + VSP + VSBCBI + VSBI + VSUMG + VSUMQ + VSUM + VTP + VTM + VUPH + VUPLH + VUPLL + VUPL + VUPKZ + VUPKZH + VUPKZL + ZAP +) + +var opstr = [...]string{ + A: "a", + AR: "ar", + ARK: "ark", + AY: "ay", + AG: "ag", + AGR: "agr", + AGRK: "agrk", + AGF: "agf", + AGFR: "agfr", + AXBR: "axbr", + AXTR: "axtr", + AXTRA: "axtra", + ADB: "adb", + ADBR: "adbr", + ADTR: "adtr", + ADTRA: "adtra", + AEB: "aeb", + AEBR: "aebr", + AP: "ap", + AH: "ah", + AHY: "ahy", + AGH: "agh", + AHI: "ahi", + AGHI: "aghi", + AHHHR: "ahhhr", + AHHLR: "ahhlr", + AFI: "afi", + AHIK: "ahik", + ASI: "asi", + AGHIK: "aghik", + AGFI: "agfi", + AGSI: "agsi", + AIH: "aih", + AL: "al", + ALR: "alr", + ALRK: "alrk", + ALY: "aly", + ALG: "alg", + ALGR: "algr", + ALGRK: "algrk", + ALGF: "algf", + ALGFR: "algfr", + ALHHHR: "alhhhr", + ALHHLR: "alhhlr", + ALFI: "alfi", + ALGFI: "algfi", + ALC: "alc", + ALCR: "alcr", + ALCG: "alcg", + ALCGR: "alcgr", + ALHSIK: "alhsik", + ALSI: "alsi", + ALGHSIK: "alghsik", + ALGSI: "algsi", + ALSIH: "alsih", + ALSIHN: "alsihn", + AXR: "axr", + AD: "ad", + ADR: "adr", + AE: "ae", + AER: "aer", + AW: "aw", + AWR: "awr", + AU: "au", + AUR: "aur", + N: "n", + NR: "nr", + NRK: "nrk", + NY: "ny", + NG: "ng", + NGR: "ngr", + NGRK: "ngrk", + NC: "nc", + NI: "ni", + NIY: "niy", + NIHH: "nihh", + NIHL: "nihl", + NIHF: "nihf", + 
NILH: "nilh", + NILL: "nill", + NILF: "nilf", + NCRK: "ncrk", + NCGRK: "ncgrk", + BAL: "bal", + BALR: "balr", + BAS: "bas", + BASR: "basr", + BASSM: "bassm", + BSA: "bsa", + BSM: "bsm", + BAKR: "bakr", + BSG: "bsg", + BIC: "bic", + BC: "bc", + BCR: "bcr", + BCT: "bct", + BCTR: "bctr", + BCTG: "bctg", + BCTGR: "bctgr", + BXH: "bxh", + BXHG: "bxhg", + BXLE: "bxle", + BXLEG: "bxleg", + BPP: "bpp", + BPRP: "bprp", + BRAS: "bras", + BRASL: "brasl", + BRC: "brc", + BRCL: "brcl", + BRCT: "brct", + BRCTG: "brctg", + BRCTH: "brcth", + BRXH: "brxh", + BRXHG: "brxhg", + BRXLE: "brxle", + BRXLG: "brxlg", + XSCH: "xsch", + CKSM: "cksm", + KM: "km", + KMA: "kma", + KMC: "kmc", + KMF: "kmf", + KMCTR: "kmctr", + KMO: "kmo", + CSCH: "csch", + C: "c", + CR: "cr", + CY: "cy", + CG: "cg", + CGR: "cgr", + CGF: "cgf", + CGFR: "cgfr", + CXBR: "cxbr", + CXTR: "cxtr", + CXR: "cxr", + CDB: "cdb", + CDBR: "cdbr", + CDTR: "cdtr", + CD: "cd", + CDR: "cdr", + CEB: "ceb", + CEBR: "cebr", + CE: "ce", + CER: "cer", + CRB: "crb", + CGRB: "cgrb", + CRJ: "crj", + CGRJ: "cgrj", + CFC: "cfc", + CRDTE: "crdte", + KXBR: "kxbr", + KXTR: "kxtr", + KDB: "kdb", + KDBR: "kdbr", + KDTR: "kdtr", + KEB: "keb", + KEBR: "kebr", + CS: "cs", + CSY: "csy", + CSG: "csg", + CSP: "csp", + CSPG: "cspg", + CSST: "csst", + CRT: "crt", + CGRT: "cgrt", + CEXTR: "cextr", + CEDTR: "cedtr", + CP: "cp", + CDS: "cds", + CDSY: "cdsy", + CDSG: "cdsg", + CH: "ch", + CHY: "chy", + CGH: "cgh", + CHHSI: "chhsi", + CHI: "chi", + CHSI: "chsi", + CGHI: "cghi", + CGHSI: "cghsi", + CHRL: "chrl", + CGHRL: "cghrl", + CHF: "chf", + CHHR: "chhr", + CHLR: "chlr", + CFI: "cfi", + CGFI: "cgfi", + CIB: "cib", + CGIB: "cgib", + CIJ: "cij", + CGIJ: "cgij", + CIT: "cit", + CGIT: "cgit", + CIH: "cih", + CL: "cl", + CLR: "clr", + CLY: "cly", + CLG: "clg", + CLGR: "clgr", + CLGF: "clgf", + CLGFR: "clgfr", + CLC: "clc", + CLI: "cli", + CLIY: "cliy", + CLRB: "clrb", + CLGRB: "clgrb", + CLRJ: "clrj", + CLGRJ: "clgrj", + CLRT: "clrt", + CLT: "clt", + CLGRT: 
"clgrt", + CLGT: "clgt", + CLMH: "clmh", + CLM: "clm", + CLMY: "clmy", + CLHF: "clhf", + CLHHR: "clhhr", + CLHLR: "clhlr", + CLHHSI: "clhhsi", + CLFI: "clfi", + CLFHSI: "clfhsi", + CLGHSI: "clghsi", + CLGFI: "clgfi", + CLIB: "clib", + CLGIB: "clgib", + CLIJ: "clij", + CLGIJ: "clgij", + CLFIT: "clfit", + CLGIT: "clgit", + CLIH: "clih", + CLCL: "clcl", + CLCLE: "clcle", + CLCLU: "clclu", + CLRL: "clrl", + CLHRL: "clhrl", + CLGRL: "clgrl", + CLGHRL: "clghrl", + CLGFRL: "clgfrl", + CLST: "clst", + CRL: "crl", + CGRL: "cgrl", + CGFRL: "cgfrl", + CUSE: "cuse", + CMPSC: "cmpsc", + KDSA: "kdsa", + KIMD: "kimd", + KLMD: "klmd", + KMAC: "kmac", + THDR: "thdr", + THDER: "thder", + CXFBR: "cxfbr", + CXFBRA: "cxfbra", + CXFTR: "cxftr", + CXFR: "cxfr", + CDFBR: "cdfbr", + CDFBRA: "cdfbra", + CDFTR: "cdftr", + CDFR: "cdfr", + CEFBR: "cefbr", + CEFBRA: "cefbra", + CEFR: "cefr", + CXGBR: "cxgbr", + CXGBRA: "cxgbra", + CXGTR: "cxgtr", + CXGTRA: "cxgtra", + CXGR: "cxgr", + CDGBR: "cdgbr", + CDGBRA: "cdgbra", + CDGTR: "cdgtr", + CDGTRA: "cdgtra", + CDGR: "cdgr", + CEGBR: "cegbr", + CEGBRA: "cegbra", + CEGR: "cegr", + CXLFBR: "cxlfbr", + CXLFTR: "cxlftr", + CDLFBR: "cdlfbr", + CDLFTR: "cdlftr", + CELFBR: "celfbr", + CXLGBR: "cxlgbr", + CXLGTR: "cxlgtr", + CDLGBR: "cdlgbr", + CDLGTR: "cdlgtr", + CELGBR: "celgbr", + CXPT: "cxpt", + CDPT: "cdpt", + CXSTR: "cxstr", + CDSTR: "cdstr", + CXUTR: "cxutr", + CDUTR: "cdutr", + CXZT: "cxzt", + CDZT: "cdzt", + TBEDR: "tbedr", + TBDR: "tbdr", + CVB: "cvb", + CVBY: "cvby", + CVBG: "cvbg", + CVD: "cvd", + CVDY: "cvdy", + CVDG: "cvdg", + CFXBR: "cfxbr", + CFXBRA: "cfxbra", + CGXBR: "cgxbr", + CGXBRA: "cgxbra", + CFXTR: "cfxtr", + CGXTR: "cgxtr", + CGXTRA: "cgxtra", + CFXR: "cfxr", + CGXR: "cgxr", + CFDBR: "cfdbr", + CFDBRA: "cfdbra", + CGDBR: "cgdbr", + CGDBRA: "cgdbra", + CFDTR: "cfdtr", + CGDTR: "cgdtr", + CGDTRA: "cgdtra", + CFDR: "cfdr", + CGDR: "cgdr", + CFEBR: "cfebr", + CFEBRA: "cfebra", + CGEBR: "cgebr", + CGEBRA: "cgebra", + CFER: "cfer", + 
CGER: "cger", + CLFXBR: "clfxbr", + CLGXBR: "clgxbr", + CLFXTR: "clfxtr", + CLGXTR: "clgxtr", + CLFDBR: "clfdbr", + CLGDBR: "clgdbr", + CLFDTR: "clfdtr", + CLGDTR: "clgdtr", + CLFEBR: "clfebr", + CLGEBR: "clgebr", + CPXT: "cpxt", + CPDT: "cpdt", + CSXTR: "csxtr", + CSDTR: "csdtr", + CUXTR: "cuxtr", + CUDTR: "cudtr", + CZXT: "czxt", + CZDT: "czdt", + CU24: "cu24", + CU21: "cu21", + CU12: "cu12", + CU14: "cu14", + CU42: "cu42", + CU41: "cu41", + CPYA: "cpya", + CPSDR: "cpsdr", + VSCSHP: "vscshp", + VSCHP: "vschp", + DFLTCC: "dfltcc", + D: "d", + DR: "dr", + DXBR: "dxbr", + DXTR: "dxtr", + DXTRA: "dxtra", + DXR: "dxr", + DDB: "ddb", + DDBR: "ddbr", + DDTR: "ddtr", + DDTRA: "ddtra", + DD: "dd", + DDR: "ddr", + DEB: "deb", + DEBR: "debr", + DE: "de", + DER: "der", + DP: "dp", + DL: "dl", + DLR: "dlr", + DLG: "dlg", + DLGR: "dlgr", + DSG: "dsg", + DSGR: "dsgr", + DSGF: "dsgf", + DSGFR: "dsgfr", + DIDBR: "didbr", + DIEBR: "diebr", + ED: "ed", + EDMK: "edmk", + X: "x", + XR: "xr", + XRK: "xrk", + XY: "xy", + XG: "xg", + XGR: "xgr", + XGRK: "xgrk", + XC: "xc", + XI: "xi", + XIY: "xiy", + XIHF: "xihf", + XILF: "xilf", + EX: "ex", + EXRL: "exrl", + EAR: "ear", + ESEA: "esea", + EEXTR: "eextr", + EEDTR: "eedtr", + ECAG: "ecag", + ECTG: "ectg", + EFPC: "efpc", + EPAR: "epar", + EPAIR: "epair", + EPSW: "epsw", + ESAR: "esar", + ESAIR: "esair", + ESXTR: "esxtr", + ESDTR: "esdtr", + EREG: "ereg", + EREGG: "eregg", + ESTA: "esta", + ETND: "etnd", + FLOGR: "flogr", + HSCH: "hsch", + HDR: "hdr", + HER: "her", + IAC: "iac", + IEXTR: "iextr", + IEDTR: "iedtr", + IC: "ic", + ICY: "icy", + ICMH: "icmh", + ICM: "icm", + ICMY: "icmy", + IIHH: "iihh", + IIHL: "iihl", + IIHF: "iihf", + IILH: "iilh", + IILL: "iill", + IILF: "iilf", + IPM: "ipm", + IPK: "ipk", + IRBM: "irbm", + ISKE: "iske", + IVSK: "ivsk", + IDTE: "idte", + IPTE: "ipte", + L: "l", + LR: "lr", + LY: "ly", + LG: "lg", + LGR: "lgr", + LGF: "lgf", + LGFR: "lgfr", + LXR: "lxr", + LD: "ld", + LDR: "ldr", + LDY: "ldy", + LE: "le", + 
LER: "ler", + LEY: "ley", + LAM: "lam", + LAMY: "lamy", + LA: "la", + LAY: "lay", + LAE: "lae", + LAEY: "laey", + LARL: "larl", + LASP: "lasp", + LAA: "laa", + LAAG: "laag", + LAAL: "laal", + LAALG: "laalg", + LAN: "lan", + LANG: "lang", + LAX: "lax", + LAXG: "laxg", + LAO: "lao", + LAOG: "laog", + LT: "lt", + LTR: "ltr", + LTG: "ltg", + LTGR: "ltgr", + LTGF: "ltgf", + LTGFR: "ltgfr", + LTXBR: "ltxbr", + LTXTR: "ltxtr", + LTXR: "ltxr", + LTDBR: "ltdbr", + LTDTR: "ltdtr", + LTDR: "ltdr", + LTEBR: "ltebr", + LTER: "lter", + LAT: "lat", + LGAT: "lgat", + LZRF: "lzrf", + LZRG: "lzrg", + LBEAR: "lbear", + LB: "lb", + LBR: "lbr", + LGB: "lgb", + LGBR: "lgbr", + LBH: "lbh", + LCR: "lcr", + LCGR: "lcgr", + LCGFR: "lcgfr", + LCXBR: "lcxbr", + LCXR: "lcxr", + LCDBR: "lcdbr", + LCDR: "lcdr", + LCDFR: "lcdfr", + LCEBR: "lcebr", + LCER: "lcer", + LCTL: "lctl", + LCTLG: "lctlg", + LCBB: "lcbb", + FIXBR: "fixbr", + FIXBRA: "fixbra", + FIXTR: "fixtr", + FIXR: "fixr", + FIDBR: "fidbr", + FIDBRA: "fidbra", + FIDTR: "fidtr", + FIDR: "fidr", + FIEBR: "fiebr", + FIEBRA: "fiebra", + FIER: "fier", + LFPC: "lfpc", + LFAS: "lfas", + LDGR: "ldgr", + LGDR: "lgdr", + LGG: "lgg", + LGSC: "lgsc", + LH: "lh", + LHR: "lhr", + LHY: "lhy", + LGH: "lgh", + LGHR: "lghr", + LHH: "lhh", + LOCHHI: "lochhi", + LHI: "lhi", + LGHI: "lghi", + LOCHI: "lochi", + LOCGHI: "locghi", + LHRL: "lhrl", + LGHRL: "lghrl", + LFH: "lfh", + LFHAT: "lfhat", + LOCFH: "locfh", + LOCFHR: "locfhr", + LGFI: "lgfi", + LXDB: "lxdb", + LXDBR: "lxdbr", + LXDTR: "lxdtr", + LXD: "lxd", + LXDR: "lxdr", + LXEB: "lxeb", + LXEBR: "lxebr", + LXE: "lxe", + LXER: "lxer", + LDEB: "ldeb", + LDEBR: "ldebr", + LDETR: "ldetr", + LDE: "lde", + LDER: "lder", + LLGF: "llgf", + LLGFR: "llgfr", + LLGFSG: "llgfsg", + LLGFAT: "llgfat", + LLZRGF: "llzrgf", + LLC: "llc", + LLCR: "llcr", + LLGC: "llgc", + LLGCR: "llgcr", + LLCH: "llch", + LLH: "llh", + LLHR: "llhr", + LLGH: "llgh", + LLGHR: "llghr", + LLHH: "llhh", + LLHRL: "llhrl", + LLGHRL: "llghrl", + 
LLIHH: "llihh", + LLIHL: "llihl", + LLIHF: "llihf", + LLILH: "llilh", + LLILL: "llill", + LLILF: "llilf", + LLGFRL: "llgfrl", + LLGT: "llgt", + LLGTR: "llgtr", + LLGTAT: "llgtat", + LM: "lm", + LMY: "lmy", + LMG: "lmg", + LMD: "lmd", + LMH: "lmh", + LNR: "lnr", + LNGR: "lngr", + LNGFR: "lngfr", + LNXBR: "lnxbr", + LNXR: "lnxr", + LNDBR: "lndbr", + LNDR: "lndr", + LNDFR: "lndfr", + LNEBR: "lnebr", + LNER: "lner", + LOC: "loc", + LOCR: "locr", + LOCG: "locg", + LOCGR: "locgr", + LPTEA: "lptea", + LPD: "lpd", + LPDG: "lpdg", + LPQ: "lpq", + LPR: "lpr", + LPGR: "lpgr", + LPGFR: "lpgfr", + LPXBR: "lpxbr", + LPXR: "lpxr", + LPDBR: "lpdbr", + LPDR: "lpdr", + LPDFR: "lpdfr", + LPEBR: "lpebr", + LPER: "lper", + LPSW: "lpsw", + LPSWE: "lpswe", + LPSWEY: "lpswey", + LRA: "lra", + LRAY: "lray", + LRAG: "lrag", + LRL: "lrl", + LGRL: "lgrl", + LGFRL: "lgfrl", + LRVH: "lrvh", + LRV: "lrv", + LRVR: "lrvr", + LRVG: "lrvg", + LRVGR: "lrvgr", + LDXBR: "ldxbr", + LDXBRA: "ldxbra", + LDXTR: "ldxtr", + LDXR: "ldxr", + LRDR: "lrdr", + LEXBR: "lexbr", + LEXBRA: "lexbra", + LEXR: "lexr", + LEDBR: "ledbr", + LEDBRA: "ledbra", + LEDTR: "ledtr", + LEDR: "ledr", + LRER: "lrer", + LURA: "lura", + LURAG: "lurag", + LZXR: "lzxr", + LZDR: "lzdr", + LZER: "lzer", + MSTA: "msta", + MSCH: "msch", + MC: "mc", + MVHHI: "mvhhi", + MVHI: "mvhi", + MVGHI: "mvghi", + MVC: "mvc", + MVI: "mvi", + MVIY: "mviy", + MVCIN: "mvcin", + MVCL: "mvcl", + MVCLE: "mvcle", + MVCLU: "mvclu", + MVN: "mvn", + MVPG: "mvpg", + MVCRL: "mvcrl", + MVST: "mvst", + MVCP: "mvcp", + MVCS: "mvcs", + MVCDK: "mvcdk", + MVCK: "mvck", + MVO: "mvo", + MVCOS: "mvcos", + MVCSK: "mvcsk", + MVZ: "mvz", + MG: "mg", + MGRK: "mgrk", + M: "m", + MFY: "mfy", + MR: "mr", + MXBR: "mxbr", + MXTR: "mxtr", + MXTRA: "mxtra", + MXR: "mxr", + MDB: "mdb", + MDBR: "mdbr", + MDTR: "mdtr", + MDTRA: "mdtra", + MD: "md", + MDR: "mdr", + MXDB: "mxdb", + MXDBR: "mxdbr", + MXD: "mxd", + MXDR: "mxdr", + MEEB: "meeb", + MEEBR: "meebr", + MEE: "mee", + MEER: "meer", 
+ MDEB: "mdeb", + MDEBR: "mdebr", + MDE: "mde", + MDER: "mder", + ME: "me", + MER: "mer", + MAY: "may", + MAYR: "mayr", + MADB: "madb", + MADBR: "madbr", + MAD: "mad", + MADR: "madr", + MAEB: "maeb", + MAEBR: "maebr", + MAE: "mae", + MAER: "maer", + MAYH: "mayh", + MAYHR: "mayhr", + MAYL: "mayl", + MAYLR: "maylr", + MSDB: "msdb", + MSDBR: "msdbr", + MSD: "msd", + MSDR: "msdr", + MSEB: "mseb", + MSEBR: "msebr", + MSE: "mse", + MSER: "mser", + MP: "mp", + MH: "mh", + MHY: "mhy", + MGH: "mgh", + MHI: "mhi", + MGHI: "mghi", + MLG: "mlg", + MLGR: "mlgr", + ML: "ml", + MLR: "mlr", + MS: "ms", + MSC: "msc", + MSR: "msr", + MSRKC: "msrkc", + MSY: "msy", + MSG: "msg", + MSGC: "msgc", + MSGR: "msgr", + MSGRKC: "msgrkc", + MSGF: "msgf", + MSGFR: "msgfr", + MSFI: "msfi", + MSGFI: "msgfi", + MYH: "myh", + MYHR: "myhr", + MYL: "myl", + MYLR: "mylr", + MY: "my", + MYR: "myr", + NNRK: "nnrk", + NNGRK: "nngrk", + NNPA: "nnpa", + NIAI: "niai", + NTSTG: "ntstg", + NORK: "nork", + NOGRK: "nogrk", + NXRK: "nxrk", + NXGRK: "nxgrk", + O: "o", + OR: "or", + ORK: "ork", + OY: "oy", + OG: "og", + OGR: "ogr", + OGRK: "ogrk", + OC: "oc", + OI: "oi", + OIY: "oiy", + OIHH: "oihh", + OIHL: "oihl", + OIHF: "oihf", + OILH: "oilh", + OILL: "oill", + OILF: "oilf", + OCRK: "ocrk", + OCGRK: "ocgrk", + PACK: "pack", + PKA: "pka", + PKU: "pku", + PGIN: "pgin", + PGOUT: "pgout", + PCC: "pcc", + PCKMO: "pckmo", + PFPO: "pfpo", + PFMF: "pfmf", + PLO: "plo", + PPA: "ppa", + PRNO: "prno", + PTFF: "ptff", + PTF: "ptf", + POPCNT: "popcnt", + PFD: "pfd", + PFDRL: "pfdrl", + PC: "pc", + PR: "pr", + PT: "pt", + PTI: "pti", + PALB: "palb", + PTLB: "ptlb", + QAXTR: "qaxtr", + QADTR: "qadtr", + QPACI: "qpaci", + RRXTR: "rrxtr", + RRDTR: "rrdtr", + RCHP: "rchp", + RDP: "rdp", + RRBE: "rrbe", + RRBM: "rrbm", + RP: "rp", + RSCH: "rsch", + RLL: "rll", + RLLG: "rllg", + RNSBG: "rnsbg", + RXSBG: "rxsbg", + RISBG: "risbg", + RISBGN: "risbgn", + RISBHG: "risbhg", + RISBLG: "risblg", + ROSBG: "rosbg", + SRST: "srst", + 
SRSTU: "srstu", + SELR: "selr", + SELGR: "selgr", + SELFHR: "selfhr", + SAR: "sar", + SAL: "sal", + SAC: "sac", + SACF: "sacf", + SAM24: "sam24", + SAM31: "sam31", + SAM64: "sam64", + SRNM: "srnm", + SRNMB: "srnmb", + SCHM: "schm", + SCK: "sck", + SCKC: "sckc", + SCKPF: "sckpf", + SPT: "spt", + SRNMT: "srnmt", + SFPC: "sfpc", + SFASR: "sfasr", + SPX: "spx", + SPM: "spm", + SPKA: "spka", + SSAR: "ssar", + SSAIR: "ssair", + SSKE: "sske", + SSM: "ssm", + SRP: "srp", + SLDA: "slda", + SLDL: "sldl", + SLA: "sla", + SLAK: "slak", + SLAG: "slag", + SLL: "sll", + SLLK: "sllk", + SLLG: "sllg", + SRDA: "srda", + SRDL: "srdl", + SRA: "sra", + SRAK: "srak", + SRAG: "srag", + SRL: "srl", + SRLK: "srlk", + SRLG: "srlg", + SLXT: "slxt", + SLDT: "sldt", + SRXT: "srxt", + SRDT: "srdt", + SIGP: "sigp", + SORTL: "sortl", + SQXBR: "sqxbr", + SQXR: "sqxr", + SQDB: "sqdb", + SQDBR: "sqdbr", + SQD: "sqd", + SQDR: "sqdr", + SQEB: "sqeb", + SQEBR: "sqebr", + SQE: "sqe", + SQER: "sqer", + SSCH: "ssch", + ST: "st", + STY: "sty", + STG: "stg", + STD: "std", + STDY: "stdy", + STE: "ste", + STEY: "stey", + STAM: "stam", + STAMY: "stamy", + STBEAR: "stbear", + STCPS: "stcps", + STCRW: "stcrw", + STC: "stc", + STCY: "stcy", + STCH: "stch", + STCMH: "stcmh", + STCM: "stcm", + STCMY: "stcmy", + STCK: "stck", + STCKC: "stckc", + STCKE: "stcke", + STCKF: "stckf", + STCTL: "stctl", + STCTG: "stctg", + STAP: "stap", + STIDP: "stidp", + STPT: "stpt", + STFL: "stfl", + STFLE: "stfle", + STFPC: "stfpc", + STGSC: "stgsc", + STH: "sth", + STHY: "sthy", + STHH: "sthh", + STHRL: "sthrl", + STFH: "stfh", + STOCFH: "stocfh", + STM: "stm", + STMY: "stmy", + STMG: "stmg", + STMH: "stmh", + STOC: "stoc", + STOCG: "stocg", + STPQ: "stpq", + STPX: "stpx", + STRAG: "strag", + STRL: "strl", + STGRL: "stgrl", + STRVH: "strvh", + STRV: "strv", + STRVG: "strvg", + STSCH: "stsch", + STSI: "stsi", + STNSM: "stnsm", + STOSM: "stosm", + STURA: "stura", + STURG: "sturg", + S: "s", + SR: "sr", + SRK: "srk", + SY: "sy", + SG: 
"sg", + SGR: "sgr", + SGRK: "sgrk", + SGF: "sgf", + SGFR: "sgfr", + SXBR: "sxbr", + SXTR: "sxtr", + SXTRA: "sxtra", + SDB: "sdb", + SDBR: "sdbr", + SDTR: "sdtr", + SDTRA: "sdtra", + SEB: "seb", + SEBR: "sebr", + SP: "sp", + SH: "sh", + SHY: "shy", + SGH: "sgh", + SHHHR: "shhhr", + SHHLR: "shhlr", + SL: "sl", + SLR: "slr", + SLRK: "slrk", + SLY: "sly", + SLG: "slg", + SLGR: "slgr", + SLGRK: "slgrk", + SLGF: "slgf", + SLGFR: "slgfr", + SLHHHR: "slhhhr", + SLHHLR: "slhhlr", + SLFI: "slfi", + SLGFI: "slgfi", + SLB: "slb", + SLBR: "slbr", + SLBG: "slbg", + SLBGR: "slbgr", + SXR: "sxr", + SD: "sd", + SDR: "sdr", + SE: "se", + SER: "ser", + SW: "sw", + SWR: "swr", + SU: "su", + SUR: "sur", + SVC: "svc", + TAR: "tar", + TAM: "tam", + TS: "ts", + TB: "tb", + TCXB: "tcxb", + TDCXT: "tdcxt", + TCDB: "tcdb", + TDCDT: "tdcdt", + TCEB: "tceb", + TDCET: "tdcet", + TDGXT: "tdgxt", + TDGDT: "tdgdt", + TDGET: "tdget", + TP: "tp", + TPEI: "tpei", + TPI: "tpi", + TPROT: "tprot", + TSCH: "tsch", + TM: "tm", + TMY: "tmy", + TMHH: "tmhh", + TMHL: "tmhl", + TMLH: "tmlh", + TMLL: "tmll", + TMH: "tmh", + TML: "tml", + TRACE: "trace", + TRACG: "tracg", + TABORT: "tabort", + TBEGINC: "tbeginc", + TBEGIN: "tbegin", + TEND: "tend", + TR: "tr", + TRT: "trt", + TRTE: "trte", + TRTR: "trtr", + TRTRE: "trtre", + TRE: "tre", + TROO: "troo", + TROT: "trot", + TRTO: "trto", + TRTT: "trtt", + TRAP2: "trap2", + TRAP4: "trap4", + UNPK: "unpk", + UNPKA: "unpka", + UNPKU: "unpku", + UPT: "upt", + VA: "va", + VACC: "vacc", + VAP: "vap", + VAC: "vac", + VACCC: "vaccc", + VN: "vn", + VNC: "vnc", + VAVG: "vavg", + VAVGL: "vavgl", + VBPERM: "vbperm", + VCKSM: "vcksm", + VCP: "vcp", + VCEQ: "vceq", + VCH: "vch", + VCHL: "vchl", + VCSPH: "vcsph", + VCVB: "vcvb", + VCVBG: "vcvbg", + VCVD: "vcvd", + VCVDG: "vcvdg", + VCLZDP: "vclzdp", + VCLZ: "vclz", + VCTZ: "vctz", + VDP: "vdp", + VEC: "vec", + VECL: "vecl", + VERIM: "verim", + VERLL: "verll", + VERLLV: "verllv", + VESLV: "veslv", + VESL: "vesl", + VESRA: "vesra", 
+ VESRAV: "vesrav", + VESRL: "vesrl", + VESRLV: "vesrlv", + VX: "vx", + VFAE: "vfae", + VFEE: "vfee", + VFENE: "vfene", + VFA: "vfa", + WFK: "wfk", + VFCE: "vfce", + VFCH: "vfch", + VFCHE: "vfche", + WFC: "wfc", + VCLFNH: "vclfnh", + VCLFNL: "vclfnl", + VCRNF: "vcrnf", + VCFPS: "vcfps", + VCDG: "vcdg", + VCFPL: "vcfpl", + VCDLG: "vcdlg", + VCFN: "vcfn", + VCSFP: "vcsfp", + VCGD: "vcgd", + VCLFP: "vclfp", + VCLGD: "vclgd", + VCNF: "vcnf", + VFD: "vfd", + VFLL: "vfll", + VFLR: "vflr", + VFMAX: "vfmax", + VFMIN: "vfmin", + VFM: "vfm", + VFMA: "vfma", + VFMS: "vfms", + VFNMA: "vfnma", + VFNMS: "vfnms", + VFPSO: "vfpso", + VFSQ: "vfsq", + VFS: "vfs", + VFTCI: "vftci", + VGFM: "vgfm", + VGFMA: "vgfma", + VGEF: "vgef", + VGEG: "vgeg", + VGBM: "vgbm", + VGM: "vgm", + VISTR: "vistr", + VL: "vl", + VLR: "vlr", + VLREP: "vlrep", + VLEBRH: "vlebrh", + VLEBRF: "vlebrf", + VLEBRG: "vlebrg", + VLBRREP: "vlbrrep", + VLLEBRZ: "vllebrz", + VLBR: "vlbr", + VLC: "vlc", + VLEH: "vleh", + VLEF: "vlef", + VLEG: "vleg", + VLEB: "vleb", + VLEIH: "vleih", + VLEIF: "vleif", + VLEIG: "vleig", + VLEIB: "vleib", + VLER: "vler", + VFI: "vfi", + VLGV: "vlgv", + VLIP: "vlip", + VLLEZ: "vllez", + VLM: "vlm", + VLP: "vlp", + VLRL: "vlrl", + VLRLR: "vlrlr", + VLBB: "vlbb", + VLVG: "vlvg", + VLVGP: "vlvgp", + VLL: "vll", + VMX: "vmx", + VMXL: "vmxl", + VMRH: "vmrh", + VMRL: "vmrl", + VMN: "vmn", + VMNL: "vmnl", + VMAE: "vmae", + VMAH: "vmah", + VMALE: "vmale", + VMALH: "vmalh", + VMALO: "vmalo", + VMAL: "vmal", + VMAO: "vmao", + VMSP: "vmsp", + VMP: "vmp", + VME: "vme", + VMH: "vmh", + VMLE: "vmle", + VMLH: "vmlh", + VMLO: "vmlo", + VML: "vml", + VMO: "vmo", + VMSL: "vmsl", + VNN: "vnn", + VNO: "vno", + VNX: "vnx", + VO: "vo", + VOC: "voc", + VPK: "vpk", + VPKLS: "vpkls", + VPKS: "vpks", + VPKZ: "vpkz", + VPKZR: "vpkzr", + VPSOP: "vpsop", + VPERM: "vperm", + VPDI: "vpdi", + VPOPCT: "vpopct", + VRP: "vrp", + VREP: "vrep", + VREPI: "vrepi", + VSCEF: "vscef", + VSCEG: "vsceg", + VSEL: "vsel", + VSDP: 
"vsdp", + VSRP: "vsrp", + VSRPR: "vsrpr", + VSL: "vsl", + VSLB: "vslb", + VSLD: "vsld", + VSLDB: "vsldb", + VSRA: "vsra", + VSRAB: "vsrab", + VSRD: "vsrd", + VSRL: "vsrl", + VSRLB: "vsrlb", + VSEG: "vseg", + VST: "vst", + VSTEBRH: "vstebrh", + VSTEBRF: "vstebrf", + VSTEBRG: "vstebrg", + VSTBR: "vstbr", + VSTEH: "vsteh", + VSTEF: "vstef", + VSTEG: "vsteg", + VSTEB: "vsteb", + VSTER: "vster", + VSTM: "vstm", + VSTRL: "vstrl", + VSTRLR: "vstrlr", + VSTL: "vstl", + VSTRC: "vstrc", + VSTRS: "vstrs", + VS: "vs", + VSCBI: "vscbi", + VSP: "vsp", + VSBCBI: "vsbcbi", + VSBI: "vsbi", + VSUMG: "vsumg", + VSUMQ: "vsumq", + VSUM: "vsum", + VTP: "vtp", + VTM: "vtm", + VUPH: "vuph", + VUPLH: "vuplh", + VUPLL: "vupll", + VUPL: "vupl", + VUPKZ: "vupkz", + VUPKZH: "vupkzh", + VUPKZL: "vupkzl", + ZAP: "zap", +} + +var ( + ap_Reg_8_11 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{8, 4}} + ap_DispUnsigned_20_31 = &argField{Type: TypeDispUnsigned, flags: 0x10, BitField: BitField{20, 12}} + ap_IndexReg_12_15 = &argField{Type: TypeIndexReg, flags: 0x41, BitField: BitField{12, 4}} + ap_BaseReg_16_19 = &argField{Type: TypeBaseReg, flags: 0x21, BitField: BitField{16, 4}} + ap_Reg_12_15 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{12, 4}} + ap_Reg_24_27 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{24, 4}} + ap_Reg_28_31 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{28, 4}} + ap_Reg_16_19 = &argField{Type: TypeReg, flags: 0x1, BitField: BitField{16, 4}} + ap_DispSigned20_20_39 = &argField{Type: TypeDispSigned20, flags: 0x10, BitField: BitField{20, 20}} + ap_FPReg_24_27 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{24, 4}} + ap_FPReg_28_31 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{28, 4}} + ap_FPReg_16_19 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{16, 4}} + ap_Mask_20_23 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{20, 4}} + ap_FPReg_8_11 = &argField{Type: TypeFPReg, flags: 0x2, 
BitField: BitField{8, 4}} + ap_Len_8_11 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{8, 4}} + ap_DispUnsigned_36_47 = &argField{Type: TypeDispUnsigned, flags: 0x10, BitField: BitField{36, 12}} + ap_Len_12_15 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{12, 4}} + ap_BaseReg_32_35 = &argField{Type: TypeBaseReg, flags: 0x21, BitField: BitField{32, 4}} + ap_ImmSigned16_16_31 = &argField{Type: TypeImmSigned16, flags: 0x0, BitField: BitField{16, 16}} + ap_ImmSigned32_16_47 = &argField{Type: TypeImmSigned32, flags: 0x0, BitField: BitField{16, 32}} + ap_ImmSigned8_8_15 = &argField{Type: TypeImmSigned8, flags: 0x0, BitField: BitField{8, 8}} + ap_ImmUnsigned_16_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 32}} + ap_FPReg_12_15 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{12, 4}} + ap_Len_8_15 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{8, 8}} + ap_ImmUnsigned_8_15 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{8, 8}} + ap_ImmUnsigned_16_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 16}} + ap_Mask_8_11 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{8, 4}} + ap_RegImSigned16_32_47 = &argField{Type: TypeRegImSigned16, flags: 0x80, BitField: BitField{32, 16}} + ap_RegImSigned12_12_23 = &argField{Type: TypeRegImSigned12, flags: 0x80, BitField: BitField{12, 12}} + ap_RegImSigned24_24_47 = &argField{Type: TypeRegImSigned24, flags: 0x80, BitField: BitField{24, 24}} + ap_RegImSigned16_16_31 = &argField{Type: TypeRegImSigned16, flags: 0x80, BitField: BitField{16, 16}} + ap_RegImSigned32_16_47 = &argField{Type: TypeRegImSigned32, flags: 0x80, BitField: BitField{16, 32}} + ap_Mask_32_35 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{32, 4}} + ap_Mask_16_19 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{16, 4}} + ap_ImmSigned16_32_47 = &argField{Type: TypeImmSigned16, flags: 0x0, BitField: BitField{32, 16}} + 
ap_ImmSigned8_32_39 = &argField{Type: TypeImmSigned8, flags: 0x0, BitField: BitField{32, 8}} + ap_Mask_12_15 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{12, 4}} + ap_ImmUnsigned_32_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 16}} + ap_ImmUnsigned_32_39 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 8}} + ap_FPReg_32_35 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{32, 4}} + ap_Mask_36_39 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{36, 4}} + ap_ACReg_24_27 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{24, 4}} + ap_ACReg_28_31 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{28, 4}} + ap_VecReg_8_11 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{8, 4}} + ap_VecReg_12_15 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{12, 4}} + ap_VecReg_16_19 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{16, 4}} + ap_ImmUnsigned_36_39 = &argField{Type: TypeImmUnsigned, flags: 0xc00, BitField: BitField{36, 4}} + ap_Mask_24_27 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{24, 4}} + ap_ACReg_8_11 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{8, 4}} + ap_ACReg_12_15 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{12, 4}} + ap_CReg_8_11 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{8, 4}} + ap_CReg_12_15 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{12, 4}} + ap_ImmUnsigned_24_27 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{24, 4}} + ap_ImmUnsigned_28_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{28, 4}} + ap_ImmUnsigned_16_23 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 8}} + ap_ImmUnsigned_24_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{24, 8}} + ap_ImmUnsigned_12_15 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{12, 4}} + ap_ImmUnsigned_28_35 = 
&argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{28, 8}} + ap_VecReg_32_35 = &argField{Type: TypeVecReg, flags: 0x8, BitField: BitField{32, 4}} + ap_Mask_28_31 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{28, 4}} + ap_ImmUnsigned_16_27 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 12}} + ap_ImmUnsigned_32_35 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 4}} +) + +var instFormats = [...]instFormat{ + {A, 0xff00000000000000, 0x5a00000000000000, 0x0, // ADD (32) (A R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AR, 0xff00000000000000, 0x1a00000000000000, 0x0, // ADD (32) (AR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {ARK, 0xffff000000000000, 0xb9f8000000000000, 0xf0000000000, // ADD (32) (ARK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AY, 0xff00000000ff0000, 0xe3000000005a0000, 0x0, // ADD (32) (AY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AG, 0xff00000000ff0000, 0xe300000000080000, 0x0, // ADD (64) (AG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AGR, 0xffff000000000000, 0xb908000000000000, 0xff0000000000, // ADD (64) (AGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {AGRK, 0xffff000000000000, 0xb9e8000000000000, 0xf0000000000, // ADD (64) (AGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AGF, 0xff00000000ff0000, 0xe300000000180000, 0x0, // ADD (64←32) (AGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AGFR, 0xffff000000000000, 0xb918000000000000, 0xff0000000000, // ADD (64←32) (AGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {AXBR, 0xffff000000000000, 0xb34a000000000000, 0xff0000000000, // ADD (extended BFP) (AXBR R1,R2) + [8]*argField{ap_FPReg_24_27, 
ap_FPReg_28_31}}, + {AXTR, 0xffff000000000000, 0xb3da000000000000, 0xf0000000000, // ADD (extended DFP) (AXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {AXTRA, 0xffff000000000000, 0xb3da000000000000, 0x0, // ADD (extended DFP) (AXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {ADB, 0xff00000000ff0000, 0xed000000001a0000, 0xff000000, // ADD (long BFP) (ADB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ADBR, 0xffff000000000000, 0xb31a000000000000, 0xff0000000000, // ADD (long BFP) (ADBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {ADTR, 0xffff000000000000, 0xb3d2000000000000, 0xf0000000000, // ADD (long DFP) (ADTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {ADTRA, 0xffff000000000000, 0xb3d2000000000000, 0x0, // ADD (long DFP) (ADTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {AEB, 0xff00000000ff0000, 0xed000000000a0000, 0xff000000, // ADD (short BFP) (AEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AEBR, 0xffff000000000000, 0xb30a000000000000, 0xff0000000000, // ADD (short BFP) (AEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {AP, 0xff00000000000000, 0xfa00000000000000, 0x0, // ADD DECIMAL (AP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {AH, 0xff00000000000000, 0x4a00000000000000, 0x0, // ADD HALFWORD (32←16) (AH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AHY, 0xff00000000ff0000, 0xe3000000007a0000, 0x0, // ADD HALFWORD (32←16) (AHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AGH, 0xff00000000ff0000, 
0xe300000000380000, 0x0, // ADD HALFWORD (64→16) (AGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AHI, 0xff0f000000000000, 0xa70a000000000000, 0x0, // ADD HALFWORD IMMEDIATE (32←16) (AHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {AGHI, 0xff0f000000000000, 0xa70b000000000000, 0x0, // ADD HALFWORD IMMEDIATE (64←16) (AGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {AHHHR, 0xffff000000000000, 0xb9c8000000000000, 0xf0000000000, // ADD HIGH (32) (AHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AHHLR, 0xffff000000000000, 0xb9d8000000000000, 0xf0000000000, // ADD HIGH (32) (AHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {AFI, 0xff0f000000000000, 0xc209000000000000, 0x0, // ADD IMMEDIATE (32) (AFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {AHIK, 0xff00000000ff0000, 0xec00000000d80000, 0xff000000, // ADD IMMEDIATE (32←16) (AHIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {ASI, 0xff00000000ff0000, 0xeb000000006a0000, 0x0, // ADD IMMEDIATE (32←8) (ASI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {AGHIK, 0xff00000000ff0000, 0xec00000000d90000, 0xff000000, // ADD IMMEDIATE (64←16) (AGHIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {AGFI, 0xff0f000000000000, 0xc208000000000000, 0x0, // ADD IMMEDIATE (64←32) (AGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {AGSI, 0xff00000000ff0000, 0xeb000000007a0000, 0x0, // ADD IMMEDIATE (64←8) (AGSI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {AIH, 0xff0f000000000000, 0xcc08000000000000, 0x0, // ADD IMMEDIATE HIGH (32) (AIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {AL, 0xff00000000000000, 0x5e00000000000000, 0x0, // ADD LOGICAL (32) (AL R1,D2(X2,B2)) + 
[8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALR, 0xff00000000000000, 0x1e00000000000000, 0x0, // ADD LOGICAL (32) (ALR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {ALRK, 0xffff000000000000, 0xb9fa000000000000, 0xf0000000000, // ADD LOGICAL (32) (ALRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALY, 0xff00000000ff0000, 0xe3000000005e0000, 0x0, // ADD LOGICAL (32) (ALY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALG, 0xff00000000ff0000, 0xe3000000000a0000, 0x0, // ADD LOGICAL (64) (ALG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALGR, 0xffff000000000000, 0xb90a000000000000, 0xff0000000000, // ADD LOGICAL (64) (ALGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALGRK, 0xffff000000000000, 0xb9ea000000000000, 0xf0000000000, // ADD LOGICAL (64) (ALGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALGF, 0xff00000000ff0000, 0xe3000000001a0000, 0x0, // ADD LOGICAL (64←32) (ALGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALGFR, 0xffff000000000000, 0xb91a000000000000, 0xff0000000000, // ADD LOGICAL (64←32) (ALGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALHHHR, 0xffff000000000000, 0xb9ca000000000000, 0xf0000000000, // ADD LOGICAL HIGH (32) (ALHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALHHLR, 0xffff000000000000, 0xb9da000000000000, 0xf0000000000, // ADD LOGICAL HIGH (32) (ALHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {ALFI, 0xff0f000000000000, 0xc20b000000000000, 0x0, // ADD LOGICAL IMMEDIATE (32) (ALFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {ALGFI, 0xff0f000000000000, 0xc20a000000000000, 0x0, // ADD LOGICAL IMMEDIATE (64←32) (ALGFI R1,I2) + [8]*argField{ap_Reg_8_11, 
ap_ImmUnsigned_16_47}}, + {ALC, 0xff00000000ff0000, 0xe300000000980000, 0x0, // ADD LOGICAL WITH CARRY (32) (ALC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALCR, 0xffff000000000000, 0xb998000000000000, 0xff0000000000, // ADD LOGICAL WITH CARRY (32) (ALCR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALCG, 0xff00000000ff0000, 0xe300000000880000, 0x0, // ADD LOGICAL WITH CARRY (64) (ALCG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ALCGR, 0xffff000000000000, 0xb988000000000000, 0xff0000000000, // ADD LOGICAL WITH CARRY (64) (ALCGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ALHSIK, 0xff00000000ff0000, 0xec00000000da0000, 0xff000000, // ADD LOGICAL WITH SIGNED IMMEDIATE(32→16) (ALHSIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {ALSI, 0xff00000000ff0000, 0xeb000000006e0000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE (32←8) (ALSI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {ALGHSIK, 0xff00000000ff0000, 0xec00000000db0000, 0xff000000, // ADD LOGICAL WITH SIGNED IMMEDIATE(64→16) (ALGHSIK R1,R3,I2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmSigned16_16_31}}, + {ALGSI, 0xff00000000ff0000, 0xeb000000007e0000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE (64→8) (ALGSI D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, + {ALSIH, 0xff0f000000000000, 0xcc0a000000000000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32) (ALSIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {ALSIHN, 0xff0f000000000000, 0xcc0b000000000000, 0x0, // ADD LOGICAL WITH SIGNED IMMEDIATE HIGH(32) (ALSIHN R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {AXR, 0xff00000000000000, 0x3600000000000000, 0x0, // ADD NORMALIZED (extended HFP) (AXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AD, 
0xff00000000000000, 0x6a00000000000000, 0x0, // ADD NORMALIZED (long HFP) (AD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ADR, 0xff00000000000000, 0x2a00000000000000, 0x0, // ADD NORMALIZED (long HFP) (ADR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AE, 0xff00000000000000, 0x7a00000000000000, 0x0, // ADD NORMALIZED (short HFP) (AE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AER, 0xff00000000000000, 0x3a00000000000000, 0x0, // ADD NORMALIZED (short HFP) (AER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AW, 0xff00000000000000, 0x6e00000000000000, 0x0, // ADD UNNORMALIZED (long HFP) (AW R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AWR, 0xff00000000000000, 0x2e00000000000000, 0x0, // ADD UNNORMALIZED (long HFP) (AWR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {AU, 0xff00000000000000, 0x7e00000000000000, 0x0, // ADD UNNORMALIZED (short HFP) (AU R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {AUR, 0xff00000000000000, 0x3e00000000000000, 0x0, // ADD UNNORMALIZED (short HFP) (AUR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {N, 0xff00000000000000, 0x5400000000000000, 0x0, // AND (32) (N R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NR, 0xff00000000000000, 0x1400000000000000, 0x0, // AND (32) (NR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {NRK, 0xffff000000000000, 0xb9f4000000000000, 0xf0000000000, // AND (32) (NRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NY, 0xff00000000ff0000, 0xe300000000540000, 0x0, // AND (32) (NY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NG, 0xff00000000ff0000, 0xe300000000800000, 0x0, 
// AND (64) (NG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NGR, 0xffff000000000000, 0xb980000000000000, 0xff0000000000, // AND (64) (NGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {NGRK, 0xffff000000000000, 0xb9e4000000000000, 0xf0000000000, // AND (64) (NGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NC, 0xff00000000000000, 0xd400000000000000, 0x0, // AND (character) (NC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {NI, 0xff00000000000000, 0x9400000000000000, 0x0, // AND (immediate) (NI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {NIY, 0xff00000000ff0000, 0xeb00000000540000, 0x0, // AND (immediate) (NIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {NIHH, 0xff0f000000000000, 0xa504000000000000, 0x0, // AND IMMEDIATE (high high) (NIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NIHL, 0xff0f000000000000, 0xa505000000000000, 0x0, // AND IMMEDIATE (high low) (NIHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NIHF, 0xff0f000000000000, 0xc00a000000000000, 0x0, // AND IMMEDIATE (high) (NIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {NILH, 0xff0f000000000000, 0xa506000000000000, 0x0, // AND IMMEDIATE (low high) (NILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NILL, 0xff0f000000000000, 0xa507000000000000, 0x0, // AND IMMEDIATE (low low) (NILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {NILF, 0xff0f000000000000, 0xc00b000000000000, 0x0, // AND IMMEDIATE (low) (NILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {NCRK, 0xffff000000000000, 0xb9f5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(32) (NCRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NCGRK, 
0xffff000000000000, 0xb9e5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(64) (NCGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {BAL, 0xff00000000000000, 0x4500000000000000, 0x0, // BRANCH AND LINK (BAL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BALR, 0xff00000000000000, 0x500000000000000, 0x0, // BRANCH AND LINK (BALR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BAS, 0xff00000000000000, 0x4d00000000000000, 0x0, // BRANCH AND SAVE (BAS R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BASR, 0xff00000000000000, 0xd00000000000000, 0x0, // BRANCH AND SAVE (BASR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BASSM, 0xff00000000000000, 0xc00000000000000, 0x0, // BRANCH AND SAVE AND SET MODE (BASSM R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BSA, 0xffff000000000000, 0xb25a000000000000, 0xff0000000000, // BRANCH AND SET AUTHORITY (BSA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BSM, 0xff00000000000000, 0xb00000000000000, 0x0, // BRANCH AND SET MODE (BSM R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BAKR, 0xffff000000000000, 0xb240000000000000, 0xff0000000000, // BRANCH AND STACK (BAKR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BSG, 0xffff000000000000, 0xb258000000000000, 0xff0000000000, // BRANCH IN SUBSPACE GROUP (BSG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BIC, 0xff00000000ff0000, 0xe300000000470000, 0x0, // BRANCH INDIRECT ON CONDITION (BIC M1,D2(X2,B2)) + [8]*argField{ap_Mask_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BC, 0xff00000000000000, 0x4700000000000000, 0x0, // BRANCH ON CONDITION (BC M1,D2(X2,B2)) + [8]*argField{ap_Mask_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BCR, 0xff00000000000000, 0x700000000000000, 0x0, // BRANCH ON CONDITION (BCR M1,R2) + [8]*argField{ap_Mask_8_11, 
ap_Reg_12_15}}, + {BCT, 0xff00000000000000, 0x4600000000000000, 0x0, // BRANCH ON COUNT (32) (BCT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BCTR, 0xff00000000000000, 0x600000000000000, 0x0, // BRANCH ON COUNT (32) (BCTR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {BCTG, 0xff00000000ff0000, 0xe300000000460000, 0x0, // BRANCH ON COUNT (64) (BCTG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {BCTGR, 0xffff000000000000, 0xb946000000000000, 0xff0000000000, // BRANCH ON COUNT (64) (BCTGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {BXH, 0xff00000000000000, 0x8600000000000000, 0x0, // BRANCH ON INDEX HIGH (32) (BXH R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {BXHG, 0xff00000000ff0000, 0xeb00000000440000, 0x0, // BRANCH ON INDEX HIGH (64) (BXHG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {BXLE, 0xff00000000000000, 0x8700000000000000, 0x0, // BRANCH ON INDEX LOW OR EQUAL (32) (BXLE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {BXLEG, 0xff00000000ff0000, 0xeb00000000450000, 0x0, // BRANCH ON INDEX LOW OR EQUAL (64) (BXLEG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {BPP, 0xff00000000000000, 0xc700000000000000, 0xf000000000000, // BRANCH PREDICTION PRELOAD (BPP M1,RI2,D3(B3)) + [8]*argField{ap_Mask_8_11, ap_RegImSigned16_32_47, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {BPRP, 0xff00000000000000, 0xc500000000000000, 0x0, // BRANCH PREDICTION RELATIVE PRELOAD (BPRP M1,RI2,RI3) + [8]*argField{ap_Mask_8_11, ap_RegImSigned12_12_23, ap_RegImSigned24_24_47}}, + {BRAS, 0xff0f000000000000, 0xa705000000000000, 0x0, // BRANCH RELATIVE AND SAVE (BRAS R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned16_16_31}}, + {BRASL, 
0xff0f000000000000, 0xc005000000000000, 0x0, // BRANCH RELATIVE AND SAVE LONG (BRASL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {BRC, 0xff0f000000000000, 0xa704000000000000, 0x0, // BRANCH RELATIVE ON CONDITION (BRC M1,RI2) + [8]*argField{ap_Mask_8_11, ap_RegImSigned16_16_31}}, + {BRCL, 0xff0f000000000000, 0xc004000000000000, 0x0, // BRANCH RELATIVE ON CONDITION LONG (BRCL M1,RI2) + [8]*argField{ap_Mask_8_11, ap_RegImSigned32_16_47}}, + {BRCT, 0xff0f000000000000, 0xa706000000000000, 0x0, // BRANCH RELATIVE ON COUNT (32) (BRCT R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned16_16_31}}, + {BRCTG, 0xff0f000000000000, 0xa707000000000000, 0x0, // BRANCH RELATIVE ON COUNT (64) (BRCTG R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned16_16_31}}, + {BRCTH, 0xff0f000000000000, 0xcc06000000000000, 0x0, // BRANCH RELATIVE ON COUNT HIGH (32) (BRCTH R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {BRXH, 0xff00000000000000, 0x8400000000000000, 0x0, // BRANCH RELATIVE ON INDEX HIGH (32) (BRXH R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {BRXHG, 0xff00000000ff0000, 0xec00000000440000, 0xff000000, // BRANCH RELATIVE ON INDEX HIGH (64) (BRXHG R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {BRXLE, 0xff00000000000000, 0x8500000000000000, 0x0, // BRANCH RELATIVE ON INDEX LOW OR EQ. (32) (BRXLE R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {BRXLG, 0xff00000000ff0000, 0xec00000000450000, 0xff000000, // BRANCH RELATIVE ON INDEX LOW OR EQ. 
(64) (BRXLG R1,R3,RI2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_RegImSigned16_16_31}}, + {XSCH, 0xffff000000000000, 0xb276000000000000, 0xffff00000000, // CANCEL SUBCHANNEL (XSCH) + [8]*argField{}}, + {CKSM, 0xffff000000000000, 0xb241000000000000, 0xff0000000000, // CHECKSUM (CKSM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KM, 0xffff000000000000, 0xb92e000000000000, 0xff0000000000, // CIPHER MESSAGE (KM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMA, 0xffff000000000000, 0xb929000000000000, 0xf0000000000, // CIPHER MESSAGE WITH AUTHENTICATION (KMA R1,R3,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31}}, + {KMC, 0xffff000000000000, 0xb92f000000000000, 0xff0000000000, // CIPHER MESSAGE WITH CHAINING (KMC R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMF, 0xffff000000000000, 0xb92a000000000000, 0xff0000000000, // CIPHER MESSAGE WITH CIPHER FEEDBACK (KMF R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMCTR, 0xffff000000000000, 0xb92d000000000000, 0xf0000000000, // CIPHER MESSAGE WITH COUNTER (KMCTR R1,R3,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31}}, + {KMO, 0xffff000000000000, 0xb92b000000000000, 0xff0000000000, // CIPHER MESSAGE WITH OUTPUT FEEDBACK (KMO R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CSCH, 0xffff000000000000, 0xb230000000000000, 0xffff00000000, // CLEAR SUBCHANNEL (CSCH) + [8]*argField{}}, + {C, 0xff00000000000000, 0x5900000000000000, 0x0, // COMPARE (32) (C R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CR, 0xff00000000000000, 0x1900000000000000, 0x0, // COMPARE (32) (CR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {CY, 0xff00000000ff0000, 0xe300000000590000, 0x0, // COMPARE (32) (CY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CG, 0xff00000000ff0000, 0xe300000000200000, 0x0, // COMPARE (64) (CG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, 
ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CGR, 0xffff000000000000, 0xb920000000000000, 0xff0000000000, // COMPARE (64) (CGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CGF, 0xff00000000ff0000, 0xe300000000300000, 0x0, // COMPARE (64←32) (CGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CGFR, 0xffff000000000000, 0xb930000000000000, 0xff0000000000, // COMPARE (64←32) (CGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CXBR, 0xffff000000000000, 0xb349000000000000, 0xff0000000000, // COMPARE (extended BFP) (CXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CXTR, 0xffff000000000000, 0xb3ec000000000000, 0xff0000000000, // COMPARE (extended DFP) (CXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CXR, 0xffff000000000000, 0xb369000000000000, 0xff0000000000, // COMPARE (extended HFP) (CXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CDB, 0xff00000000ff0000, 0xed00000000190000, 0xff000000, // COMPARE (long BFP) (CDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CDBR, 0xffff000000000000, 0xb319000000000000, 0xff0000000000, // COMPARE (long BFP) (CDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CDTR, 0xffff000000000000, 0xb3e4000000000000, 0xff0000000000, // COMPARE (long DFP) (CDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CD, 0xff00000000000000, 0x6900000000000000, 0x0, // COMPARE (long HFP) (CD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CDR, 0xff00000000000000, 0x2900000000000000, 0x0, // COMPARE (long HFP) (CDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {CEB, 0xff00000000ff0000, 0xed00000000090000, 0xff000000, // COMPARE (short BFP) (CEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CEBR, 
0xffff000000000000, 0xb309000000000000, 0xff0000000000, // COMPARE (short BFP) (CEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CE, 0xff00000000000000, 0x7900000000000000, 0x0, // COMPARE (short HFP) (CE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CER, 0xff00000000000000, 0x3900000000000000, 0x0, // COMPARE (short HFP) (CER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {CRB, 0xff00000000ff0000, 0xec00000000f60000, 0xf000000, // COMPARE AND BRANCH (32) (CRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CGRB, 0xff00000000ff0000, 0xec00000000e40000, 0xf000000, // COMPARE AND BRANCH (64) (CGRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CRJ, 0xff00000000ff0000, 0xec00000000760000, 0xf000000, // COMPARE AND BRANCH RELATIVE (32) (CRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CGRJ, 0xff00000000ff0000, 0xec00000000640000, 0xf000000, // COMPARE AND BRANCH RELATIVE (64) (CGRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CFC, 0xffff000000000000, 0xb21a000000000000, 0x0, // COMPARE AND FORM CODEWORD (CFC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CRDTE, 0xffff000000000000, 0xb98f000000000000, 0x0, // COMPARE AND REPLACE DAT TABLE ENTRY (CRDTE R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {KXBR, 0xffff000000000000, 0xb348000000000000, 0xff0000000000, // COMPARE AND SIGNAL (extended BFP) (KXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KXTR, 0xffff000000000000, 0xb3e8000000000000, 0xff0000000000, // COMPARE AND SIGNAL (extended DFP) (KXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KDB, 0xff00000000ff0000, 0xed00000000180000, 
0xff000000, // COMPARE AND SIGNAL (long BFP) (KDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {KDBR, 0xffff000000000000, 0xb318000000000000, 0xff0000000000, // COMPARE AND SIGNAL (long BFP) (KDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KDTR, 0xffff000000000000, 0xb3e0000000000000, 0xff0000000000, // COMPARE AND SIGNAL (long DFP) (KDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {KEB, 0xff00000000ff0000, 0xed00000000080000, 0xff000000, // COMPARE AND SIGNAL (short BFP) (KEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {KEBR, 0xffff000000000000, 0xb308000000000000, 0xff0000000000, // COMPARE AND SIGNAL (short BFP) (KEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CS, 0xff00000000000000, 0xba00000000000000, 0x0, // COMPARE AND SWAP (32) (CS R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CSY, 0xff00000000ff0000, 0xeb00000000140000, 0x0, // COMPARE AND SWAP (32) (CSY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CSG, 0xff00000000ff0000, 0xeb00000000300000, 0x0, // COMPARE AND SWAP (64) (CSG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CSP, 0xffff000000000000, 0xb250000000000000, 0xff0000000000, // COMPARE AND SWAP AND PURGE (32) (CSP R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CSPG, 0xffff000000000000, 0xb98a000000000000, 0xff0000000000, // COMPARE AND SWAP AND PURGE (64) (CSPG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CSST, 0xff0f000000000000, 0xc802000000000000, 0x0, // COMPARE AND SWAP AND STORE (CSST D1(B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_8_11}}, + {CRT, 0xffff000000000000, 0xb972000000000000, 0xf0000000000, // COMPARE AND 
TRAP (32) (CRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CGRT, 0xffff000000000000, 0xb960000000000000, 0xf0000000000, // COMPARE AND TRAP (64) (CGRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CEXTR, 0xffff000000000000, 0xb3fc000000000000, 0xff0000000000, // COMPARE BIASED EXPONENT (extended DFP) (CEXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CEDTR, 0xffff000000000000, 0xb3f4000000000000, 0xff0000000000, // COMPARE BIASED EXPONENT (long DFP) (CEDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CP, 0xff00000000000000, 0xf900000000000000, 0x0, // COMPARE DECIMAL (CP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {CDS, 0xff00000000000000, 0xbb00000000000000, 0x0, // COMPARE DOUBLE AND SWAP (32) (CDS R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CDSY, 0xff00000000ff0000, 0xeb00000000310000, 0x0, // COMPARE DOUBLE AND SWAP (32) (CDSY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CDSG, 0xff00000000ff0000, 0xeb000000003e0000, 0x0, // COMPARE DOUBLE AND SWAP (64) (CDSG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CH, 0xff00000000000000, 0x4900000000000000, 0x0, // COMPARE HALFWORD (32→16) (CH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CHY, 0xff00000000ff0000, 0xe300000000790000, 0x0, // COMPARE HALFWORD (32→16) (CHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CGH, 0xff00000000ff0000, 0xe300000000340000, 0x0, // COMPARE HALFWORD (64←16) (CGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CHHSI, 0xffff000000000000, 0xe554000000000000, 
0x0, // COMPARE HALFWORD IMMEDIATE (16→16) (CHHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {CHI, 0xff0f000000000000, 0xa70e000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (32←16) (CHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {CHSI, 0xffff000000000000, 0xe55c000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (32←16) (CHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {CGHI, 0xff0f000000000000, 0xa70f000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (64←16) (CGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {CGHSI, 0xffff000000000000, 0xe558000000000000, 0x0, // COMPARE HALFWORD IMMEDIATE (64←16) (CGHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {CHRL, 0xff0f000000000000, 0xc605000000000000, 0x0, // COMPAREHALFWORDRELATIVE LONG (32→16) (CHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CGHRL, 0xff0f000000000000, 0xc604000000000000, 0x0, // COMPAREHALFWORDRELATIVE LONG (64←16) (CGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CHF, 0xff00000000ff0000, 0xe300000000cd0000, 0x0, // COMPARE HIGH (32) (CHF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CHHR, 0xffff000000000000, 0xb9cd000000000000, 0xff0000000000, // COMPARE HIGH (32) (CHHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CHLR, 0xffff000000000000, 0xb9dd000000000000, 0xff0000000000, // COMPARE HIGH (32) (CHLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CFI, 0xff0f000000000000, 0xc20d000000000000, 0x0, // COMPARE IMMEDIATE (32) (CFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {CGFI, 0xff0f000000000000, 0xc20c000000000000, 0x0, // COMPARE IMMEDIATE (64←32) (CGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {CIB, 0xff00000000ff0000, 0xec00000000fe0000, 0x0, // COMPARE IMMEDIATE AND 
BRANCH (32←8) (CIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CGIB, 0xff00000000ff0000, 0xec00000000fc0000, 0x0, // COMPARE IMMEDIATE AND BRANCH (64←8) (CGIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CIJ, 0xff00000000ff0000, 0xec000000007e0000, 0x0, // COMPARE IMMEDIATE AND BRANCH RELATIVE(32→8) (CIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CGIJ, 0xff00000000ff0000, 0xec000000007c0000, 0x0, // COMPARE IMMEDIATE AND BRANCH RELATIVE(64→8) (CGIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmSigned8_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CIT, 0xff00000000ff0000, 0xec00000000720000, 0xf00000f000000, // COMPARE IMMEDIATE AND TRAP (32→16) (CIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35}}, + {CGIT, 0xff00000000ff0000, 0xec00000000700000, 0xf00000f000000, // COMPARE IMMEDIATE AND TRAP (64←16) (CGIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35}}, + {CIH, 0xff0f000000000000, 0xcc0d000000000000, 0x0, // COMPARE IMMEDIATE HIGH (32) (CIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, + {CL, 0xff00000000000000, 0x5500000000000000, 0x0, // COMPARE LOGICAL (32) (CL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLR, 0xff00000000000000, 0x1500000000000000, 0x0, // COMPARE LOGICAL (32) (CLR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {CLY, 0xff00000000ff0000, 0xe300000000550000, 0x0, // COMPARE LOGICAL (32) (CLY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLG, 0xff00000000ff0000, 0xe300000000210000, 0x0, // COMPARE LOGICAL (64) (CLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + 
{CLGR, 0xffff000000000000, 0xb921000000000000, 0xff0000000000, // COMPARE LOGICAL (64) (CLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLGF, 0xff00000000ff0000, 0xe300000000310000, 0x0, // COMPARE LOGICAL (64→32) (CLGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLGFR, 0xffff000000000000, 0xb931000000000000, 0xff0000000000, // COMPARE LOGICAL (64→32) (CLGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLC, 0xff00000000000000, 0xd500000000000000, 0x0, // COMPARE LOGICAL (character) (CLC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {CLI, 0xff00000000000000, 0x9500000000000000, 0x0, // COMPARE LOGICAL (immediate) (CLI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {CLIY, 0xff00000000ff0000, 0xeb00000000550000, 0x0, // COMPARE LOGICAL (immediate) (CLIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {CLRB, 0xff00000000ff0000, 0xec00000000f70000, 0xf000000, // COMPARE LOGICAL AND BRANCH (32) (CLRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLGRB, 0xff00000000ff0000, 0xec00000000e50000, 0xf000000, // COMPARE LOGICAL AND BRANCH (64) (CLGRB R1,R2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLRJ, 0xff00000000ff0000, 0xec00000000770000, 0xf000000, // COMPARE LOGICAL AND BRANCH RELATIVE(32) (CLRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CLGRJ, 0xff00000000ff0000, 0xec00000000650000, 0xf000000, // COMPARE LOGICAL AND BRANCH RELATIVE(64) (CLGRJ R1,R2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_Mask_32_35, ap_RegImSigned16_16_31}}, + {CLRT, 0xffff000000000000, 0xb973000000000000, 0xf0000000000, // COMPARE 
LOGICAL AND TRAP (32) (CLRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CLT, 0xff00000000ff0000, 0xeb00000000230000, 0x0, // COMPARE LOGICAL AND TRAP (32) (CLT R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLGRT, 0xffff000000000000, 0xb961000000000000, 0xf0000000000, // COMPARE LOGICAL AND TRAP (64) (CLGRT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CLGT, 0xff00000000ff0000, 0xeb000000002b0000, 0x0, // COMPARE LOGICAL AND TRAP (64) (CLGT R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLMH, 0xff00000000ff0000, 0xeb00000000200000, 0x0, // COMPARE LOGICAL CHAR. UNDER MASK (high) (CLMH R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLM, 0xff00000000000000, 0xbd00000000000000, 0x0, // COMPARE LOGICAL CHAR. UNDER MASK (low) (CLM R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLMY, 0xff00000000ff0000, 0xeb00000000210000, 0x0, // COMPARE LOGICAL CHAR. 
UNDER MASK (low) (CLMY R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLHF, 0xff00000000ff0000, 0xe300000000cf0000, 0x0, // COMPARE LOGICAL HIGH (32) (CLHF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CLHHR, 0xffff000000000000, 0xb9cf000000000000, 0xff0000000000, // COMPARE LOGICAL HIGH (32) (CLHHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLHLR, 0xffff000000000000, 0xb9df000000000000, 0xff0000000000, // COMPARE LOGICAL HIGH (32) (CLHLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CLHHSI, 0xffff000000000000, 0xe555000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (16←16) (CLHHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {CLFI, 0xff0f000000000000, 0xc20f000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (32) (CLFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {CLFHSI, 0xffff000000000000, 0xe55d000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (32←16) (CLFHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {CLGHSI, 0xffff000000000000, 0xe559000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (64←16) (CLGHSI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {CLGFI, 0xff0f000000000000, 0xc20e000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE (64←32) (CLGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {CLIB, 0xff00000000ff0000, 0xec00000000ff0000, 0x0, // COMPARE LOGICAL IMMEDIATE AND BRANCH(32←8) (CLIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLGIB, 0xff00000000ff0000, 0xec00000000fd0000, 0x0, // COMPARE LOGICAL IMMEDIATE AND BRANCH(64→8) (CLGIB R1,I2,M3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLIJ, 
0xff00000000ff0000, 0xec000000007f0000, 0x0, // COMPARE LOGICAL IMMEDIATE AND BRANCH (CLIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CLGIJ, 0xff00000000ff0000, 0xec000000007d0000, 0x0, // RELATIVE (32→8)10COMPARE LOGICAL IMMEDIATE AND BRANCH (CLGIJ R1,I2,M3,RI4) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_32_39, ap_Mask_12_15, ap_RegImSigned16_16_31}}, + {CLFIT, 0xff00000000ff0000, 0xec00000000730000, 0xf00000f000000, // RELATIVE (64→8)COMPARE LOGICAL IMMEDIATE AND TRAP(32→16) (CLFIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35}}, + {CLGIT, 0xff00000000ff0000, 0xec00000000710000, 0xf00000f000000, // COMPARE LOGICAL IMMEDIATE AND TRAP(64←16) (CLGIT R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35}}, + {CLIH, 0xff0f000000000000, 0xcc0f000000000000, 0x0, // COMPARE LOGICAL IMMEDIATE HIGH (32) (CLIH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {CLCL, 0xff00000000000000, 0xf00000000000000, 0x0, // COMPARE LOGICAL LONG (CLCL R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {CLCLE, 0xff00000000000000, 0xa900000000000000, 0x0, // COMPARE LOGICAL LONG EXTENDED (CLCLE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {CLCLU, 0xff00000000ff0000, 0xeb000000008f0000, 0x0, // COMPARE LOGICAL LONG UNICODE (CLCLU R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {CLRL, 0xff0f000000000000, 0xc60f000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (32) (CLRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLHRL, 0xff0f000000000000, 0xc607000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (32→16) (CLHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLGRL, 0xff0f000000000000, 0xc60a000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (64) (CLGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + 
{CLGHRL, 0xff0f000000000000, 0xc606000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (64→16) (CLGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLGFRL, 0xff0f000000000000, 0xc60e000000000000, 0x0, // COMPARE LOGICAL RELATIVE LONG (64→32) (CLGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CLST, 0xffff000000000000, 0xb25d000000000000, 0xff0000000000, // COMPARE LOGICAL STRING (CLST R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CRL, 0xff0f000000000000, 0xc60d000000000000, 0x0, // COMPARE RELATIVE LONG (32) (CRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CGRL, 0xff0f000000000000, 0xc608000000000000, 0x0, // COMPARE RELATIVE LONG (64) (CGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CGFRL, 0xff0f000000000000, 0xc60c000000000000, 0x0, // COMPARE RELATIVE LONG (64←32) (CGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {CUSE, 0xffff000000000000, 0xb257000000000000, 0xff0000000000, // COMPARE UNTIL SUBSTRING EQUAL (CUSE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CMPSC, 0xffff000000000000, 0xb263000000000000, 0xff0000000000, // COMPRESSION CALL (CMPSC R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KDSA, 0xffff000000000000, 0xb93a000000000000, 0xff0000000000, // COMPUTE DIGITAL SIGNATURE AUTHENTICATION (KDSA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KIMD, 0xffff000000000000, 0xb93e000000000000, 0xff0000000000, // COMPUTE INTERMEDIATE MESSAGE DIGEST (KIMD R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KLMD, 0xffff000000000000, 0xb93f000000000000, 0xff0000000000, // COMPUTE LAST MESSAGE DIGEST (KLMD R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {KMAC, 0xffff000000000000, 0xb91e000000000000, 0xff0000000000, // COMPUTE MESSAGE AUTHENTICATION CODE (KMAC R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {THDR, 0xffff000000000000, 0xb359000000000000, 0xff0000000000, // CONVERT BFP TO HFP (long) (THDR R1,R2) + 
[8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {THDER, 0xffff000000000000, 0xb358000000000000, 0xff0000000000, // CONVERT BFP TO HFP (short to long) (THDER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {CXFBR, 0xffff000000000000, 0xb396000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to extended BFP) (CXFBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXFBRA, 0xffff000000000000, 0xb396000000000000, 0x0, // CONVERT FROM FIXED (32 to extended BFP) (CXFBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXFTR, 0xffff000000000000, 0xb959000000000000, 0x0, // CONVERT FROM FIXED (32 to extended DFP) (CXFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXFR, 0xffff000000000000, 0xb3b6000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to extended HFP) (CXFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDFBR, 0xffff000000000000, 0xb395000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to long BFP) (CDFBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDFBRA, 0xffff000000000000, 0xb395000000000000, 0x0, // CONVERT FROM FIXED (32 to long BFP) (CDFBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDFTR, 0xffff000000000000, 0xb951000000000000, 0x0, // CONVERT FROM FIXED (32 to long DFP) (CDFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDFR, 0xffff000000000000, 0xb3b5000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to long HFP) (CDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEFBR, 0xffff000000000000, 0xb394000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to short BFP) (CEFBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEFBRA, 0xffff000000000000, 0xb394000000000000, 0x0, // CONVERT FROM FIXED (32 to short BFP) (CEFBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, 
ap_Reg_28_31, ap_Mask_20_23}}, + {CEFR, 0xffff000000000000, 0xb3b4000000000000, 0xff0000000000, // CONVERT FROM FIXED (32 to short HFP) (CEFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXGBR, 0xffff000000000000, 0xb3a6000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to extended BFP) (CXGBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXGBRA, 0xffff000000000000, 0xb3a6000000000000, 0x0, // CONVERT FROM FIXED (64 to extended BFP) (CXGBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXGTR, 0xffff000000000000, 0xb3f9000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to extended DFP) (CXGTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXGTRA, 0xffff000000000000, 0xb3f9000000000000, 0x0, // CONVERT FROM FIXED (64 to extended DFP) (CXGTRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXGR, 0xffff000000000000, 0xb3c6000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to extended HFP) (CXGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDGBR, 0xffff000000000000, 0xb3a5000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to long BFP) (CDGBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDGBRA, 0xffff000000000000, 0xb3a5000000000000, 0x0, // CONVERT FROM FIXED (64 to long BFP) (CDGBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDGTR, 0xffff000000000000, 0xb3f1000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to long DFP) (CDGTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDGTRA, 0xffff000000000000, 0xb3f1000000000000, 0x0, // CONVERT FROM FIXED (64 to long DFP) (CDGTRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDGR, 0xffff000000000000, 0xb3c5000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to long HFP) (CDGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEGBR, 
0xffff000000000000, 0xb3a4000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to short BFP) (CEGBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CEGBRA, 0xffff000000000000, 0xb3a4000000000000, 0x0, // CONVERT FROM FIXED (64 to short BFP) (CEGBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CEGR, 0xffff000000000000, 0xb3c4000000000000, 0xff0000000000, // CONVERT FROM FIXED (64 to short HFP) (CEGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXLFBR, 0xffff000000000000, 0xb392000000000000, 0x0, // CONVERT FROM LOGICAL (32 to extended BFP) (CXLFBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXLFTR, 0xffff000000000000, 0xb95b000000000000, 0x0, // CONVERT FROM LOGICAL (32 to extended DFP) (CXLFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLFBR, 0xffff000000000000, 0xb391000000000000, 0x0, // CONVERT FROM LOGICAL (32 to long BFP) (CDLFBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLFTR, 0xffff000000000000, 0xb953000000000000, 0x0, // CONVERT FROM LOGICAL (32 to long DFP) (CDLFTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CELFBR, 0xffff000000000000, 0xb390000000000000, 0x0, // CONVERT FROM LOGICAL (32 to short BFP) (CELFBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXLGBR, 0xffff000000000000, 0xb3a2000000000000, 0x0, // CONVERT FROM LOGICAL (64 to extended BFP) (CXLGBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXLGTR, 0xffff000000000000, 0xb95a000000000000, 0x0, // CONVERT FROM LOGICAL (64 to extended DFP) (CXLGTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLGBR, 0xffff000000000000, 0xb3a1000000000000, 0x0, // CONVERT FROM LOGICAL (64 to long BFP) 
(CDLGBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CDLGTR, 0xffff000000000000, 0xb952000000000000, 0x0, // CONVERT FROM LOGICAL (64 to long DFP) (CDLGTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CELGBR, 0xffff000000000000, 0xb3a0000000000000, 0x0, // CONVERT FROM LOGICAL (64 to short BFP) (CELGBR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {CXPT, 0xff00000000ff0000, 0xed00000000af0000, 0x0, // CONVERT FROM PACKED (to extended DFP) (CXPT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CDPT, 0xff00000000ff0000, 0xed00000000ae0000, 0x0, // CONVERT FROM PACKED (to long DFP) (CDPT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CXSTR, 0xffff000000000000, 0xb3fb000000000000, 0xff0000000000, // CONVERT FROM SIGNED PACKED (128 to extended DFP) (CXSTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDSTR, 0xffff000000000000, 0xb3f3000000000000, 0xff0000000000, // CONVERT FROM SIGNED PACKED (64 to long DFP) (CDSTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXUTR, 0xffff000000000000, 0xb3fa000000000000, 0xff0000000000, // CONVERT FROM UNSIGNED PACKED (128 to ext. 
DFP) (CXUTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CDUTR, 0xffff000000000000, 0xb3f2000000000000, 0xff0000000000, // CONVERT FROM UNSIGNED PACKED (64 to long DFP) (CDUTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {CXZT, 0xff00000000ff0000, 0xed00000000ab0000, 0x0, // CONVERT FROM ZONED (to extended DFP) (CXZT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CDZT, 0xff00000000ff0000, 0xed00000000aa0000, 0x0, // CONVERT FROM ZONED (to long DFP) (CDZT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {TBEDR, 0xffff000000000000, 0xb350000000000000, 0xf0000000000, // CONVERT HFP TO BFP (long to short) (TBEDR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {TBDR, 0xffff000000000000, 0xb351000000000000, 0xf0000000000, // CONVERT HFP TO BFP (long) (TBDR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CVB, 0xff00000000000000, 0x4f00000000000000, 0x0, // CONVERT TO BINARY (32) (CVB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVBY, 0xff00000000ff0000, 0xe300000000060000, 0x0, // CONVERT TO BINARY (32) (CVBY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVBG, 0xff00000000ff0000, 0xe3000000000e0000, 0x0, // CONVERT TO BINARY (64) (CVBG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVD, 0xff00000000000000, 0x4e00000000000000, 0x0, // CONVERT TO DECIMAL (32) (CVD R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVDY, 0xff00000000ff0000, 0xe300000000260000, 0x0, // CONVERT TO DECIMAL (32) (CVDY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CVDG, 
0xff00000000ff0000, 0xe3000000002e0000, 0x0, // CONVERT TO DECIMAL (64) (CVDG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {CFXBR, 0xffff000000000000, 0xb39a000000000000, 0xf0000000000, // CONVERT TO FIXED (extended BFP to 32) (CFXBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFXBRA, 0xffff000000000000, 0xb39a000000000000, 0x0, // CONVERT TO FIXED (extended BFP to 32) (CFXBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGXBR, 0xffff000000000000, 0xb3aa000000000000, 0xf0000000000, // CONVERT TO FIXED (extended BFP to 64) (CGXBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGXBRA, 0xffff000000000000, 0xb3aa000000000000, 0x0, // CONVERT TO FIXED (extended BFP to 64) (CGXBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFXTR, 0xffff000000000000, 0xb949000000000000, 0x0, // CONVERT TO FIXED (extended DFP to 32) (CFXTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGXTR, 0xffff000000000000, 0xb3e9000000000000, 0xf0000000000, // CONVERT TO FIXED (extended DFP to 64) (CGXTR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGXTRA, 0xffff000000000000, 0xb3e9000000000000, 0x0, // CONVERT TO FIXED (extended DFP to 64) (CGXTRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFXR, 0xffff000000000000, 0xb3ba000000000000, 0xf0000000000, // CONVERT TO FIXED (extended HFP to 32) (CFXR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGXR, 0xffff000000000000, 0xb3ca000000000000, 0xf0000000000, // CONVERT TO FIXED (extended HFP to 64) (CGXR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFDBR, 0xffff000000000000, 0xb399000000000000, 0xf0000000000, // CONVERT TO FIXED (long BFP to 32) (CFDBR R1,M3,R2) 
+ [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFDBRA, 0xffff000000000000, 0xb399000000000000, 0x0, // CONVERT TO FIXED (long BFP to 32) (CFDBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGDBR, 0xffff000000000000, 0xb3a9000000000000, 0xf0000000000, // CONVERT TO FIXED (long BFP to 64) (CGDBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGDBRA, 0xffff000000000000, 0xb3a9000000000000, 0x0, // CONVERT TO FIXED (long BFP to 64) (CGDBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFDTR, 0xffff000000000000, 0xb941000000000000, 0x0, // CONVERT TO FIXED (long DFP to 32) (CFDTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGDTR, 0xffff000000000000, 0xb3e1000000000000, 0xf0000000000, // CONVERT TO FIXED (long DFP to 64) (CGDTR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGDTRA, 0xffff000000000000, 0xb3e1000000000000, 0x0, // CONVERT TO FIXED (long DFP to 64) (CGDTRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFDR, 0xffff000000000000, 0xb3b9000000000000, 0xf0000000000, // CONVERT TO FIXED (long HFP to 32) (CFDR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGDR, 0xffff000000000000, 0xb3c9000000000000, 0xf0000000000, // CONVERT TO FIXED (long HFP to 64) (CGDR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFEBR, 0xffff000000000000, 0xb398000000000000, 0xf0000000000, // CONVERT TO FIXED (short BFP to 32) (CFEBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CFEBRA, 0xffff000000000000, 0xb398000000000000, 0x0, // CONVERT TO FIXED (short BFP to 32) (CFEBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CGEBR, 0xffff000000000000, 0xb3a8000000000000, 0xf0000000000, // CONVERT TO 
FIXED (short BFP to 64) (CGEBR R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGEBRA, 0xffff000000000000, 0xb3a8000000000000, 0x0, // CONVERT TO FIXED (short BFP to 64) (CGEBRA R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CFER, 0xffff000000000000, 0xb3b8000000000000, 0xf0000000000, // CONVERT TO FIXED (short HFP to 32) (CFER R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CGER, 0xffff000000000000, 0xb3c8000000000000, 0xf0000000000, // CONVERT TO FIXED (short HFP to 64) (CGER R1,M3,R2) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {CLFXBR, 0xffff000000000000, 0xb39e000000000000, 0x0, // CONVERT TO LOGICAL (extended BFP to 32) (CLFXBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGXBR, 0xffff000000000000, 0xb3ae000000000000, 0x0, // CONVERT TO LOGICAL (extended BFP to 64) (CLGXBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFXTR, 0xffff000000000000, 0xb94b000000000000, 0x0, // CONVERT TO LOGICAL (extended DFP to 32) (CLFXTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGXTR, 0xffff000000000000, 0xb94a000000000000, 0x0, // CONVERT TO LOGICAL (extended DFP to 64) (CLGXTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFDBR, 0xffff000000000000, 0xb39d000000000000, 0x0, // CONVERT TO LOGICAL (long BFP to 32) (CLFDBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGDBR, 0xffff000000000000, 0xb3ad000000000000, 0x0, // CONVERT TO LOGICAL (long BFP to 64) (CLGDBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFDTR, 0xffff000000000000, 0xb943000000000000, 0x0, // CONVERT TO LOGICAL (long DFP to 32) (CLFDTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, 
ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGDTR, 0xffff000000000000, 0xb942000000000000, 0x0, // CONVERT TO LOGICAL (long DFP to 64) (CLGDTR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLFEBR, 0xffff000000000000, 0xb39c000000000000, 0x0, // CONVERT TO LOGICAL (short BFP to 32) (CLFEBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CLGEBR, 0xffff000000000000, 0xb3ac000000000000, 0x0, // CONVERT TO LOGICAL (short BFP to 64) (CLGEBR R1,M3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {CPXT, 0xff00000000ff0000, 0xed00000000ad0000, 0x0, // CONVERT TO PACKED (from extended DFP) (CPXT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CPDT, 0xff00000000ff0000, 0xed00000000ac0000, 0x0, // CONVERT TO PACKED (from long DFP) (CPDT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CSXTR, 0xffff000000000000, 0xb3eb000000000000, 0xf00000000000, // CONVERT TO SIGNED PACKED (extended DFP to 128) (CSXTR R1,R2,M4) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {CSDTR, 0xffff000000000000, 0xb3e3000000000000, 0xf00000000000, // CONVERT TO SIGNED PACKED (long DFP to 64) (CSDTR R1,R2,M4) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {CUXTR, 0xffff000000000000, 0xb3ea000000000000, 0xff0000000000, // CONVERTTOUNSIGNEDPACKED(extendedDFP to 128) (CUXTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {CUDTR, 0xffff000000000000, 0xb3e2000000000000, 0xff0000000000, // CONVERT TO UNSIGNED PACKED (long DFP to 64) (CUDTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {CZXT, 0xff00000000ff0000, 0xed00000000a90000, 0x0, // CONVERT TO ZONED (from extended DFP) (CZXT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, 
ap_Mask_36_39}}, + {CZDT, 0xff00000000ff0000, 0xed00000000a80000, 0x0, // CONVERT TO ZONED (from long DFP) (CZDT R1,D2(L2,B2),M3) + [8]*argField{ap_FPReg_32_35, ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_Mask_36_39}}, + {CU24, 0xffff000000000000, 0xb9b1000000000000, 0xf0000000000, // CONVERT UTF-16 TO UTF-32 (CU24 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU21, 0xffff000000000000, 0xb2a6000000000000, 0xf0000000000, // CONVERT UTF-16 TO UTF-8 (CU21 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU12, 0xffff000000000000, 0xb2a7000000000000, 0xf0000000000, // CONVERT UTF-8 TO UTF-16 (CU12 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU14, 0xffff000000000000, 0xb9b0000000000000, 0xf0000000000, // CONVERT UTF-8 TO UTF-32 (CU14 R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {CU42, 0xffff000000000000, 0xb9b3000000000000, 0xff0000000000, // CONVERT UTF-32 TO UTF-16 (CU42 R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CU41, 0xffff000000000000, 0xb9b2000000000000, 0xff0000000000, // CONVERT UTF-32 TO UTF-8 (CU41 R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {CPYA, 0xffff000000000000, 0xb24d000000000000, 0xff0000000000, // COPY ACCESS (CPYA R1,R2) + [8]*argField{ap_ACReg_24_27, ap_ACReg_28_31}}, + {CPSDR, 0xffff000000000000, 0xb372000000000000, 0xf0000000000, // COPY SIGN (long) (CPSDR R1,R3,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31}}, + {VSCSHP, 0xff00000000ff0000, 0xe6000000007c0000, 0xffff0000000, // DECIMAL SCALE AND CONVERT AND SPLIT TO HFP (VSCSHP V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSCHP, 0xff00000000ff0000, 0xe600000000740000, 0xf0f00000000, // DECIMAL SCALE AND CONVERT TO HFP (VSCHP V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {DFLTCC, 
0xffff000000000000, 0xb939000000000000, 0xf0000000000, // DEFLATE CONVERSION CALL (DFLTCC R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {D, 0xff00000000000000, 0x5d00000000000000, 0x0, // DIVIDE (32→64) (D R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DR, 0xff00000000000000, 0x1d00000000000000, 0x0, // DIVIDE (32←64) (DR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {DXBR, 0xffff000000000000, 0xb34d000000000000, 0xff0000000000, // DIVIDE (extended BFP) (DXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DXTR, 0xffff000000000000, 0xb3d9000000000000, 0xf0000000000, // DIVIDE (extended DFP) (DXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {DXTRA, 0xffff000000000000, 0xb3d9000000000000, 0x0, // DIVIDE (extended DFP) (DXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {DXR, 0xffff000000000000, 0xb22d000000000000, 0xff0000000000, // DIVIDE (extended HFP) (DXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DDB, 0xff00000000ff0000, 0xed000000001d0000, 0xff000000, // DIVIDE (long BFP) (DDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DDBR, 0xffff000000000000, 0xb31d000000000000, 0xff0000000000, // DIVIDE (long BFP) (DDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DDTR, 0xffff000000000000, 0xb3d1000000000000, 0xf0000000000, // DIVIDE (long DFP) (DDTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {DDTRA, 0xffff000000000000, 0xb3d1000000000000, 0x0, // DIVIDE (long DFP) (DDTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {DD, 0xff00000000000000, 0x6d00000000000000, 0x0, // DIVIDE (long HFP) (DD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DDR, 
0xff00000000000000, 0x2d00000000000000, 0x0, // DIVIDE (long HFP) (DDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {DEB, 0xff00000000ff0000, 0xed000000000d0000, 0xff000000, // DIVIDE (short BFP) (DEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DEBR, 0xffff000000000000, 0xb30d000000000000, 0xff0000000000, // DIVIDE (short BFP) (DEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {DE, 0xff00000000000000, 0x7d00000000000000, 0x0, // DIVIDE (short HFP) (DE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DER, 0xff00000000000000, 0x3d00000000000000, 0x0, // DIVIDE (short HFP) (DER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {DP, 0xff00000000000000, 0xfd00000000000000, 0x0, // DIVIDE DECIMAL (DP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {DL, 0xff00000000ff0000, 0xe300000000970000, 0x0, // DIVIDE LOGICAL (32→64) (DL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DLR, 0xffff000000000000, 0xb997000000000000, 0xff0000000000, // DIVIDE LOGICAL (32←64) (DLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {DLG, 0xff00000000ff0000, 0xe300000000870000, 0x0, // DIVIDE LOGICAL (64←128) (DLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DLGR, 0xffff000000000000, 0xb987000000000000, 0xff0000000000, // DIVIDE LOGICAL (64→128) (DLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {DSG, 0xff00000000ff0000, 0xe3000000000d0000, 0x0, // DIVIDE SINGLE (64) (DSG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DSGR, 0xffff000000000000, 0xb90d000000000000, 0xff0000000000, // DIVIDE SINGLE (64) (DSGR R1,R2) + [8]*argField{ap_Reg_24_27, 
ap_Reg_28_31}}, + {DSGF, 0xff00000000ff0000, 0xe3000000001d0000, 0x0, // DIVIDE SINGLE (64←32) (DSGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {DSGFR, 0xffff000000000000, 0xb91d000000000000, 0xff0000000000, // DIVIDE SINGLE (64→32) (DSGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {DIDBR, 0xffff000000000000, 0xb35b000000000000, 0x0, // DIVIDE TO INTEGER (long BFP) (DIDBR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {DIEBR, 0xffff000000000000, 0xb353000000000000, 0x0, // DIVIDE TO INTEGER (short BFP) (DIEBR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {ED, 0xff00000000000000, 0xde00000000000000, 0x0, // EDIT (ED D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {EDMK, 0xff00000000000000, 0xdf00000000000000, 0x0, // EDIT AND MARK (EDMK D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {X, 0xff00000000000000, 0x5700000000000000, 0x0, // EXCLUSIVE OR (32) (X R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {XR, 0xff00000000000000, 0x1700000000000000, 0x0, // EXCLUSIVE OR (32) (XR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {XRK, 0xffff000000000000, 0xb9f7000000000000, 0xf0000000000, // EXCLUSIVE OR (32) (XRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {XY, 0xff00000000ff0000, 0xe300000000570000, 0x0, // EXCLUSIVE OR (32) (XY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {XG, 0xff00000000ff0000, 0xe300000000820000, 0x0, // EXCLUSIVE OR (64) (XG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {XGR, 0xffff000000000000, 
0xb982000000000000, 0xff0000000000, // EXCLUSIVE OR (64) (XGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {XGRK, 0xffff000000000000, 0xb9e7000000000000, 0xf0000000000, // EXCLUSIVE OR (64) (XGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {XC, 0xff00000000000000, 0xd700000000000000, 0x0, // EXCLUSIVE OR (character) (XC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {XI, 0xff00000000000000, 0x9700000000000000, 0x0, // EXCLUSIVE OR (immediate) (XI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {XIY, 0xff00000000ff0000, 0xeb00000000570000, 0x0, // EXCLUSIVE OR (immediate) (XIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {XIHF, 0xff0f000000000000, 0xc006000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (high) (XIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {XILF, 0xff0f000000000000, 0xc007000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (low) (XILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {EX, 0xff00000000000000, 0x4400000000000000, 0x0, // EXECUTE (EX R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {EXRL, 0xff0f000000000000, 0xc600000000000000, 0x0, // EXECUTE RELATIVE LONG (EXRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {EAR, 0xffff000000000000, 0xb24f000000000000, 0xff0000000000, // EXTRACT ACCESS (EAR R1,R2) + [8]*argField{ap_Reg_24_27, ap_ACReg_28_31}}, + {ESEA, 0xffff000000000000, 0xb99d000000000000, 0xff0f00000000, // EXTRACT AND SET EXTENDED AUTHORITY (ESEA R1) + [8]*argField{ap_Reg_24_27}}, + {EEXTR, 0xffff000000000000, 0xb3ed000000000000, 0xff0000000000, // EXTRACT BIASED EXPONENT (extended DFP to 64) (EEXTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {EEDTR, 0xffff000000000000, 0xb3e5000000000000, 0xff0000000000, // EXTRACT 
BIASED EXPONENT (long DFP to 64) (EEDTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {ECAG, 0xff00000000ff0000, 0xeb000000004c0000, 0x0, // EXTRACT CPU ATTRIBUTE (ECAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {ECTG, 0xff0f000000000000, 0xc801000000000000, 0x0, // EXTRACT CPU TIME (ECTG D1(B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_8_11}}, + {EFPC, 0xffff000000000000, 0xb38c000000000000, 0xff0f00000000, // EXTRACT FPC (EFPC R1) + [8]*argField{ap_Reg_24_27}}, + {EPAR, 0xffff000000000000, 0xb226000000000000, 0xff0f00000000, // EXTRACT PRIMARY ASN (EPAR R1) + [8]*argField{ap_Reg_24_27}}, + {EPAIR, 0xffff000000000000, 0xb99a000000000000, 0xff0f00000000, // EXTRACT PRIMARY ASN AND INSTANCE (EPAIR R1) + [8]*argField{ap_Reg_24_27}}, + {EPSW, 0xffff000000000000, 0xb98d000000000000, 0xff0000000000, // EXTRACT PSW (EPSW R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ESAR, 0xffff000000000000, 0xb227000000000000, 0xff0f00000000, // EXTRACT SECONDARY ASN (ESAR R1) + [8]*argField{ap_Reg_24_27}}, + {ESAIR, 0xffff000000000000, 0xb99b000000000000, 0xff0f00000000, // EXTRACT SECONDARY ASN AND INSTANCE (ESAIR R1) + [8]*argField{ap_Reg_24_27}}, + {ESXTR, 0xffff000000000000, 0xb3ef000000000000, 0xff0000000000, // EXTRACT SIGNIFICANCE (extended DFP to 64) (ESXTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {ESDTR, 0xffff000000000000, 0xb3e7000000000000, 0xff0000000000, // EXTRACT SIGNIFICANCE (long DFP to 64) (ESDTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {EREG, 0xffff000000000000, 0xb249000000000000, 0xff0000000000, // EXTRACT STACKED REGISTERS (32) (EREG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {EREGG, 0xffff000000000000, 0xb90e000000000000, 0xff0000000000, // EXTRACT STACKED REGISTERS (64) (EREGG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ESTA, 0xffff000000000000, 
0xb24a000000000000, 0xff0000000000, // EXTRACT STACKED STATE (ESTA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ETND, 0xffff000000000000, 0xb2ec000000000000, 0xff0f00000000, // EXTRACT TRANSACTION NESTING DEPTH (ETND R1) + [8]*argField{ap_Reg_24_27}}, + {FLOGR, 0xffff000000000000, 0xb983000000000000, 0xff0000000000, // FIND LEFTMOST ONE (FLOGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {HSCH, 0xffff000000000000, 0xb231000000000000, 0xffff00000000, // HALT SUBCHANNEL (HSCH) + [8]*argField{}}, + {HDR, 0xff00000000000000, 0x2400000000000000, 0x0, // HALVE (long HFP) (HDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {HER, 0xff00000000000000, 0x3400000000000000, 0x0, // HALVE (short HFP) (HER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {IAC, 0xffff000000000000, 0xb224000000000000, 0xff0f00000000, // INSERT ADDRESS SPACE CONTROL (IAC R1) + [8]*argField{ap_Reg_24_27}}, + {IEXTR, 0xffff000000000000, 0xb3fe000000000000, 0xf0000000000, // INSERT BIASED EXPONENT (64 to extended DFP) (IEXTR R1,R3,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31}}, + {IEDTR, 0xffff000000000000, 0xb3f6000000000000, 0xf0000000000, // INSERT BIASED EXPONENT (64 to long DFP) (IEDTR R1,R3,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31}}, + {IC, 0xff00000000000000, 0x4300000000000000, 0x0, // INSERT CHARACTER (IC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ICY, 0xff00000000ff0000, 0xe300000000730000, 0x0, // INSERT CHARACTER (ICY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {ICMH, 0xff00000000ff0000, 0xeb00000000800000, 0x0, // INSERT CHARACTERS UNDER MASK (high) (ICMH R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {ICM, 0xff00000000000000, 0xbf00000000000000, 0x0, // INSERT CHARACTERS UNDER MASK (low) (ICM R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, 
ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {ICMY, 0xff00000000ff0000, 0xeb00000000810000, 0x0, // INSERT CHARACTERS UNDER MASK (low) (ICMY R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {IIHH, 0xff0f000000000000, 0xa500000000000000, 0x0, // INSERT IMMEDIATE (high high) (IIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IIHL, 0xff0f000000000000, 0xa501000000000000, 0x0, // INSERT IMMEDIATE (high low) (IIHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IIHF, 0xff0f000000000000, 0xc008000000000000, 0x0, // INSERT IMMEDIATE (high) (IIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {IILH, 0xff0f000000000000, 0xa502000000000000, 0x0, // INSERT IMMEDIATE (low high) (IILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IILL, 0xff0f000000000000, 0xa503000000000000, 0x0, // INSERT IMMEDIATE (low low) (IILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {IILF, 0xff0f000000000000, 0xc009000000000000, 0x0, // INSERT IMMEDIATE (low) (IILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {IPM, 0xffff000000000000, 0xb222000000000000, 0xff0f00000000, // INSERT PROGRAM MASK (IPM R1) + [8]*argField{ap_Reg_24_27}}, + {IPK, 0xffff000000000000, 0xb20b000000000000, 0xffff00000000, // INSERT PSW KEY (IPK) + [8]*argField{}}, + {IRBM, 0xffff000000000000, 0xb9ac000000000000, 0xff0000000000, // INSERT REFERENCE BITS MULTIPLE (IRBM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ISKE, 0xffff000000000000, 0xb229000000000000, 0xff0000000000, // INSERT STORAGE KEY EXTENDED (ISKE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {IVSK, 0xffff000000000000, 0xb223000000000000, 0xff0000000000, // INSERT VIRTUAL STORAGE KEY (IVSK R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {IDTE, 0xffff000000000000, 0xb98e000000000000, 0x0, // INVALIDATE DAT TABLE ENTRY (IDTE R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, 
ap_Reg_28_31, ap_Mask_20_23}}, + {IPTE, 0xffff000000000000, 0xb221000000000000, 0x0, // INVALIDATE PAGE TABLE ENTRY (IPTE R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {L, 0xff00000000000000, 0x5800000000000000, 0x0, // LOAD (32) (L R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LR, 0xff00000000000000, 0x1800000000000000, 0x0, // LOAD (32) (LR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LY, 0xff00000000ff0000, 0xe300000000580000, 0x0, // LOAD (32) (LY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LG, 0xff00000000ff0000, 0xe300000000040000, 0x0, // LOAD (64) (LG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGR, 0xffff000000000000, 0xb904000000000000, 0xff0000000000, // LOAD (64) (LGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LGF, 0xff00000000ff0000, 0xe300000000140000, 0x0, // LOAD (64←32) (LGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGFR, 0xffff000000000000, 0xb914000000000000, 0xff0000000000, // LOAD (64←32) (LGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LXR, 0xffff000000000000, 0xb365000000000000, 0xff0000000000, // LOAD (extended) (LXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LD, 0xff00000000000000, 0x6800000000000000, 0x0, // LOAD (long) (LD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LDR, 0xff00000000000000, 0x2800000000000000, 0x0, // LOAD (long) (LDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LDY, 0xff00000000ff0000, 0xed00000000650000, 0x0, // LOAD (long) (LDY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LE, 0xff00000000000000, 0x7800000000000000, 0x0, // LOAD (short) (LE 
R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LER, 0xff00000000000000, 0x3800000000000000, 0x0, // LOAD (short) (LER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LEY, 0xff00000000ff0000, 0xed00000000640000, 0x0, // LOAD (short) (LEY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAM, 0xff00000000000000, 0x9a00000000000000, 0x0, // LOAD ACCESS MULTIPLE 7-268 (LAM R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LAMY, 0xff00000000ff0000, 0xeb000000009a0000, 0x0, // LOAD ACCESS MULTIPLE 7-268 (LAMY R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LA, 0xff00000000000000, 0x4100000000000000, 0x0, // LOAD ADDRESS (LA R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAY, 0xff00000000ff0000, 0xe300000000710000, 0x0, // LOAD ADDRESS (LAY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAE, 0xff00000000000000, 0x5100000000000000, 0x0, // LOAD ADDRESS EXTENDED (LAE R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LAEY, 0xff00000000ff0000, 0xe300000000750000, 0x0, // LOAD ADDRESS EXTENDED (LAEY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LARL, 0xff0f000000000000, 0xc000000000000000, 0x0, // LOAD ADDRESS RELATIVE LONG (LARL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LASP, 0xffff000000000000, 0xe500000000000000, 0x0, // LOAD ADDRESS SPACE PARAMETERS (LASP D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LAA, 0xff00000000ff0000, 0xeb00000000f80000, 0x0, // LOAD AND ADD (32) (LAA R1,R3,D2(B2)) + 
[8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAAG, 0xff00000000ff0000, 0xeb00000000e80000, 0x0, // LOAD AND ADD (64) (LAAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAAL, 0xff00000000ff0000, 0xeb00000000fa0000, 0x0, // LOAD AND ADD LOGICAL (32) (LAAL R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAALG, 0xff00000000ff0000, 0xeb00000000ea0000, 0x0, // LOAD AND ADD LOGICAL (64) (LAALG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAN, 0xff00000000ff0000, 0xeb00000000f40000, 0x0, // LOAD AND AND (32) (LAN R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LANG, 0xff00000000ff0000, 0xeb00000000e40000, 0x0, // LOAD AND AND (64) (LANG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAX, 0xff00000000ff0000, 0xeb00000000f70000, 0x0, // LOAD AND EXCLUSIVE OR (32) (LAX R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAXG, 0xff00000000ff0000, 0xeb00000000e70000, 0x0, // LOAD AND EXCLUSIVE OR (64) (LAXG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAO, 0xff00000000ff0000, 0xeb00000000f60000, 0x0, // LOAD AND OR (32) (LAO R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LAOG, 0xff00000000ff0000, 0xeb00000000e60000, 0x0, // LOAD AND OR (64) (LAOG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LT, 0xff00000000ff0000, 0xe300000000120000, 0x0, // LOAD AND TEST (32) (LT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LTR, 0xff00000000000000, 0x1200000000000000, 0x0, // LOAD AND TEST (32) (LTR R1,R2) 
+ [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LTG, 0xff00000000ff0000, 0xe300000000020000, 0x0, // LOAD AND TEST (64) (LTG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LTGR, 0xffff000000000000, 0xb902000000000000, 0xff0000000000, // LOAD AND TEST (64) (LTGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LTGF, 0xff00000000ff0000, 0xe300000000320000, 0x0, // LOAD AND TEST (64→32) (LTGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LTGFR, 0xffff000000000000, 0xb912000000000000, 0xff0000000000, // LOAD AND TEST (64→32) (LTGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LTXBR, 0xffff000000000000, 0xb342000000000000, 0xff0000000000, // LOAD AND TEST (extended BFP) (LTXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTXTR, 0xffff000000000000, 0xb3de000000000000, 0xff0000000000, // LOAD AND TEST (extended DFP) (LTXTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTXR, 0xffff000000000000, 0xb362000000000000, 0xff0000000000, // LOAD AND TEST (extended HFP) (LTXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTDBR, 0xffff000000000000, 0xb312000000000000, 0xff0000000000, // LOAD AND TEST (long BFP) (LTDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTDTR, 0xffff000000000000, 0xb3d6000000000000, 0xff0000000000, // LOAD AND TEST (long DFP) (LTDTR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTDR, 0xff00000000000000, 0x2200000000000000, 0x0, // LOAD AND TEST (long HFP) (LTDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LTEBR, 0xffff000000000000, 0xb302000000000000, 0xff0000000000, // LOAD AND TEST (short BFP) (LTEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LTER, 0xff00000000000000, 0x3200000000000000, 0x0, // LOAD AND TEST (short HFP) (LTER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LAT, 0xff00000000ff0000, 0xe3000000009f0000, 
0x0, // LOAD AND TRAP (32L→32) (LAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGAT, 0xff00000000ff0000, 0xe300000000850000, 0x0, // LOAD AND TRAP (64) (LGAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LZRF, 0xff00000000ff0000, 0xe3000000003b0000, 0x0, // LOAD AND ZERO RIGHTMOST BYTE (32) (LZRF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LZRG, 0xff00000000ff0000, 0xe3000000002a0000, 0x0, // LOAD AND ZERO RIGHTMOST BYTE (64) (LZRG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LBEAR, 0xffff000000000000, 0xb200000000000000, 0x0, // LOAD BEAR (LBEAR D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LB, 0xff00000000ff0000, 0xe300000000760000, 0x0, // LOAD BYTE (32→8) (LB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LBR, 0xffff000000000000, 0xb926000000000000, 0xff0000000000, // LOAD BYTE (32←8) (LBR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LGB, 0xff00000000ff0000, 0xe300000000770000, 0x0, // LOAD BYTE (64→8) (LGB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGBR, 0xffff000000000000, 0xb906000000000000, 0xff0000000000, // LOAD BYTE (64←8) (LGBR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LBH, 0xff00000000ff0000, 0xe300000000c00000, 0x0, // LOAD BYTE HIGH (32←8) (LBH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LCR, 0xff00000000000000, 0x1300000000000000, 0x0, // LOAD COMPLEMENT (32) (LCR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LCGR, 0xffff000000000000, 0xb903000000000000, 0xff0000000000, // LOAD COMPLEMENT (64) (LCGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LCGFR, 
0xffff000000000000, 0xb913000000000000, 0xff0000000000, // LOAD COMPLEMENT (64←32) (LCGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LCXBR, 0xffff000000000000, 0xb343000000000000, 0xff0000000000, // LOAD COMPLEMENT (extended BFP) (LCXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCXR, 0xffff000000000000, 0xb363000000000000, 0xff0000000000, // LOAD COMPLEMENT (extended HFP) (LCXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCDBR, 0xffff000000000000, 0xb313000000000000, 0xff0000000000, // LOAD COMPLEMENT (long BFP) (LCDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCDR, 0xff00000000000000, 0x2300000000000000, 0x0, // LOAD COMPLEMENT (long HFP) (LCDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LCDFR, 0xffff000000000000, 0xb373000000000000, 0xff0000000000, // LOAD COMPLEMENT (long) (LCDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCEBR, 0xffff000000000000, 0xb303000000000000, 0xff0000000000, // LOAD COMPLEMENT (short BFP) (LCEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LCER, 0xff00000000000000, 0x3300000000000000, 0x0, // LOAD COMPLEMENT (short HFP) (LCER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LCTL, 0xff00000000000000, 0xb700000000000000, 0x0, // LOAD CONTROL (32) (LCTL R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LCTLG, 0xff00000000ff0000, 0xeb000000002f0000, 0x0, // LOAD CONTROL (64) (LCTLG R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LCBB, 0xff00000000ff0000, 0xe700000000270000, 0xf000000, // LOAD COUNT TO BLOCK BOUNDARY (LCBB R1,D2(X2,B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35}}, + {FIXBR, 0xffff000000000000, 0xb347000000000000, 0xf0000000000, // LOAD FP INTEGER (extended BFP) (FIXBR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + 
{FIXBRA, 0xffff000000000000, 0xb347000000000000, 0x0, // LOAD FP INTEGER (extended BFP) (FIXBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIXTR, 0xffff000000000000, 0xb3df000000000000, 0x0, // LOAD FP INTEGER (extended DFP) (FIXTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIXR, 0xffff000000000000, 0xb367000000000000, 0xff0000000000, // LOAD FP INTEGER (extended HFP) (FIXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {FIDBR, 0xffff000000000000, 0xb35f000000000000, 0xf0000000000, // LOAD FP INTEGER (long BFP) (FIDBR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {FIDBRA, 0xffff000000000000, 0xb35f000000000000, 0x0, // LOAD FP INTEGER (long BFP) (FIDBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIDTR, 0xffff000000000000, 0xb3d7000000000000, 0x0, // LOAD FP INTEGER (long DFP) (FIDTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIDR, 0xffff000000000000, 0xb37f000000000000, 0xff0000000000, // LOAD FP INTEGER (long HFP) (FIDR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {FIEBR, 0xffff000000000000, 0xb357000000000000, 0xf0000000000, // LOAD FP INTEGER (short BFP) (FIEBR R1,M3,R2) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31}}, + {FIEBRA, 0xffff000000000000, 0xb357000000000000, 0x0, // LOAD FP INTEGER (short BFP) (FIEBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {FIER, 0xffff000000000000, 0xb377000000000000, 0xff0000000000, // LOAD FP INTEGER (short HFP) (FIER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LFPC, 0xffff000000000000, 0xb29d000000000000, 0x0, // LOAD FPC (LFPC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LFAS, 0xffff000000000000, 0xb2bd000000000000, 0x0, // LOAD FPC AND SIGNAL (LFAS D2(B2)) + 
[8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LDGR, 0xffff000000000000, 0xb3c1000000000000, 0xff0000000000, // LOAD FPR FROM GR (64 to long) (LDGR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_Reg_28_31}}, + {LGDR, 0xffff000000000000, 0xb3cd000000000000, 0xff0000000000, // LOAD GR FROM FPR (long to 64) (LGDR R1,R2) + [8]*argField{ap_Reg_24_27, ap_FPReg_28_31}}, + {LGG, 0xff00000000ff0000, 0xe3000000004c0000, 0x0, // LOAD GUARDED (64) (LGG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGSC, 0xff00000000ff0000, 0xe3000000004d0000, 0x0, // LOAD GUARDED STORAGE CONTROLS (LGSC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LH, 0xff00000000000000, 0x4800000000000000, 0x0, // LOAD HALFWORD (32→16) (LH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LHR, 0xffff000000000000, 0xb927000000000000, 0xff0000000000, // LOAD HALFWORD (32←16) (LHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LHY, 0xff00000000ff0000, 0xe300000000780000, 0x0, // LOAD HALFWORD (32←16) (LHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGH, 0xff00000000ff0000, 0xe300000000150000, 0x0, // LOAD HALFWORD (64←16) (LGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LGHR, 0xffff000000000000, 0xb907000000000000, 0xff0000000000, // LOAD HALFWORD (64←16) (LGHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LHH, 0xff00000000ff0000, 0xe300000000c40000, 0x0, // LOAD HALFWORD HIGH (32→16) (LHH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LOCHHI, 0xff00000000ff0000, 0xec000000004e0000, 0xff000000, // LOAD HALFWORD HIGH IMMEDIATE ON (LOCHHI R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_12_15}}, + {LHI, 
0xff0f000000000000, 0xa708000000000000, 0x0, // CONDITION (32←16)LOAD HALFWORD IMMEDIATE (32)←16 (LHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {LGHI, 0xff0f000000000000, 0xa709000000000000, 0x0, // LOAD HALFWORD IMMEDIATE (64→16) (LGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, + {LOCHI, 0xff00000000ff0000, 0xec00000000420000, 0xff000000, // LOAD HALFWORD IMMEDIATE ON CONDITION(32←16) (LOCHI R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_12_15}}, + {LOCGHI, 0xff00000000ff0000, 0xec00000000460000, 0xff000000, // LOAD HALFWORD IMMEDIATE ON CONDITION(64→16) (LOCGHI R1,I2,M3) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31, ap_Mask_12_15}}, + {LHRL, 0xff0f000000000000, 0xc405000000000000, 0x0, // LOAD HALFWORD RELATIVE LONG (32←16) (LHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LGHRL, 0xff0f000000000000, 0xc404000000000000, 0x0, // LOAD HALFWORD RELATIVE LONG (64←16) (LGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LFH, 0xff00000000ff0000, 0xe300000000ca0000, 0x0, // LOAD HIGH (32) (LFH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LFHAT, 0xff00000000ff0000, 0xe300000000c80000, 0x0, // LOAD HIGH AND TRAP (32H←32) (LFHAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LOCFH, 0xff00000000ff0000, 0xeb00000000e00000, 0x0, // LOAD HIGH ON CONDITION (32) (LOCFH R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {LOCFHR, 0xffff000000000000, 0xb9e0000000000000, 0xf0000000000, // LOAD HIGH ON CONDITION (32) (LOCFHR R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {LGFI, 0xff0f000000000000, 0xc001000000000000, 0x0, // LOAD IMMEDIATE (64→32) (LGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {LXDB, 0xff00000000ff0000, 0xed00000000050000, 0xff000000, // LOAD LENGTHENED 
(long to extended BFP) (LXDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXDBR, 0xffff000000000000, 0xb305000000000000, 0xff0000000000, // LOAD LENGTHENED (long to extended BFP) (LXDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LXDTR, 0xffff000000000000, 0xb3dc000000000000, 0xf00000000000, // LOAD LENGTHENED (long to extended DFP) (LXDTR R1,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {LXD, 0xff00000000ff0000, 0xed00000000250000, 0xff000000, // LOAD LENGTHENED (long to extended HFP) (LXD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXDR, 0xffff000000000000, 0xb325000000000000, 0xff0000000000, // LOAD LENGTHENED (long to extended HFP) (LXDR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LXEB, 0xff00000000ff0000, 0xed00000000060000, 0xff000000, // LOAD LENGTHENED (short to extended BFP) (LXEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXEBR, 0xffff000000000000, 0xb306000000000000, 0xff0000000000, // LOAD LENGTHENED (short to extended BFP) (LXEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LXE, 0xff00000000ff0000, 0xed00000000260000, 0xff000000, // LOAD LENGTHENED (short to extended HFP) (LXE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LXER, 0xffff000000000000, 0xb326000000000000, 0xff0000000000, // LOAD LENGTHENED (short to extended HFP) (LXER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LDEB, 0xff00000000ff0000, 0xed00000000040000, 0xff000000, // LOAD LENGTHENED (short to long BFP) (LDEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LDEBR, 0xffff000000000000, 0xb304000000000000, 0xff0000000000, // LOAD LENGTHENED (short to long BFP) (LDEBR R1,R2) + 
[8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LDETR, 0xffff000000000000, 0xb3d4000000000000, 0xf00000000000, // LOAD LENGTHENED (short to long DFP) (LDETR R1,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_Mask_20_23}}, + {LDE, 0xff00000000ff0000, 0xed00000000240000, 0xff000000, // LOAD LENGTHENED (short to long HFP) (LDE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LDER, 0xffff000000000000, 0xb324000000000000, 0xff0000000000, // LOAD LENGTHENED (short to long HFP) (LDER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LLGF, 0xff00000000ff0000, 0xe300000000160000, 0x0, // LOAD LOGICAL (64←32) (LLGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGFR, 0xffff000000000000, 0xb916000000000000, 0xff0000000000, // LOAD LOGICAL (64←32) (LLGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGFSG, 0xff00000000ff0000, 0xe300000000480000, 0x0, // LOAD LOGICAL AND SHIFT GUARDED (64←32) (LLGFSG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGFAT, 0xff00000000ff0000, 0xe3000000009d0000, 0x0, // LOAD LOGICAL AND TRAP (64→32) (LLGFAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLZRGF, 0xff00000000ff0000, 0xe3000000003a0000, 0x0, // LOAD LOGICAL AND ZERO RIGHTMOST BYTE(64→32) (LLZRGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLC, 0xff00000000ff0000, 0xe300000000940000, 0x0, // LOAD LOGICAL CHARACTER (32→8) (LLC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLCR, 0xffff000000000000, 0xb994000000000000, 0xff0000000000, // LOAD LOGICAL CHARACTER (32←8) (LLCR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGC, 0xff00000000ff0000, 0xe300000000900000, 0x0, // LOAD LOGICAL 
CHARACTER (64←8) (LLGC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGCR, 0xffff000000000000, 0xb984000000000000, 0xff0000000000, // LOAD LOGICAL CHARACTER (64←8) (LLGCR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLCH, 0xff00000000ff0000, 0xe300000000c20000, 0x0, // LOAD LOGICAL CHARACTER HIGH (32←8) (LLCH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLH, 0xff00000000ff0000, 0xe300000000950000, 0x0, // LOAD LOGICAL HALFWORD (32←16) (LLH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLHR, 0xffff000000000000, 0xb995000000000000, 0xff0000000000, // LOAD LOGICAL HALFWORD (32←16) (LLHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGH, 0xff00000000ff0000, 0xe300000000910000, 0x0, // LOAD LOGICAL HALFWORD (64→16) (LLGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGHR, 0xffff000000000000, 0xb985000000000000, 0xff0000000000, // LOAD LOGICAL HALFWORD (64←16) (LLGHR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLHH, 0xff00000000ff0000, 0xe300000000c60000, 0x0, // LOAD LOGICAL HALFWORD HIGH (32→16) (LLHH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLHRL, 0xff0f000000000000, 0xc402000000000000, 0x0, // LOAD LOGICAL HALFWORD RELATIVE LONG(32←16) (LLHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LLGHRL, 0xff0f000000000000, 0xc406000000000000, 0x0, // LOAD LOGICAL HALFWORD RELATIVE LONG(64→16) (LLGHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LLIHH, 0xff0f000000000000, 0xa50c000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high high) (LLIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLIHL, 0xff0f000000000000, 0xa50d000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high low) (LLIHL 
R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLIHF, 0xff0f000000000000, 0xc00e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high) (LLIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {LLILH, 0xff0f000000000000, 0xa50e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low high) (LLILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLILL, 0xff0f000000000000, 0xa50f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low low) (LLILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {LLILF, 0xff0f000000000000, 0xc00f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low) (LLILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {LLGFRL, 0xff0f000000000000, 0xc40e000000000000, 0x0, // LOAD LOGICAL RELATIVE LONG (64→32) (LLGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LLGT, 0xff00000000ff0000, 0xe300000000170000, 0x0, // LOAD LOGICAL THIRTY ONE BITS (64→31) (LLGT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LLGTR, 0xffff000000000000, 0xb917000000000000, 0xff0000000000, // LOAD LOGICAL THIRTY ONE BITS (64→31) (LLGTR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LLGTAT, 0xff00000000ff0000, 0xe3000000009c0000, 0x0, // LOAD LOGICAL THIRTY ONE BITS AND TRAP(64←31) (LLGTAT R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LM, 0xff00000000000000, 0x9800000000000000, 0x0, // LOAD MULTIPLE (32) (LM R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LMY, 0xff00000000ff0000, 0xeb00000000980000, 0x0, // LOAD MULTIPLE (32) (LMY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LMG, 0xff00000000ff0000, 0xeb00000000040000, 0x0, // LOAD MULTIPLE (64) (LMG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LMD, 0xff00000000000000, 
0xef00000000000000, 0x0, // LOAD MULTIPLE DISJOINT (64→32&32) (LMD R1,R3,D2(B2),D4(B4)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LMH, 0xff00000000ff0000, 0xeb00000000960000, 0x0, // LOAD MULTIPLE HIGH (32) (LMH R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LNR, 0xff00000000000000, 0x1100000000000000, 0x0, // LOAD NEGATIVE (32) (LNR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LNGR, 0xffff000000000000, 0xb901000000000000, 0xff0000000000, // LOAD NEGATIVE (64) (LNGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LNGFR, 0xffff000000000000, 0xb911000000000000, 0xff0000000000, // LOAD NEGATIVE (64→32) (LNGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LNXBR, 0xffff000000000000, 0xb341000000000000, 0xff0000000000, // LOAD NEGATIVE (extended BFP) (LNXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNXR, 0xffff000000000000, 0xb361000000000000, 0xff0000000000, // LOAD NEGATIVE (extended HFP) (LNXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNDBR, 0xffff000000000000, 0xb311000000000000, 0xff0000000000, // LOAD NEGATIVE (long BFP) (LNDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNDR, 0xff00000000000000, 0x2100000000000000, 0x0, // LOAD NEGATIVE (long HFP) (LNDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LNDFR, 0xffff000000000000, 0xb371000000000000, 0xff0000000000, // LOAD NEGATIVE (long) (LNDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNEBR, 0xffff000000000000, 0xb301000000000000, 0xff0000000000, // LOAD NEGATIVE (short BFP) (LNEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LNER, 0xff00000000000000, 0x3100000000000000, 0x0, // LOAD NEGATIVE (short HFP) (LNER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LOC, 0xff00000000ff0000, 0xeb00000000f20000, 0x0, // LOAD ON CONDITION (32) (LOC R1,D2(B2),M3) + 
[8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {LOCR, 0xffff000000000000, 0xb9f2000000000000, 0xf0000000000, // LOAD ON CONDITION (32) (LOCR R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {LOCG, 0xff00000000ff0000, 0xeb00000000e20000, 0x0, // LOAD ON CONDITION (64) (LOCG R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {LOCGR, 0xffff000000000000, 0xb9e2000000000000, 0xf0000000000, // LOAD ON CONDITION (64) (LOCGR R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {LPTEA, 0xffff000000000000, 0xb9aa000000000000, 0x0, // LOAD PAGE TABLE ENTRY ADDRESS (LPTEA R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {LPD, 0xff0f000000000000, 0xc804000000000000, 0x0, // LOAD PAIR DISJOINT (32) (LPD R3,D1(B1),D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LPDG, 0xff0f000000000000, 0xc805000000000000, 0x0, // LOAD PAIR DISJOINT (64) (LPDG R3,D1(B1),D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {LPQ, 0xff00000000ff0000, 0xe3000000008f0000, 0x0, // LOAD PAIR FROM QUADWORD (64&64←128) (LPQ R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LPR, 0xff00000000000000, 0x1000000000000000, 0x0, // LOAD POSITIVE (32) (LPR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {LPGR, 0xffff000000000000, 0xb900000000000000, 0xff0000000000, // LOAD POSITIVE (64) (LPGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LPGFR, 0xffff000000000000, 0xb910000000000000, 0xff0000000000, // LOAD POSITIVE (64→32) (LPGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LPXBR, 0xffff000000000000, 0xb340000000000000, 0xff0000000000, // LOAD POSITIVE (extended BFP) (LPXBR R1,R2) + [8]*argField{ap_FPReg_24_27, 
ap_FPReg_28_31}}, + {LPXR, 0xffff000000000000, 0xb360000000000000, 0xff0000000000, // LOAD POSITIVE (extended HFP) (LPXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPDBR, 0xffff000000000000, 0xb310000000000000, 0xff0000000000, // LOAD POSITIVE (long BFP) (LPDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPDR, 0xff00000000000000, 0x2000000000000000, 0x0, // LOAD POSITIVE (long HFP) (LPDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LPDFR, 0xffff000000000000, 0xb370000000000000, 0xff0000000000, // LOAD POSITIVE (long) (LPDFR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPEBR, 0xffff000000000000, 0xb300000000000000, 0xff0000000000, // LOAD POSITIVE (short BFP) (LPEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LPER, 0xff00000000000000, 0x3000000000000000, 0x0, // LOAD POSITIVE (short HFP) (LPER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LPSW, 0xff00000000000000, 0x8200000000000000, 0x0, // LOAD PSW (LPSW D1(B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LPSWE, 0xffff000000000000, 0xb2b2000000000000, 0x0, // LOAD PSW EXTENDED (LPSWE D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {LPSWEY, 0xff00000000ff0000, 0xeb00000000710000, 0xff000000000000, // LOAD PSW EXTENDED (LPSWEY D1(B1)) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {LRA, 0xff00000000000000, 0xb100000000000000, 0x0, // LOAD REAL ADDRESS (32) (LRA R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRAY, 0xff00000000ff0000, 0xe300000000130000, 0x0, // LOAD REAL ADDRESS (32) (LRAY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRAG, 0xff00000000ff0000, 0xe300000000030000, 0x0, // LOAD REAL ADDRESS (64) (LRAG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRL, 0xff0f000000000000, 0xc40d000000000000, 
0x0, // LOAD RELATIVE LONG (32) (LRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LGRL, 0xff0f000000000000, 0xc408000000000000, 0x0, // LOAD RELATIVE LONG (64) (LGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LGFRL, 0xff0f000000000000, 0xc40c000000000000, 0x0, // LOAD RELATIVE LONG (64→32) (LGFRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {LRVH, 0xff00000000ff0000, 0xe3000000001f0000, 0x0, // LOAD REVERSED (16) (LRVH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRV, 0xff00000000ff0000, 0xe3000000001e0000, 0x0, // LOAD REVERSED (32) (LRV R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRVR, 0xffff000000000000, 0xb91f000000000000, 0xff0000000000, // LOAD REVERSED (32) (LRVR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LRVG, 0xff00000000ff0000, 0xe3000000000f0000, 0x0, // LOAD REVERSED (64) (LRVG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {LRVGR, 0xffff000000000000, 0xb90f000000000000, 0xff0000000000, // LOAD REVERSED (64) (LRVGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LDXBR, 0xffff000000000000, 0xb345000000000000, 0xff0000000000, // LOAD ROUNDED (extended to long BFP) (LDXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LDXBRA, 0xffff000000000000, 0xb345000000000000, 0x0, // LOAD ROUNDED (extended to long BFP) (LDXBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LDXTR, 0xffff000000000000, 0xb3dd000000000000, 0x0, // LOAD ROUNDED (extended to long DFP) (LDXTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LDXR, 0xff00000000000000, 0x2500000000000000, 0x0, // LOAD ROUNDED (extended to long HFP) (LDXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LRDR, 0xff00000000000000, 
0x2500000000000000, 0x0, // LOAD ROUNDED (extended to long HFP) (LRDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LEXBR, 0xffff000000000000, 0xb346000000000000, 0xff0000000000, // LOAD ROUNDED (extended to short BFP) (LEXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LEXBRA, 0xffff000000000000, 0xb346000000000000, 0x0, // LOAD ROUNDED (extended to short BFP) (LEXBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LEXR, 0xffff000000000000, 0xb366000000000000, 0xff0000000000, // LOAD ROUNDED (extended to short HFP) (LEXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LEDBR, 0xffff000000000000, 0xb344000000000000, 0xff0000000000, // LOAD ROUNDED (long to short BFP) (LEDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {LEDBRA, 0xffff000000000000, 0xb344000000000000, 0x0, // LOAD ROUNDED (long to short BFP) (LEDBRA R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LEDTR, 0xffff000000000000, 0xb3d5000000000000, 0x0, // LOAD ROUNDED (long to short DFP) (LEDTR R1,M3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_Mask_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {LEDR, 0xff00000000000000, 0x3500000000000000, 0x0, // LOAD ROUNDED (long to short HFP) (LEDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LRER, 0xff00000000000000, 0x3500000000000000, 0x0, // LOAD ROUNDED (long to short HFP) (LRER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {LURA, 0xffff000000000000, 0xb24b000000000000, 0xff0000000000, // LOAD USING REAL ADDRESS (32) (LURA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LURAG, 0xffff000000000000, 0xb905000000000000, 0xff0000000000, // LOAD USING REAL ADDRESS (64) (LURAG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {LZXR, 0xffff000000000000, 0xb376000000000000, 0xff0f00000000, // LOAD ZERO (extended) (LZXR R1) + [8]*argField{ap_FPReg_24_27}}, + {LZDR, 0xffff000000000000, 0xb375000000000000, 
0xff0f00000000, // LOAD ZERO (long) (LZDR R1) + [8]*argField{ap_FPReg_24_27}}, + {LZER, 0xffff000000000000, 0xb374000000000000, 0xff0f00000000, // LOAD ZERO (short) (LZER R1) + [8]*argField{ap_FPReg_24_27}}, + {MSTA, 0xffff000000000000, 0xb247000000000000, 0xff0f00000000, // MODIFY STACKED STATE (MSTA R1) + [8]*argField{ap_Reg_24_27}}, + {MSCH, 0xffff000000000000, 0xb232000000000000, 0x0, // MODIFY SUBCHANNEL (MSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {MC, 0xff00000000000000, 0xaf00000000000000, 0x0, // MONITOR CALL (MC D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {MVHHI, 0xffff000000000000, 0xe544000000000000, 0x0, // MOVE (16←16) (MVHHI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {MVHI, 0xffff000000000000, 0xe54c000000000000, 0x0, // MOVE (32→16) (MVHI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {MVGHI, 0xffff000000000000, 0xe548000000000000, 0x0, // MOVE (64←16) (MVGHI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned16_32_47}}, + {MVC, 0xff00000000000000, 0xd200000000000000, 0x0, // MOVE (character) (MVC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVI, 0xff00000000000000, 0x9200000000000000, 0x0, // MOVE (immediate) (MVI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {MVIY, 0xff00000000ff0000, 0xeb00000000520000, 0x0, // MOVE (immediate) (MVIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {MVCIN, 0xff00000000000000, 0xe800000000000000, 0x0, // MOVE INVERSE (MVCIN D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVCL, 0xff00000000000000, 0xe00000000000000, 0x0, // MOVE LONG (MVCL R1,R2) + 
[8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {MVCLE, 0xff00000000000000, 0xa800000000000000, 0x0, // MOVE LONG EXTENDED (MVCLE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {MVCLU, 0xff00000000ff0000, 0xeb000000008e0000, 0x0, // MOVE LONG UNICODE (MVCLU R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {MVN, 0xff00000000000000, 0xd100000000000000, 0x0, // MOVE NUMERICS (MVN D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVPG, 0xffff000000000000, 0xb254000000000000, 0xff0000000000, // MOVE PAGE (MVPG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MVCRL, 0xffff000000000000, 0xe50a000000000000, 0x0, // MOVE RIGHT TO LEFT (MVCRL D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVST, 0xffff000000000000, 0xb255000000000000, 0xff0000000000, // MOVE STRING (MVST R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MVCP, 0xff00000000000000, 0xda00000000000000, 0x0, // MOVE TO PRIMARY (MVCP D1(R1,B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_Reg_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_12_15}}, + {MVCS, 0xff00000000000000, 0xdb00000000000000, 0x0, // MOVE TO SECONDARY (MVCS D1(R1,B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_Reg_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_12_15}}, + {MVCDK, 0xffff000000000000, 0xe50f000000000000, 0x0, // MOVE WITH DESTINATION KEY (MVCDK D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVCK, 0xff00000000000000, 0xd900000000000000, 0x0, // MOVE WITH KEY (MVCK D1(R1,B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_Reg_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_12_15}}, + {MVO, 0xff00000000000000, 
0xf100000000000000, 0x0, // MOVE WITH OFFSET (MVO D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {MVCOS, 0xff0f000000000000, 0xc800000000000000, 0x0, // MOVE WITH OPTIONAL SPECIFICATIONS (MVCOS D1(B1),D2(B2),R3) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_Reg_8_11}}, + {MVCSK, 0xffff000000000000, 0xe50e000000000000, 0x0, // MOVE WITH SOURCE KEY (MVCSK D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MVZ, 0xff00000000000000, 0xd300000000000000, 0x0, // MOVE ZONES (MVZ D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {MG, 0xff00000000ff0000, 0xe300000000840000, 0x0, // MULTIPLY (128←64) (MG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MGRK, 0xffff000000000000, 0xb9ec000000000000, 0xf0000000000, // MULTIPLY (128←64) (MGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {M, 0xff00000000000000, 0x5c00000000000000, 0x0, // MULTIPLY (64←32) (M R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MFY, 0xff00000000ff0000, 0xe3000000005c0000, 0x0, // MULTIPLY (64←32) (MFY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MR, 0xff00000000000000, 0x1c00000000000000, 0x0, // MULTIPLY (64←32) (MR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {MXBR, 0xffff000000000000, 0xb34c000000000000, 0xff0000000000, // MULTIPLY (extended BFP) (MXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MXTR, 0xffff000000000000, 0xb3d8000000000000, 0xf0000000000, // MULTIPLY (extended DFP) (MXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {MXTRA, 
0xffff000000000000, 0xb3d8000000000000, 0x0, // MULTIPLY (extended DFP) (MXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {MXR, 0xff00000000000000, 0x2600000000000000, 0x0, // MULTIPLY (extended HFP) (MXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MDB, 0xff00000000ff0000, 0xed000000001c0000, 0xff000000, // MULTIPLY (long BFP) (MDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDBR, 0xffff000000000000, 0xb31c000000000000, 0xff0000000000, // MULTIPLY (long BFP) (MDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MDTR, 0xffff000000000000, 0xb3d0000000000000, 0xf0000000000, // MULTIPLY (long DFP) (MDTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {MDTRA, 0xffff000000000000, 0xb3d0000000000000, 0x0, // MULTIPLY (long DFP) (MDTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {MD, 0xff00000000000000, 0x6c00000000000000, 0x0, // MULTIPLY (long HFP) (MD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDR, 0xff00000000000000, 0x2c00000000000000, 0x0, // MULTIPLY (long HFP) (MDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MXDB, 0xff00000000ff0000, 0xed00000000070000, 0xff000000, // MULTIPLY (long to extended BFP) (MXDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MXDBR, 0xffff000000000000, 0xb307000000000000, 0xff0000000000, // MULTIPLY (long to extended BFP) (MXDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MXD, 0xff00000000000000, 0x6700000000000000, 0x0, // MULTIPLY (long to extended HFP) (MXD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MXDR, 0xff00000000000000, 0x2700000000000000, 0x0, // MULTIPLY (long to extended HFP) (MXDR R1,R2) + 
[8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MEEB, 0xff00000000ff0000, 0xed00000000170000, 0xff000000, // MULTIPLY (short BFP) (MEEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MEEBR, 0xffff000000000000, 0xb317000000000000, 0xff0000000000, // MULTIPLY (short BFP) (MEEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MEE, 0xff00000000ff0000, 0xed00000000370000, 0xff000000, // MULTIPLY (short HFP) (MEE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MEER, 0xffff000000000000, 0xb337000000000000, 0xff0000000000, // MULTIPLY (short HFP) (MEER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MDEB, 0xff00000000ff0000, 0xed000000000c0000, 0xff000000, // MULTIPLY (short to long BFP) (MDEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDEBR, 0xffff000000000000, 0xb30c000000000000, 0xff0000000000, // MULTIPLY (short to long BFP) (MDEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {MDE, 0xff00000000000000, 0x7c00000000000000, 0x0, // MULTIPLY (short to long HFP) (MDE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MDER, 0xff00000000000000, 0x3c00000000000000, 0x0, // MULTIPLY (short to long HFP) (MDER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {ME, 0xff00000000000000, 0x7c00000000000000, 0x0, // MULTIPLY (short to long HFP) (ME R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MER, 0xff00000000000000, 0x3c00000000000000, 0x0, // MULTIPLY (short to long HFP) (MER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {MAY, 0xff00000000ff0000, 0xed000000003a0000, 0xf000000, // MULTIPLY & ADD UNNORMALIZED (long to ext. 
HFP) (MAY R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAYR, 0xffff000000000000, 0xb33a000000000000, 0xf0000000000, // MULTIPLY & ADD UNNORMALIZED (long to ext. HFP) (MAYR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MADB, 0xff00000000ff0000, 0xed000000001e0000, 0xf000000, // MULTIPLY AND ADD (long BFP) (MADB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MADBR, 0xffff000000000000, 0xb31e000000000000, 0xf0000000000, // MULTIPLY AND ADD (long BFP) (MADBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAD, 0xff00000000ff0000, 0xed000000003e0000, 0xf000000, // MULTIPLY AND ADD (long HFP) (MAD R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MADR, 0xffff000000000000, 0xb33e000000000000, 0xf0000000000, // MULTIPLY AND ADD (long HFP) (MADR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAEB, 0xff00000000ff0000, 0xed000000000e0000, 0xf000000, // MULTIPLY AND ADD (short BFP) (MAEB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAEBR, 0xffff000000000000, 0xb30e000000000000, 0xf0000000000, // MULTIPLY AND ADD (short BFP) (MAEBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAE, 0xff00000000ff0000, 0xed000000002e0000, 0xf000000, // MULTIPLY AND ADD (short HFP) (MAE R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAER, 0xffff000000000000, 0xb32e000000000000, 0xf0000000000, // MULTIPLY AND ADD (short HFP) (MAER R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAYH, 0xff00000000ff0000, 0xed000000003c0000, 0xf000000, // MULTIPLY 
AND ADD UNNRM. (long to ext. high HFP) (MAYH R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAYHR, 0xffff000000000000, 0xb33c000000000000, 0xf0000000000, // MULTIPLY AND ADD UNNRM. (long to ext. high HFP) (MAYHR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MAYL, 0xff00000000ff0000, 0xed00000000380000, 0xf000000, // MULTIPLY AND ADD UNNRM. (long to ext. low HFP) (MAYL R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MAYLR, 0xffff000000000000, 0xb338000000000000, 0xf0000000000, // MULTIPLY AND ADD UNNRM. (long to ext. low HFP) (MAYLR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSDB, 0xff00000000ff0000, 0xed000000001f0000, 0xf000000, // MULTIPLY AND SUBTRACT (long BFP) (MSDB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSDBR, 0xffff000000000000, 0xb31f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (long BFP) (MSDBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSD, 0xff00000000ff0000, 0xed000000003f0000, 0xf000000, // MULTIPLY AND SUBTRACT (long HFP) (MSD R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSDR, 0xffff000000000000, 0xb33f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (long HFP) (MSDR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSEB, 0xff00000000ff0000, 0xed000000000f0000, 0xf000000, // MULTIPLY AND SUBTRACT (short BFP) (MSEB R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSEBR, 0xffff000000000000, 0xb30f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (short BFP) (MSEBR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, 
ap_FPReg_24_27, ap_FPReg_28_31}}, + {MSE, 0xff00000000ff0000, 0xed000000002f0000, 0xf000000, // MULTIPLY AND SUBTRACT (short HFP) (MSE R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSER, 0xffff000000000000, 0xb32f000000000000, 0xf0000000000, // MULTIPLY AND SUBTRACT (short HFP) (MSER R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MP, 0xff00000000000000, 0xfc00000000000000, 0x0, // MULTIPLY DECIMAL (MP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {MH, 0xff00000000000000, 0x4c00000000000000, 0x0, // MULTIPLY HALFWORD (32←16) (MH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MHY, 0xff00000000ff0000, 0xe3000000007c0000, 0x0, // MULTIPLY HALFWORD (32←16) (MHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MGH, 0xff00000000ff0000, 0xe3000000003c0000, 0x0, // MULTIPLY HALFWORD (64→16) (MGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MHI, 0xff0f000000000000, 0xa70c000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (32→16) (MHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {MGHI, 0xff0f000000000000, 0xa70d000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (64→16) (MGHI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {MLG, 0xff00000000ff0000, 0xe300000000860000, 0x0, // MULTIPLY LOGICAL (128→64) (MLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MLGR, 0xffff000000000000, 0xb986000000000000, 0xff0000000000, // MULTIPLY LOGICAL (128→64) (MLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {ML, 0xff00000000ff0000, 0xe300000000960000, 0x0, // MULTIPLY LOGICAL (64←32) (ML R1,D2(X2,B2)) + 
[8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MLR, 0xffff000000000000, 0xb996000000000000, 0xff0000000000, // MULTIPLY LOGICAL (64←32) (MLR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MS, 0xff00000000000000, 0x7100000000000000, 0x0, // MULTIPLY SINGLE (32) (MS R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSC, 0xff00000000ff0000, 0xe300000000530000, 0x0, // MULTIPLY SINGLE (32) (MSC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSR, 0xffff000000000000, 0xb252000000000000, 0xff0000000000, // MULTIPLY SINGLE (32) (MSR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MSRKC, 0xffff000000000000, 0xb9fd000000000000, 0xf0000000000, // MULTIPLY SINGLE (32) (MSRKC R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {MSY, 0xff00000000ff0000, 0xe300000000510000, 0x0, // MULTIPLY SINGLE (32) (MSY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSG, 0xff00000000ff0000, 0xe3000000000c0000, 0x0, // MULTIPLY SINGLE (64) (MSG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSGC, 0xff00000000ff0000, 0xe300000000830000, 0x0, // MULTIPLY SINGLE (64) (MSGC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSGR, 0xffff000000000000, 0xb90c000000000000, 0xff0000000000, // MULTIPLY SINGLE (64) (MSGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MSGRKC, 0xffff000000000000, 0xb9ed000000000000, 0xf0000000000, // MULTIPLY SINGLE (64) (MSGRKC R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {MSGF, 0xff00000000ff0000, 0xe3000000001c0000, 0x0, // MULTIPLY SINGLE (64←32) (MSGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MSGFR, 
0xffff000000000000, 0xb91c000000000000, 0xff0000000000, // MULTIPLY SINGLE (64←32) (MSGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {MSFI, 0xff0f000000000000, 0xc201000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (32) (MSFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {MSGFI, 0xff0f000000000000, 0xc200000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (64←32) (MSGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {MYH, 0xff00000000ff0000, 0xed000000003d0000, 0xf000000, // MULTIPLY UNNORM. (long to ext. high HFP) (MYH R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MYHR, 0xffff000000000000, 0xb33d000000000000, 0xf0000000000, // MULTIPLY UNNORM. (long to ext. high HFP) (MYHR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MYL, 0xff00000000ff0000, 0xed00000000390000, 0xf000000, // MULTIPLY UNNORM. (long to ext. low HFP) (MYL R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MYLR, 0xffff000000000000, 0xb339000000000000, 0xf0000000000, // MULTIPLY UNNORM. (long to ext. low HFP) (MYLR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {MY, 0xff00000000ff0000, 0xed000000003b0000, 0xf000000, // MULTIPLY UNNORMALIZED (long to ext. HFP) (MY R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {MYR, 0xffff000000000000, 0xb33b000000000000, 0xf0000000000, // MULTIPLY UNNORMALIZED (long to ext. 
HFP) (MYR R1,R3,R2) + [8]*argField{ap_FPReg_16_19, ap_FPReg_24_27, ap_FPReg_28_31}}, + {NNRK, 0xffff000000000000, 0xb974000000000000, 0xf0000000000, // NAND (32) (NNRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NNGRK, 0xffff000000000000, 0xb964000000000000, 0xf0000000000, // NAND (64) (NNGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NNPA, 0xffff000000000000, 0xb93b000000000000, 0xffff00000000, // NEURAL NETWORK PROCESSING ASSIST (NNPA) + [8]*argField{}}, + {NIAI, 0xffff000000000000, 0xb2fa000000000000, 0xff0000000000, // NEXT INSTRUCTION ACCESS INTENT (NIAI I1,I2) + [8]*argField{ap_ImmUnsigned_24_27, ap_ImmUnsigned_28_31}}, + {NTSTG, 0xff00000000ff0000, 0xe300000000250000, 0x0, // NONTRANSACTIONAL STORE (64) (NTSTG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {NORK, 0xffff000000000000, 0xb976000000000000, 0xf0000000000, // NOR (32) (NORK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NOGRK, 0xffff000000000000, 0xb966000000000000, 0xf0000000000, // NOR (64) (NOGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NXRK, 0xffff000000000000, 0xb977000000000000, 0xf0000000000, // NOT EXCLUSIVE OR (32) (NXRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {NXGRK, 0xffff000000000000, 0xb967000000000000, 0xf0000000000, // NOT EXCLUSIVE OR (64) (NXGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {O, 0xff00000000000000, 0x5600000000000000, 0x0, // OR (32) (O R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {OR, 0xff00000000000000, 0x1600000000000000, 0x0, // OR (32) (OR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {ORK, 0xffff000000000000, 0xb9f6000000000000, 0xf0000000000, // OR (32) (ORK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {OY, 0xff00000000ff0000, 
0xe300000000560000, 0x0, // OR (32) (OY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {OG, 0xff00000000ff0000, 0xe300000000810000, 0x0, // OR (64) (OG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {OGR, 0xffff000000000000, 0xb981000000000000, 0xff0000000000, // OR (64) (OGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {OGRK, 0xffff000000000000, 0xb9e6000000000000, 0xf0000000000, // OR (64) (OGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {OC, 0xff00000000000000, 0xd600000000000000, 0x0, // OR (character) (OC D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {OI, 0xff00000000000000, 0x9600000000000000, 0x0, // OR (immediate) (OI D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {OIY, 0xff00000000ff0000, 0xeb00000000560000, 0x0, // OR (immediate) (OIY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {OIHH, 0xff0f000000000000, 0xa508000000000000, 0x0, // OR IMMEDIATE (high high) (OIHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OIHL, 0xff0f000000000000, 0xa509000000000000, 0x0, // OR IMMEDIATE (high low) (OIHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OIHF, 0xff0f000000000000, 0xc00c000000000000, 0x0, // OR IMMEDIATE (high) (OIHF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {OILH, 0xff0f000000000000, 0xa50a000000000000, 0x0, // OR IMMEDIATE (low high) (OILH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OILL, 0xff0f000000000000, 0xa50b000000000000, 0x0, // OR IMMEDIATE (low low) (OILL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {OILF, 0xff0f000000000000, 0xc00d000000000000, 0x0, // OR IMMEDIATE (low) (OILF R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + 
{OCRK, 0xffff000000000000, 0xb975000000000000, 0xf0000000000, // OR WITH COMPLEMENT (32) (OCRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {OCGRK, 0xffff000000000000, 0xb965000000000000, 0xf0000000000, // OR WITH COMPLEMENT (64) (OCGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {PACK, 0xff00000000000000, 0xf200000000000000, 0x0, // PACK (PACK D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {PKA, 0xff00000000000000, 0xe900000000000000, 0x0, // PACK ASCII (PKA D1(B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_8_15, ap_BaseReg_32_35}}, + {PKU, 0xff00000000000000, 0xe100000000000000, 0x0, // PACK UNICODE (PKU D1(B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_8_15, ap_BaseReg_32_35}}, + {PGIN, 0xffff000000000000, 0xb22e000000000000, 0xff0000000000, // PAGE IN (PGIN R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PGOUT, 0xffff000000000000, 0xb22f000000000000, 0xff0000000000, // PAGE OUT (PGOUT R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PCC, 0xffff000000000000, 0xb92c000000000000, 0xffff00000000, // PERFORM CRYPTOGRAPHIC COMPUTATION (PCC) + [8]*argField{}}, + {PCKMO, 0xffff000000000000, 0xb928000000000000, 0xffff00000000, // PERFORM CRYPTOGRAPHIC KEY MGMT. 
OPERATIONS (PCKMO) + [8]*argField{}}, + {PFPO, 0xffff000000000000, 0x10a000000000000, 0x0, // PERFORM FLOATING-POINT OPERATION (PFPO) + [8]*argField{}}, + {PFMF, 0xffff000000000000, 0xb9af000000000000, 0xff0000000000, // PERFORM FRAME MANAGEMENT FUNCTION (PFMF R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PLO, 0xff00000000000000, 0xee00000000000000, 0x0, // PERFORM LOCKED OPERATION (PLO R1,D2(B2),R3,D4(B4)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Reg_12_15, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {PPA, 0xffff000000000000, 0xb2e8000000000000, 0xf0000000000, // PERFORM PROCESSOR ASSIST (PPA R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {PRNO, 0xffff000000000000, 0xb93c000000000000, 0xff0000000000, // PERFORM RANDOM NUMBER OPERATION (PRNO R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PTFF, 0xffff000000000000, 0x104000000000000, 0x0, // PERFORM TIMING FACILITY FUNCTION (PTFF) + [8]*argField{}}, + {PTF, 0xffff000000000000, 0xb9a2000000000000, 0xff0f00000000, // PERFORM TOPOLOGY FUNCTION (PTF R1) + [8]*argField{ap_Reg_24_27}}, + {POPCNT, 0xffff000000000000, 0xb9e1000000000000, 0xf0000000000, // POPULATION COUNT (POPCNT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {PFD, 0xff00000000ff0000, 0xe300000000360000, 0x0, // PREFETCH DATA (PFD M1,D2(X2,B2)) + [8]*argField{ap_Mask_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {PFDRL, 0xff0f000000000000, 0xc602000000000000, 0x0, // PREFETCH DATA RELATIVE LONG (PFDRL M1,RI2) + [8]*argField{ap_Mask_8_11, ap_RegImSigned32_16_47}}, + {PC, 0xffff000000000000, 0xb218000000000000, 0x0, // PROGRAM CALL (PC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {PR, 0xffff000000000000, 0x101000000000000, 0x0, // PROGRAM RETURN (PR) + [8]*argField{}}, + {PT, 0xffff000000000000, 0xb228000000000000, 0xff0000000000, // PROGRAM TRANSFER (PT R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PTI, 
0xffff000000000000, 0xb99e000000000000, 0xff0000000000, // PROGRAM TRANSFER WITH INSTANCE (PTI R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {PALB, 0xffff000000000000, 0xb248000000000000, 0xffff00000000, // PURGE ALB (PALB) + [8]*argField{}}, + {PTLB, 0xffff000000000000, 0xb20d000000000000, 0xffff00000000, // PURGE TLB (PTLB) + [8]*argField{}}, + {QAXTR, 0xffff000000000000, 0xb3fd000000000000, 0x0, // QUANTIZE (extended DFP) (QAXTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {QADTR, 0xffff000000000000, 0xb3f5000000000000, 0x0, // QUANTIZE (long DFP) (QADTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_FPReg_28_31, ap_Mask_20_23}}, + {QPACI, 0xffff000000000000, 0xb28f000000000000, 0x0, // QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {RRXTR, 0xffff000000000000, 0xb3ff000000000000, 0x0, // REROUND (extended DFP) (RRXTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {RRDTR, 0xffff000000000000, 0xb3f7000000000000, 0x0, // REROUND (long DFP) (RRDTR R1,R3,R2,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {RCHP, 0xffff000000000000, 0xb23b000000000000, 0xffff00000000, // RESET CHANNEL PATH (RCHP) + [8]*argField{}}, + {RDP, 0xffff000000000000, 0xb98b000000000000, 0x0, // RESET DAT PROTECTION (RDP R1,R3,R2,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_16_19, ap_Reg_28_31, ap_Mask_20_23}}, + {RRBE, 0xffff000000000000, 0xb22a000000000000, 0xff0000000000, // RESET REFERENCE BIT EXTENDED (RRBE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {RRBM, 0xffff000000000000, 0xb9ae000000000000, 0xff0000000000, // RESET REFERENCE BITS MULTIPLE (RRBM R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {RP, 0xffff000000000000, 0xb277000000000000, 0x0, // RESUME PROGRAM (RP D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {RSCH, 
0xffff000000000000, 0xb238000000000000, 0xffff00000000, // RESUME SUBCHANNEL (RSCH) + [8]*argField{}}, + {RLL, 0xff00000000ff0000, 0xeb000000001d0000, 0x0, // ROTATE LEFT SINGLE LOGICAL (32) (RLL R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {RLLG, 0xff00000000ff0000, 0xeb000000001c0000, 0x0, // ROTATE LEFT SINGLE LOGICAL (64) (RLLG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {RNSBG, 0xff00000000ff0000, 0xec00000000540000, 0x0, // ROTATE THEN AND SELECTED BITS (64) (RNSBG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RXSBG, 0xff00000000ff0000, 0xec00000000570000, 0x0, // ROTATETHENEXCLUSIVEORSELECT.BITS(64) (RXSBG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBG, 0xff00000000ff0000, 0xec00000000550000, 0x0, // ROTATE THEN INSERT SELECTED BITS (64) (RISBG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBGN, 0xff00000000ff0000, 0xec00000000590000, 0x0, // ROTATE THEN INSERT SELECTED BITS (64) (RISBGN R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBHG, 0xff00000000ff0000, 0xec000000005d0000, 0x0, // ROTATE THEN INSERT SELECTED BITS HIGH(64) (RISBHG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {RISBLG, 0xff00000000ff0000, 0xec00000000510000, 0x0, // ROTATE THEN INSERT SELECTED BITS LOW (64) (RISBLG R1,R2,I3,I4,I5) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {ROSBG, 0xff00000000ff0000, 0xec00000000560000, 0x0, // ROTATE THEN OR SELECTED BITS (64) (ROSBG R1,R2,I3,I4,I5) + 
[8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_ImmUnsigned_32_39}}, + {SRST, 0xffff000000000000, 0xb25e000000000000, 0xff0000000000, // SEARCH STRING (SRST R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SRSTU, 0xffff000000000000, 0xb9be000000000000, 0xff0000000000, // SEARCH STRING UNICODE (SRSTU R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SELR, 0xffff000000000000, 0xb9f0000000000000, 0x0, // SELECT (32) (SELR R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {SELGR, 0xffff000000000000, 0xb9e3000000000000, 0x0, // SELECT (64) (SELGR R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {SELFHR, 0xffff000000000000, 0xb9c0000000000000, 0x0, // SELECT HIGH (32) (SELFHR R1,R2,R3,M4) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19, ap_Mask_20_23}}, + {SAR, 0xffff000000000000, 0xb24e000000000000, 0xff0000000000, // SET ACCESS (SAR R1,R2) + [8]*argField{ap_ACReg_24_27, ap_Reg_28_31}}, + {SAL, 0xffff000000000000, 0xb237000000000000, 0xffff00000000, // SET ADDRESS LIMIT (SAL) + [8]*argField{}}, + {SAC, 0xffff000000000000, 0xb219000000000000, 0x0, // SET ADDRESS SPACE CONTROL (SAC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SACF, 0xffff000000000000, 0xb279000000000000, 0x0, // SET ADDRESS SPACE CONTROL FAST (SACF D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SAM24, 0xffff000000000000, 0x10c000000000000, 0x0, // SET ADDRESSING MODE (24) (SAM24) + [8]*argField{}}, + {SAM31, 0xffff000000000000, 0x10d000000000000, 0x0, // SET ADDRESSING MODE (31) (SAM31) + [8]*argField{}}, + {SAM64, 0xffff000000000000, 0x10e000000000000, 0x0, // SET ADDRESSING MODE (64) (SAM64) + [8]*argField{}}, + {SRNM, 0xffff000000000000, 0xb299000000000000, 0x0, // SET BFP ROUNDING MODE (2 bit) (SRNM D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRNMB, 0xffff000000000000, 0xb2b8000000000000, 0x0, // 
SET BFP ROUNDING MODE (3 bit) (SRNMB D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SCHM, 0xffff000000000000, 0xb23c000000000000, 0xffff00000000, // SET CHANNEL MONITOR (SCHM) + [8]*argField{}}, + {SCK, 0xffff000000000000, 0xb204000000000000, 0x0, // SET CLOCK (SCK D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SCKC, 0xffff000000000000, 0xb206000000000000, 0x0, // SET CLOCK COMPARATOR (SCKC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SCKPF, 0xffff000000000000, 0x107000000000000, 0x0, // SET CLOCK PROGRAMMABLE FIELD (SCKPF) + [8]*argField{}}, + {SPT, 0xffff000000000000, 0xb208000000000000, 0x0, // SET CPU TIMER (SPT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRNMT, 0xffff000000000000, 0xb2b9000000000000, 0x0, // SET DFP ROUNDING MODE (SRNMT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SFPC, 0xffff000000000000, 0xb384000000000000, 0xff0f00000000, // SET FPC (SFPC R1) + [8]*argField{ap_Reg_24_27}}, + {SFASR, 0xffff000000000000, 0xb385000000000000, 0xff0f00000000, // SET FPC AND SIGNAL (SFASR R1) + [8]*argField{ap_Reg_24_27}}, + {SPX, 0xffff000000000000, 0xb210000000000000, 0x0, // SET PREFIX (SPX D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SPM, 0xff00000000000000, 0x400000000000000, 0xf000000000000, // SET PROGRAM MASK (SPM R1) + [8]*argField{ap_Reg_8_11}}, + {SPKA, 0xffff000000000000, 0xb20a000000000000, 0x0, // SET PSW KEY FROM ADDRESS (SPKA D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SSAR, 0xffff000000000000, 0xb225000000000000, 0xff0f00000000, // SET SECONDARY ASN (SSAR R1) + [8]*argField{ap_Reg_24_27}}, + {SSAIR, 0xffff000000000000, 0xb99f000000000000, 0xff0f00000000, // SET SECONDARY ASN WITH INSTANCE (SSAIR R1) + [8]*argField{ap_Reg_24_27}}, + {SSKE, 0xffff000000000000, 0xb22b000000000000, 0xf0000000000, // SET STORAGE KEY EXTENDED (SSKE R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, 
ap_Mask_16_19}}, + {SSM, 0xff00000000000000, 0x8000000000000000, 0x0, // SET SYSTEM MASK (SSM D1(B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRP, 0xff00000000000000, 0xf000000000000000, 0x0, // SHIFT AND ROUND DECIMAL (SRP D1(L1,B1),D2(B2),I3) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35, ap_ImmUnsigned_12_15}}, + {SLDA, 0xff00000000000000, 0x8f00000000000000, 0xf000000000000, // SHIFT LEFT DOUBLE (64) (SLDA R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLDL, 0xff00000000000000, 0x8d00000000000000, 0xf000000000000, // SHIFT LEFT DOUBLE LOGICAL (64) (SLDL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLA, 0xff00000000000000, 0x8b00000000000000, 0xf000000000000, // SHIFT LEFT SINGLE (32) (SLA R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLAK, 0xff00000000ff0000, 0xeb00000000dd0000, 0x0, // SHIFT LEFT SINGLE (32) (SLAK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLAG, 0xff00000000ff0000, 0xeb000000000b0000, 0x0, // SHIFT LEFT SINGLE (64) (SLAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLL, 0xff00000000000000, 0x8900000000000000, 0xf000000000000, // SHIFT LEFT SINGLE LOGICAL (32) (SLL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SLLK, 0xff00000000ff0000, 0xeb00000000df0000, 0x0, // SHIFT LEFT SINGLE LOGICAL (32) (SLLK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLLG, 0xff00000000ff0000, 0xeb000000000d0000, 0x0, // SHIFT LEFT SINGLE LOGICAL (64) (SLLG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRDA, 0xff00000000000000, 0x8e00000000000000, 0xf000000000000, // SHIFT RIGHT DOUBLE (64) (SRDA R1,D2(B2)) + 
[8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRDL, 0xff00000000000000, 0x8c00000000000000, 0xf000000000000, // SHIFT RIGHT DOUBLE LOGICAL (64) (SRDL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRA, 0xff00000000000000, 0x8a00000000000000, 0xf000000000000, // SHIFT RIGHT SINGLE (32) (SRA R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRAK, 0xff00000000ff0000, 0xeb00000000dc0000, 0x0, // SHIFT RIGHT SINGLE (32) (SRAK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRAG, 0xff00000000ff0000, 0xeb000000000a0000, 0x0, // SHIFT RIGHT SINGLE (64) (SRAG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRL, 0xff00000000000000, 0x8800000000000000, 0xf000000000000, // SHIFT RIGHT SINGLE LOGICAL (32) (SRL R1,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SRLK, 0xff00000000ff0000, 0xeb00000000de0000, 0x0, // SHIFT RIGHT SINGLE LOGICAL (32) (SRLK R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SRLG, 0xff00000000ff0000, 0xeb000000000c0000, 0x0, // SHIFT RIGHT SINGLE LOGICAL (64) (SRLG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {SLXT, 0xff00000000ff0000, 0xed00000000480000, 0xf000000, // SHIFT SIGNIFICAND LEFT (extended DFP) (SLXT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLDT, 0xff00000000ff0000, 0xed00000000400000, 0xf000000, // SHIFT SIGNIFICAND LEFT (long DFP) (SLDT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SRXT, 0xff00000000ff0000, 0xed00000000490000, 0xf000000, // SHIFT SIGNIFICAND RIGHT (extended DFP) (SRXT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, 
ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SRDT, 0xff00000000ff0000, 0xed00000000410000, 0xf000000, // SHIFT SIGNIFICAND RIGHT (long DFP) (SRDT R1,R3,D2(X2,B2)) + [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SIGP, 0xff00000000000000, 0xae00000000000000, 0x0, // SIGNAL PROCESSOR (SIGP R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {SORTL, 0xffff000000000000, 0xb938000000000000, 0xff0000000000, // SORT LISTS (SORTL R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SQXBR, 0xffff000000000000, 0xb316000000000000, 0xff0000000000, // SQUARE ROOT (extended BFP) (SQXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQXR, 0xffff000000000000, 0xb336000000000000, 0xff0000000000, // SQUARE ROOT (extended HFP) (SQXR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQDB, 0xff00000000ff0000, 0xed00000000150000, 0xff000000, // SQUARE ROOT (long BFP) (SQDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQDBR, 0xffff000000000000, 0xb315000000000000, 0xff0000000000, // SQUARE ROOT (long BFP) (SQDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQD, 0xff00000000ff0000, 0xed00000000350000, 0xff000000, // SQUARE ROOT (long HFP) (SQD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQDR, 0xffff000000000000, 0xb244000000000000, 0xff0000000000, // SQUARE ROOT (long HFP) (SQDR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SQEB, 0xff00000000ff0000, 0xed00000000140000, 0xff000000, // SQUARE ROOT (short BFP) (SQEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQEBR, 0xffff000000000000, 0xb314000000000000, 0xff0000000000, // SQUARE ROOT (short BFP) (SQEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, 
+ {SQE, 0xff00000000ff0000, 0xed00000000340000, 0xff000000, // SQUARE ROOT (short HFP) (SQE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SQER, 0xffff000000000000, 0xb245000000000000, 0xff0000000000, // SQUARE ROOT (short HFP) (SQER R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SSCH, 0xffff000000000000, 0xb233000000000000, 0x0, // START SUBCHANNEL (SSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {ST, 0xff00000000000000, 0x5000000000000000, 0x0, // STORE (32) (ST R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STY, 0xff00000000ff0000, 0xe300000000500000, 0x0, // STORE (32) (STY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STG, 0xff00000000ff0000, 0xe300000000240000, 0x0, // STORE (64) (STG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STD, 0xff00000000000000, 0x6000000000000000, 0x0, // STORE (long) (STD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STDY, 0xff00000000ff0000, 0xed00000000670000, 0x0, // STORE (long) (STDY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STE, 0xff00000000000000, 0x7000000000000000, 0x0, // STORE (short) (STE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STEY, 0xff00000000ff0000, 0xed00000000660000, 0x0, // STORE (short) (STEY R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STAM, 0xff00000000000000, 0x9b00000000000000, 0x0, // STORE ACCESS MULTIPLE 7-389 (STAM R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STAMY, 0xff00000000ff0000, 0xeb000000009b0000, 0x0, 
// STORE ACCESS MULTIPLE 7-389 (STAMY R1,R3,D2(B2)) + [8]*argField{ap_ACReg_8_11, ap_ACReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STBEAR, 0xffff000000000000, 0xb201000000000000, 0x0, // STORE BEAR (STBEAR D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCPS, 0xffff000000000000, 0xb23a000000000000, 0x0, // STORE CHANNEL PATH STATUS (STCPS D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCRW, 0xffff000000000000, 0xb239000000000000, 0x0, // STORE CHANNEL REPORT WORD (STCRW D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STC, 0xff00000000000000, 0x4200000000000000, 0x0, // STORE CHARACTER (STC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STCY, 0xff00000000ff0000, 0xe300000000720000, 0x0, // STORE CHARACTER (STCY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STCH, 0xff00000000ff0000, 0xe300000000c30000, 0x0, // STORE CHARACTER HIGH (8) (STCH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STCMH, 0xff00000000ff0000, 0xeb000000002c0000, 0x0, // STORE CHARACTERS UNDER MASK (high) (STCMH R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STCM, 0xff00000000000000, 0xbe00000000000000, 0x0, // STORE CHARACTERS UNDER MASK (low) (STCM R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCMY, 0xff00000000ff0000, 0xeb000000002d0000, 0x0, // STORE CHARACTERS UNDER MASK (low) (STCMY R1,M3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Mask_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STCK, 0xffff000000000000, 0xb205000000000000, 0x0, // STORE CLOCK (STCK D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCKC, 0xffff000000000000, 0xb207000000000000, 0x0, // STORE CLOCK COMPARATOR (STCKC D2(B2)) + 
[8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCKE, 0xffff000000000000, 0xb278000000000000, 0x0, // STORE CLOCK EXTENDED (STCKE D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCKF, 0xffff000000000000, 0xb27c000000000000, 0x0, // STORE CLOCK FAST (STCKF D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCTL, 0xff00000000000000, 0xb600000000000000, 0x0, // STORE CONTROL (32) (STCTL R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STCTG, 0xff00000000ff0000, 0xeb00000000250000, 0x0, // STORE CONTROL (64) (STCTG R1,R3,D2(B2)) + [8]*argField{ap_CReg_8_11, ap_CReg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STAP, 0xffff000000000000, 0xb212000000000000, 0x0, // STORE CPU ADDRESS (STAP D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STIDP, 0xffff000000000000, 0xb202000000000000, 0x0, // STORE CPU ID (STIDP D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STPT, 0xffff000000000000, 0xb209000000000000, 0x0, // STORE CPU TIMER (STPT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STFL, 0xffff000000000000, 0xb2b1000000000000, 0x0, // STORE FACILITY LIST (STFL D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STFLE, 0xffff000000000000, 0xb2b0000000000000, 0x0, // STORE FACILITY LIST EXTENDED (STFLE D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STFPC, 0xffff000000000000, 0xb29c000000000000, 0x0, // STORE FPC (STFPC D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STGSC, 0xff00000000ff0000, 0xe300000000490000, 0x0, // STORE GUARDED STORAGE CONTROLS (STGSC R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STH, 0xff00000000000000, 0x4000000000000000, 0x0, // STORE HALFWORD (16) (STH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + 
{STHY, 0xff00000000ff0000, 0xe300000000700000, 0x0, // STORE HALFWORD (16) (STHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STHH, 0xff00000000ff0000, 0xe300000000c70000, 0x0, // STORE HALFWORD HIGH (16) (STHH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STHRL, 0xff0f000000000000, 0xc407000000000000, 0x0, // STORE HALFWORD RELATIVE LONG (16) (STHRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {STFH, 0xff00000000ff0000, 0xe300000000cb0000, 0x0, // STORE HIGH (32) (STFH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STOCFH, 0xff00000000ff0000, 0xeb00000000e10000, 0x0, // STORE HIGH ON CONDITION (STOCFH R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {STM, 0xff00000000000000, 0x9000000000000000, 0x0, // STORE MULTIPLE (32) (STM R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STMY, 0xff00000000ff0000, 0xeb00000000900000, 0x0, // STORE MULTIPLE (32) (STMY R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STMG, 0xff00000000ff0000, 0xeb00000000240000, 0x0, // STORE MULTIPLE (64) (STMG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STMH, 0xff00000000ff0000, 0xeb00000000260000, 0x0, // STORE MULTIPLE HIGH (32) (STMH R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {STOC, 0xff00000000ff0000, 0xeb00000000f30000, 0x0, // STORE ON CONDITION (32) (STOC R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_Mask_12_15}}, + {STOCG, 0xff00000000ff0000, 0xeb00000000e30000, 0x0, // STORE ON CONDITION (64) (STOCG R1,D2(B2),M3) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, 
ap_BaseReg_16_19, ap_Mask_12_15}}, + {STPQ, 0xff00000000ff0000, 0xe3000000008e0000, 0x0, // STORE PAIR TO QUADWORD (STPQ R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STPX, 0xffff000000000000, 0xb211000000000000, 0x0, // STORE PREFIX (STPX D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STRAG, 0xffff000000000000, 0xe502000000000000, 0x0, // STORE REAL ADDRESS (STRAG D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {STRL, 0xff0f000000000000, 0xc40f000000000000, 0x0, // STORE RELATIVE LONG (32) (STRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {STGRL, 0xff0f000000000000, 0xc40b000000000000, 0x0, // STORE RELATIVE LONG (64) (STGRL R1,RI2) + [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, + {STRVH, 0xff00000000ff0000, 0xe3000000003f0000, 0x0, // STORE REVERSED (16) (STRVH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STRV, 0xff00000000ff0000, 0xe3000000003e0000, 0x0, // STORE REVERSED (32) (STRV R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STRVG, 0xff00000000ff0000, 0xe3000000002f0000, 0x0, // STORE REVERSED (64) (STRVG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {STSCH, 0xffff000000000000, 0xb234000000000000, 0x0, // STORE SUBCHANNEL (STSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STSI, 0xffff000000000000, 0xb27d000000000000, 0x0, // STORE SYSTEM INFORMATION (STSI D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {STNSM, 0xff00000000000000, 0xac00000000000000, 0x0, // STORE THEN AND SYSTEM MASK (STNSM D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {STOSM, 0xff00000000000000, 0xad00000000000000, 0x0, // STORE THEN OR SYSTEM MASK 
(STOSM D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {STURA, 0xffff000000000000, 0xb246000000000000, 0xff0000000000, // STORE USING REAL ADDRESS (32) (STURA R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {STURG, 0xffff000000000000, 0xb925000000000000, 0xff0000000000, // STORE USING REAL ADDRESS (64) (STURG R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {S, 0xff00000000000000, 0x5b00000000000000, 0x0, // SUBTRACT (32) (S R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SR, 0xff00000000000000, 0x1b00000000000000, 0x0, // SUBTRACT (32) (SR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {SRK, 0xffff000000000000, 0xb9f9000000000000, 0xf0000000000, // SUBTRACT (32) (SRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SY, 0xff00000000ff0000, 0xe3000000005b0000, 0x0, // SUBTRACT (32) (SY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SG, 0xff00000000ff0000, 0xe300000000090000, 0x0, // SUBTRACT (64) (SG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SGR, 0xffff000000000000, 0xb909000000000000, 0xff0000000000, // SUBTRACT (64) (SGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SGRK, 0xffff000000000000, 0xb9e9000000000000, 0xf0000000000, // SUBTRACT (64) (SGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SGF, 0xff00000000ff0000, 0xe300000000190000, 0x0, // SUBTRACT (64←32) (SGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SGFR, 0xffff000000000000, 0xb919000000000000, 0xff0000000000, // SUBTRACT (64←32) (SGFR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SXBR, 0xffff000000000000, 0xb34b000000000000, 0xff0000000000, // SUBTRACT (extended BFP) (SXBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + 
{SXTR, 0xffff000000000000, 0xb3db000000000000, 0xf0000000000, // SUBTRACT (extended DFP) (SXTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {SXTRA, 0xffff000000000000, 0xb3db000000000000, 0x0, // SUBTRACT (extended DFP) (SXTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {SDB, 0xff00000000ff0000, 0xed000000001b0000, 0xff000000, // SUBTRACT (long BFP) (SDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SDBR, 0xffff000000000000, 0xb31b000000000000, 0xff0000000000, // SUBTRACT (long BFP) (SDBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SDTR, 0xffff000000000000, 0xb3d3000000000000, 0xf0000000000, // SUBTRACT (long DFP) (SDTR R1,R2,R3) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19}}, + {SDTRA, 0xffff000000000000, 0xb3d3000000000000, 0x0, // SUBTRACT (long DFP) (SDTRA R1,R2,R3,M4) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31, ap_FPReg_16_19, ap_Mask_20_23}}, + {SEB, 0xff00000000ff0000, 0xed000000000b0000, 0xff000000, // SUBTRACT (short BFP) (SEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SEBR, 0xffff000000000000, 0xb30b000000000000, 0xff0000000000, // SUBTRACT (short BFP) (SEBR R1,R2) + [8]*argField{ap_FPReg_24_27, ap_FPReg_28_31}}, + {SP, 0xff00000000000000, 0xfb00000000000000, 0x0, // SUBTRACT DECIMAL (SP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {SH, 0xff00000000000000, 0x4b00000000000000, 0x0, // SUBTRACT HALFWORD (32←16) (SH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SHY, 0xff00000000ff0000, 0xe3000000007b0000, 0x0, // SUBTRACT HALFWORD (32←16) (SHY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SGH, 
0xff00000000ff0000, 0xe300000000390000, 0x0, // SUBTRACT HALFWORD (64←16) (SGH R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SHHHR, 0xffff000000000000, 0xb9c9000000000000, 0xf0000000000, // SUBTRACT HIGH (32) (SHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SHHLR, 0xffff000000000000, 0xb9d9000000000000, 0xf0000000000, // SUBTRACT HIGH (32) (SHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SL, 0xff00000000000000, 0x5f00000000000000, 0x0, // SUBTRACT LOGICAL (32) (SL R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLR, 0xff00000000000000, 0x1f00000000000000, 0x0, // SUBTRACT LOGICAL (32) (SLR R1,R2) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15}}, + {SLRK, 0xffff000000000000, 0xb9fb000000000000, 0xf0000000000, // SUBTRACT LOGICAL (32) (SLRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLY, 0xff00000000ff0000, 0xe3000000005f0000, 0x0, // SUBTRACT LOGICAL (32) (SLY R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLG, 0xff00000000ff0000, 0xe3000000000b0000, 0x0, // SUBTRACT LOGICAL (64) (SLG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLGR, 0xffff000000000000, 0xb90b000000000000, 0xff0000000000, // SUBTRACT LOGICAL (64) (SLGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SLGRK, 0xffff000000000000, 0xb9eb000000000000, 0xf0000000000, // SUBTRACT LOGICAL (64) (SLGRK R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLGF, 0xff00000000ff0000, 0xe3000000001b0000, 0x0, // SUBTRACT LOGICAL (64←32) (SLGF R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLGFR, 0xffff000000000000, 0xb91b000000000000, 0xff0000000000, // SUBTRACT LOGICAL (64←32) (SLGFR R1,R2) + 
[8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SLHHHR, 0xffff000000000000, 0xb9cb000000000000, 0xf0000000000, // SUBTRACT LOGICAL HIGH (32) (SLHHHR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLHHLR, 0xffff000000000000, 0xb9db000000000000, 0xf0000000000, // SUBTRACT LOGICAL HIGH (32) (SLHHLR R1,R2,R3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, + {SLFI, 0xff0f000000000000, 0xc205000000000000, 0x0, // SUBTRACT LOGICAL IMMEDIATE (32) (SLFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {SLGFI, 0xff0f000000000000, 0xc204000000000000, 0x0, // SUBTRACT LOGICAL IMMEDIATE (64←32) (SLGFI R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + {SLB, 0xff00000000ff0000, 0xe300000000990000, 0x0, // SUBTRACT LOGICAL WITH BORROW (32) (SLB R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLBR, 0xffff000000000000, 0xb999000000000000, 0xff0000000000, // SUBTRACT LOGICAL WITH BORROW (32) (SLBR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SLBG, 0xff00000000ff0000, 0xe300000000890000, 0x0, // SUBTRACT LOGICAL WITH BORROW (64) (SLBG R1,D2(X2,B2)) + [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SLBGR, 0xffff000000000000, 0xb989000000000000, 0xff0000000000, // SUBTRACT LOGICAL WITH BORROW (64) (SLBGR R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {SXR, 0xff00000000000000, 0x3700000000000000, 0x0, // SUBTRACT NORMALIZED (extended HFP) (SXR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SD, 0xff00000000000000, 0x6b00000000000000, 0x0, // SUBTRACT NORMALIZED (long HFP) (SD R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SDR, 0xff00000000000000, 0x2b00000000000000, 0x0, // SUBTRACT NORMALIZED (long HFP) (SDR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SE, 0xff00000000000000, 0x7b00000000000000, 0x0, // SUBTRACT NORMALIZED (short 
HFP) (SE R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SER, 0xff00000000000000, 0x3b00000000000000, 0x0, // SUBTRACT NORMALIZED (short HFP) (SER R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SW, 0xff00000000000000, 0x6f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (long HFP) (SW R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SWR, 0xff00000000000000, 0x2f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (long HFP) (SWR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SU, 0xff00000000000000, 0x7f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (short HFP) (SU R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {SUR, 0xff00000000000000, 0x3f00000000000000, 0x0, // SUBTRACT UNNORMALIZED (short HFP) (SUR R1,R2) + [8]*argField{ap_FPReg_8_11, ap_FPReg_12_15}}, + {SVC, 0xff00000000000000, 0xa00000000000000, 0x0, // SUPERVISOR CALL (SVC I) + [8]*argField{ap_ImmUnsigned_8_15}}, + {TAR, 0xffff000000000000, 0xb24c000000000000, 0xff0000000000, // TEST ACCESS (TAR R1,R2) + [8]*argField{ap_ACReg_24_27, ap_Reg_28_31}}, + {TAM, 0xffff000000000000, 0x10b000000000000, 0x0, // TEST ADDRESSING MODE (TAM) + [8]*argField{}}, + {TS, 0xff00000000000000, 0x9300000000000000, 0x0, // TEST AND SET (TS D1(B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TB, 0xffff000000000000, 0xb22c000000000000, 0xff0000000000, // TEST BLOCK (TB R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {TCXB, 0xff00000000ff0000, 0xed00000000120000, 0xff000000, // TEST DATA CLASS (extended BFP) (TCXB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDCXT, 0xff00000000ff0000, 0xed00000000580000, 0xff000000, // TEST DATA CLASS (extended DFP) (TDCXT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, 
ap_BaseReg_16_19}}, + {TCDB, 0xff00000000ff0000, 0xed00000000110000, 0xff000000, // TEST DATA CLASS (long BFP) (TCDB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDCDT, 0xff00000000ff0000, 0xed00000000540000, 0xff000000, // TEST DATA CLASS (long DFP) (TDCDT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TCEB, 0xff00000000ff0000, 0xed00000000100000, 0xff000000, // TEST DATA CLASS (short BFP) (TCEB R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDCET, 0xff00000000ff0000, 0xed00000000500000, 0xff000000, // TEST DATA CLASS (short DFP) (TDCET R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDGXT, 0xff00000000ff0000, 0xed00000000590000, 0xff000000, // TEST DATA GROUP (extended DFP) (TDGXT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDGDT, 0xff00000000ff0000, 0xed00000000550000, 0xff000000, // TEST DATA GROUP (long DFP) (TDGDT R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TDGET, 0xff00000000ff0000, 0xed00000000510000, 0xff000000, // TEST DATA GROUP (short DFP) (TDGET R1,D2(X2,B2)) + [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, + {TP, 0xff00000000ff0000, 0xeb00000000c00000, 0xf0000ff000000, // TEST DECIMAL (TP D1(L1,B1)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19}}, + {TPEI, 0xffff000000000000, 0xb9a1000000000000, 0xff0000000000, // TEST PENDING EXTERNAL INTERRUPTION (TPEI R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {TPI, 0xffff000000000000, 0xb236000000000000, 0x0, // TEST PENDING INTERRUPTION (TPI D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TPROT, 0xffff000000000000, 0xe501000000000000, 0x0, // 
TEST PROTECTION (TPROT D1(B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TSCH, 0xffff000000000000, 0xb235000000000000, 0x0, // TEST SUBCHANNEL (TSCH D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TM, 0xff00000000000000, 0x9100000000000000, 0x0, // TEST UNDER MASK (TM D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {TMY, 0xff00000000ff0000, 0xeb00000000510000, 0x0, // TEST UNDER MASK (TMY D1(B1),I2) + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + {TMHH, 0xff0f000000000000, 0xa702000000000000, 0x0, // TEST UNDER MASK (high high) (TMHH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMHL, 0xff0f000000000000, 0xa703000000000000, 0x0, // TEST UNDER MASK (high low) (TMHL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMLH, 0xff0f000000000000, 0xa700000000000000, 0x0, // TEST UNDER MASK (low high) (TMLH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMLL, 0xff0f000000000000, 0xa701000000000000, 0x0, // TEST UNDER MASK (low low) (TMLL R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TMH, 0xff0f000000000000, 0xa700000000000000, 0x0, // TEST UNDER MASK HIGH (TMH R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TML, 0xff0f000000000000, 0xa701000000000000, 0x0, // TEST UNDER MASK LOW (TML R1,I2) + [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + {TRACE, 0xff00000000000000, 0x9900000000000000, 0x0, // TRACE (32) (TRACE R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {TRACG, 0xff00000000ff0000, 0xeb000000000f0000, 0x0, // TRACE (64) (TRACG R1,R3,D2(B2)) + [8]*argField{ap_Reg_8_11, ap_Reg_12_15, ap_DispSigned20_20_39, ap_BaseReg_16_19}}, + {TABORT, 0xffff000000000000, 0xb2fc000000000000, 0x0, // TRANSACTION ABORT (TABORT D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + 
{TBEGINC, 0xffff000000000000, 0xe561000000000000, 0x0, // TRANSACTION BEGIN (constrained) (TBEGINC D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {TBEGIN, 0xffff000000000000, 0xe560000000000000, 0x0, // TRANSACTION BEGIN (nonconstrained) (TBEGIN D1(B1),I2) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_32_47}}, + {TEND, 0xffff000000000000, 0xb2f8000000000000, 0xffff00000000, // TRANSACTION END (TEND) + [8]*argField{}}, + {TR, 0xff00000000000000, 0xdc00000000000000, 0x0, // TRANSLATE (TR D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TRT, 0xff00000000000000, 0xdd00000000000000, 0x0, // TRANSLATE AND TEST (TRT D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TRTE, 0xffff000000000000, 0xb9bf000000000000, 0xf0000000000, // TRANSLATE AND TEST EXTENDED (TRTE R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRTR, 0xff00000000000000, 0xd000000000000000, 0x0, // TRANSLATE AND TEST REVERSE (TRTR D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {TRTRE, 0xffff000000000000, 0xb9bd000000000000, 0xf0000000000, // TRANSLATE AND TEST REVERSE EXTENDED (TRTRE R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRE, 0xffff000000000000, 0xb2a5000000000000, 0xff0000000000, // TRANSLATE EXTENDED (TRE R1,R2) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, + {TROO, 0xffff000000000000, 0xb993000000000000, 0xf0000000000, // TRANSLATE ONE TO ONE (TROO R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TROT, 0xffff000000000000, 0xb992000000000000, 0xf0000000000, // TRANSLATE ONE TO TWO (TROT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRTO, 0xffff000000000000, 0xb991000000000000, 
0xf0000000000, // TRANSLATE TWO TO ONE (TRTO R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRTT, 0xffff000000000000, 0xb990000000000000, 0xf0000000000, // TRANSLATE TWO TO TWO (TRTT R1,R2,M3) + [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, + {TRAP2, 0xffff000000000000, 0x1ff000000000000, 0x0, // TRAP (TRAP2) + [8]*argField{}}, + {TRAP4, 0xffff000000000000, 0xb2ff000000000000, 0x0, // TRAP (TRAP4 D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19}}, + {UNPK, 0xff00000000000000, 0xf300000000000000, 0x0, // UNPACK (UNPK D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, + {UNPKA, 0xff00000000000000, 0xea00000000000000, 0x0, // UNPACK ASCII (UNPKA D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {UNPKU, 0xff00000000000000, 0xe200000000000000, 0x0, // UNPACK UNICODE (UNPKU D1(L1,B1),D2(B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, + {UPT, 0xffff000000000000, 0x102000000000000, 0x0, // UPDATE TREE (UPT) + [8]*argField{}}, + {VA, 0xff00000000ff0000, 0xe700000000f30000, 0xfff00000000, // VECTOR ADD (VA V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VACC, 0xff00000000ff0000, 0xe700000000f10000, 0xfff00000000, // VECTOR ADD COMPUTE CARRY (VACC V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VAP, 0xff00000000ff0000, 0xe600000000710000, 0xf0000000000, // VECTOR ADD DECIMAL (VAP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VAC, 0xff00000000ff0000, 0xe700000000bb0000, 0xff00000000, // VECTOR ADD WITH CARRY (VAC V1,V2,V3,V4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VACCC, 0xff00000000ff0000, 0xe700000000b90000, 0xff00000000, // VECTOR ADD WITH CARRY COMPUTE CARRY (VACCC V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VN, 0xff00000000ff0000, 0xe700000000680000, 0xffff0000000, // VECTOR AND (VN V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VNC, 0xff00000000ff0000, 0xe700000000690000, 0xffff0000000, // VECTOR AND WITH COMPLEMENT (VNC V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VAVG, 0xff00000000ff0000, 0xe700000000f20000, 0xfff00000000, // VECTOR AVERAGE (VAVG V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VAVGL, 0xff00000000ff0000, 0xe700000000f00000, 0xfff00000000, // VECTOR AVERAGE LOGICAL (VAVGL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VBPERM, 0xff00000000ff0000, 0xe700000000850000, 0xffff0000000, // VECTOR BIT PERMUTE (VBPERM V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VCKSM, 0xff00000000ff0000, 0xe700000000660000, 0xffff0000000, // VECTOR CHECKSUM (VCKSM V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VCP, 0xff00000000ff0000, 0xe600000000770000, 0xf00f0ff0000000, // VECTOR COMPARE DECIMAL (VCP V1,V2,M3) + [8]*argField{ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCEQ, 0xff00000000ff0000, 0xe700000000f80000, 0xf0f00000000, // VECTOR COMPARE EQUAL (VCEQ V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCH, 0xff00000000ff0000, 
0xe700000000fb0000, 0xf0f00000000, // VECTOR COMPARE HIGH (VCH V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCHL, 0xff00000000ff0000, 0xe700000000f90000, 0xf0f00000000, // VECTOR COMPARE HIGH LOGICAL (VCHL V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCSPH, 0xff00000000ff0000, 0xe6000000007d0000, 0xf0ff0000000, // VECTOR CONVERT HFP TO SCALED DECIMAL (VCSPH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCVB, 0xff00000000ff0000, 0xe600000000500000, 0xff00f0000000, // VECTOR CONVERT TO BINARY (VCVB R1,V2,M3,M4) + [8]*argField{ap_Reg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCVBG, 0xff00000000ff0000, 0xe600000000520000, 0xff00f0000000, // VECTOR CONVERT TO BINARY (VCVBG R1,V2,M3,M4) + [8]*argField{ap_Reg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCVD, 0xff00000000ff0000, 0xe600000000580000, 0xff0000000000, // VECTOR CONVERT TO DECIMAL (VCVD V1,R2,I3,M4) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCVDG, 0xff00000000ff0000, 0xe6000000005a0000, 0xff0000000000, // VECTOR CONVERT TO DECIMAL (VCVDG V1,R2,I3,M4) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLZDP, 0xff00000000ff0000, 0xe600000000510000, 0xff0ff0000000, // VECTOR COUNT LEADING ZERO DIGITS (VCLZDP V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLZ, 0xff00000000ff0000, 0xe700000000530000, 0xffff00000000, // VECTOR COUNT LEADING ZEROS (VCLZ V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VCTZ, 0xff00000000ff0000, 0xe700000000520000, 0xffff00000000, 
// VECTOR COUNT TRAILING ZEROS (VCTZ V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VDP, 0xff00000000ff0000, 0xe6000000007a0000, 0xf0000000000, // VECTOR DIVIDE DECIMAL (VDP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VEC, 0xff00000000ff0000, 0xe700000000db0000, 0xffff00000000, // VECTOR ELEMENT COMPARE (VEC V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VECL, 0xff00000000ff0000, 0xe700000000d90000, 0xffff00000000, // VECTOR ELEMENT COMPARE LOGICAL (VECL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VERIM, 0xff00000000ff0000, 0xe700000000720000, 0xf0000000000, // VECTOR ELEMENT ROTATE AND INSERT UNDER MASK (VERIM V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VERLL, 0xff00000000ff0000, 0xe700000000330000, 0x0, // VECTOR ELEMENT ROTATE LEFT LOGICAL (VERLL V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VERLLV, 0xff00000000ff0000, 0xe700000000730000, 0xfff00000000, // VECTOR ELEMENT ROTATE LEFT LOGICAL (VERLLV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESLV, 0xff00000000ff0000, 0xe700000000700000, 0xfff00000000, // VECTOR ELEMENT SHIFT LEFT (VESLV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESL, 0xff00000000ff0000, 0xe700000000300000, 0x0, // VECTOR ELEMENT SHIFT LEFT (VESL V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRA, 0xff00000000ff0000, 0xe7000000003a0000, 0x0, 
// VECTOR ELEMENT SHIFT RIGHT ARITHMETIC (VESRA V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRAV, 0xff00000000ff0000, 0xe7000000007a0000, 0xfff00000000, // VECTOR ELEMENT SHIFT RIGHT ARITHMETIC (VESRAV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRL, 0xff00000000ff0000, 0xe700000000380000, 0x0, // VECTOR ELEMENT SHIFT RIGHT LOGICAL (VESRL V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VESRLV, 0xff00000000ff0000, 0xe700000000780000, 0xfff00000000, // VECTOR ELEMENT SHIFT RIGHT LOGICAL (VESRLV V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VX, 0xff00000000ff0000, 0xe7000000006d0000, 0xffff0000000, // VECTOR EXCLUSIVE OR (VX V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VFAE, 0xff00000000ff0000, 0xe700000000820000, 0xf0f00000000, // VECTOR FIND ANY ELEMENT EQUAL (VFAE V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFEE, 0xff00000000ff0000, 0xe700000000800000, 0xf0f00000000, // VECTOR FIND ELEMENT EQUAL (VFEE V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFENE, 0xff00000000ff0000, 0xe700000000810000, 0xf0f00000000, // VECTOR FIND ELEMENT NOT EQUAL (VFENE V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFA, 0xff00000000ff0000, 0xe700000000e30000, 0xff000000000, // VECTOR FP ADD (VFA V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, 
ap_ImmUnsigned_36_39}}, + {WFK, 0xff00000000ff0000, 0xe700000000ca0000, 0xfff000000000, // VECTOR FP COMPARE AND SIGNAL SCALAR (WFK V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFCE, 0xff00000000ff0000, 0xe700000000e80000, 0xf0000000000, // VECTOR FP COMPARE EQUAL (VFCE V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFCH, 0xff00000000ff0000, 0xe700000000eb0000, 0xf0000000000, // VECTOR FP COMPARE HIGH (VFCH V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFCHE, 0xff00000000ff0000, 0xe700000000ea0000, 0xf0000000000, // VECTOR FP COMPARE HIGH OR EQUAL (VFCHE V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {WFC, 0xff00000000ff0000, 0xe700000000cb0000, 0xfff000000000, // VECTOR FP COMPARE SCALAR (WFC V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCLFNH, 0xff00000000ff0000, 0xe600000000560000, 0xfff000000000, // VECTOR FP CONVERT AND LENGTHEN FROM NNP HIGH (VCLFNH V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCLFNL, 0xff00000000ff0000, 0xe6000000005e0000, 0xfff000000000, // VECTOR FP CONVERT AND LENGTHEN FROM NNP LOW (VCLFNL V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCRNF, 0xff00000000ff0000, 0xe600000000750000, 0xff000000000, // VECTOR FP CONVERT AND ROUND TO NNP (VCRNF V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCFPS, 0xff00000000ff0000, 0xe700000000c30000, 
0xff0000000000, // VECTOR FP CONVERT FROM FIXED (VCFPS V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCDG, 0xff00000000ff0000, 0xe700000000c30000, 0xff0000000000, // VECTOR FP CONVERT FROM FIXED 64-BIT (VCDG V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCFPL, 0xff00000000ff0000, 0xe700000000c10000, 0xff0000000000, // VECTOR FP CONVERT FROM LOGICAL (VCFPL V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCDLG, 0xff00000000ff0000, 0xe700000000c10000, 0xff0000000000, // VECTOR FP CONVERT FROM LOGICAL 64-BIT (VCDLG V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCFN, 0xff00000000ff0000, 0xe6000000005d0000, 0xfff000000000, // VECTOR FP CONVERT FROM NNP (VCFN V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VCSFP, 0xff00000000ff0000, 0xe700000000c20000, 0xff0000000000, // VECTOR FP CONVERT TO FIXED (VCSFP V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCGD, 0xff00000000ff0000, 0xe700000000c20000, 0xff0000000000, // VECTOR FP CONVERT TO FIXED 64-BIT (VCGD V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLFP, 0xff00000000ff0000, 0xe700000000c00000, 0xff0000000000, // VECTOR FP CONVERT TO LOGICAL (VCLFP V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCLGD, 0xff00000000ff0000, 0xe700000000c00000, 0xff0000000000, // VECTOR FP CONVERT TO LOGICAL 64-BIT (VCLGD V1,V2,M3,M4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VCNF, 0xff00000000ff0000, 0xe600000000550000, 0xfff000000000, // VECTOR FP CONVERT TO NNP (VCNF V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFD, 0xff00000000ff0000, 0xe700000000e50000, 0xff000000000, // VECTOR FP DIVIDE (VFD V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFLL, 0xff00000000ff0000, 0xe700000000c40000, 0xfff000000000, // VECTOR FP LOAD LENGTHENED (VFLL V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFLR, 0xff00000000ff0000, 0xe700000000c50000, 0xff0000000000, // VECTOR FP LOAD ROUNDED (VFLR V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFMAX, 0xff00000000ff0000, 0xe700000000ef0000, 0xf0000000000, // VECTOR FP MAXIMUM (VFMAX V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFMIN, 0xff00000000ff0000, 0xe700000000ee0000, 0xf0000000000, // VECTOR FP MINIMUM (VFMIN V1,V2,V3,M4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFM, 0xff00000000ff0000, 0xe700000000e70000, 0xff000000000, // VECTOR FP MULTIPLY (VFM V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFMA, 0xff00000000ff0000, 0xe7000000008f0000, 0xf000000000, // VECTOR FP MULTIPLY AND ADD (VFMA V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFMS, 0xff00000000ff0000, 
0xe7000000008e0000, 0xf000000000, // VECTOR FP MULTIPLY AND SUBTRACT (VFMS V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFNMA, 0xff00000000ff0000, 0xe7000000009f0000, 0xf000000000, // VECTOR FP NEGATIVE MULTIPLY AND ADD (VFNMA V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFNMS, 0xff00000000ff0000, 0xe7000000009e0000, 0xf000000000, // VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT (VFNMS V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_28_31, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VFPSO, 0xff00000000ff0000, 0xe700000000cc0000, 0xff0000000000, // VECTOR FP PERFORM SIGN OPERATION (VFPSO V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VFSQ, 0xff00000000ff0000, 0xe700000000ce0000, 0xfff000000000, // VECTOR FP SQUARE ROOT (VFSQ V1,V2,M3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFS, 0xff00000000ff0000, 0xe700000000e20000, 0xff000000000, // VECTOR FP SUBTRACT (VFS V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VFTCI, 0xff00000000ff0000, 0xe7000000004a0000, 0x0, // VECTOR FP TEST DATA CLASS IMMEDIATE (VFTCI V1,V2,I3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_16_27, ap_Mask_32_35, ap_Mask_28_31, ap_ImmUnsigned_36_39}}, + {VGFM, 0xff00000000ff0000, 0xe700000000b40000, 0xfff00000000, // VECTOR GALOIS FIELD MULTIPLY SUM (VGFM V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VGFMA, 0xff00000000ff0000, 0xe700000000bc0000, 0xff00000000, // VECTOR GALOIS FIELD 
MULTIPLY SUM AND ACCUMULATE (VGFMA V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VGEF, 0xff00000000ff0000, 0xe700000000130000, 0x0, // VECTOR GATHER ELEMENT (32) (VGEF V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VGEG, 0xff00000000ff0000, 0xe700000000120000, 0x0, // VECTOR GATHER ELEMENT (64) (VGEG V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VGBM, 0xff00000000ff0000, 0xe700000000440000, 0xf0000f0000000, // VECTOR GENERATE BYTE MASK (VGBM V1,I2) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_ImmUnsigned_36_39}}, + {VGM, 0xff00000000ff0000, 0xe700000000460000, 0xf000000000000, // VECTOR GENERATE MASK (VGM V1,I2,I3,M4) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_23, ap_ImmUnsigned_24_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VISTR, 0xff00000000ff0000, 0xe7000000005c0000, 0xff0f00000000, // VECTOR ISOLATE STRING (VISTR V1,V2,M3,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VL, 0xff00000000ff0000, 0xe700000000060000, 0x0, // VECTOR LOAD (VL V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLR, 0xff00000000ff0000, 0xe700000000560000, 0xfffff0000000, // VECTOR LOAD (VLR V1,V2) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_36_39}}, + {VLREP, 0xff00000000ff0000, 0xe700000000050000, 0x0, // VECTOR LOAD AND REPLICATE (VLREP V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEBRH, 0xff00000000ff0000, 0xe600000000010000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT (16) (VLEBRH 
V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEBRF, 0xff00000000ff0000, 0xe600000000030000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT (32) (VLEBRF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEBRG, 0xff00000000ff0000, 0xe600000000020000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT (64) (VLEBRG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLBRREP, 0xff00000000ff0000, 0xe600000000050000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLLEBRZ, 0xff00000000ff0000, 0xe600000000040000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO (VLLEBRZ V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLBR, 0xff00000000ff0000, 0xe600000000060000, 0x0, // VECTOR LOAD BYTE REVERSED ELEMENTS (VLBR V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLC, 0xff00000000ff0000, 0xe700000000de0000, 0xffff00000000, // VECTOR LOAD COMPLEMENT (VLC V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEH, 0xff00000000ff0000, 0xe700000000010000, 0x0, // VECTOR LOAD ELEMENT (16) (VLEH V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEF, 0xff00000000ff0000, 0xe700000000030000, 0x0, // VECTOR LOAD ELEMENT (32) (VLEF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, 
ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEG, 0xff00000000ff0000, 0xe700000000020000, 0x0, // VECTOR LOAD ELEMENT (64) (VLEG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEB, 0xff00000000ff0000, 0xe700000000000000, 0x0, // VECTOR LOAD ELEMENT (8) (VLEB V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIH, 0xff00000000ff0000, 0xe700000000410000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (16) (VLEIH V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIF, 0xff00000000ff0000, 0xe700000000430000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (32) (VLEIF V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIG, 0xff00000000ff0000, 0xe700000000420000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (64) (VLEIG V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLEIB, 0xff00000000ff0000, 0xe700000000400000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (8) (VLEIB V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLER, 0xff00000000ff0000, 0xe600000000070000, 0x0, // VECTOR LOAD ELEMENTS REVERSED (VLER V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VFI, 0xff00000000ff0000, 0xe700000000c70000, 0xff0000000000, // VECTOR LOAD FP INTEGER (VFI V1,V2,M3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_Mask_28_31, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VLGV, 0xff00000000ff0000, 0xe700000000210000, 0x0, // VECTOR LOAD GR FROM VR ELEMENT (VLGV 
R1,V3,D2(B2),M4) + [8]*argField{ap_Reg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLIP, 0xff00000000ff0000, 0xe600000000490000, 0xf000000000000, // VECTOR LOAD IMMEDIATE DECIMAL (VLIP V1,I2,I3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_ImmUnsigned_32_35, ap_ImmUnsigned_36_39}}, + {VLLEZ, 0xff00000000ff0000, 0xe700000000040000, 0x0, // VECTOR LOAD LOGICAL ELEMENT AND ZERO (VLLEZ V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLM, 0xff00000000ff0000, 0xe700000000360000, 0x0, // VECTOR LOAD MULTIPLE (VLM V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLP, 0xff00000000ff0000, 0xe700000000df0000, 0xffff00000000, // VECTOR LOAD POSITIVE (VLP V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLRL, 0xff00000000ff0000, 0xe600000000350000, 0x0, // VECTOR LOAD RIGHTMOST WITH LENGTH (VLRL V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VLRLR, 0xff00000000ff0000, 0xe600000000370000, 0xf0000000000000, // VECTOR LOAD RIGHTMOST WITH LENGTH (VLRLR V1,R3,D2(B2)) + [8]*argField{ap_VecReg_32_35, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VLBB, 0xff00000000ff0000, 0xe700000000070000, 0x0, // VECTOR LOAD TO BLOCK BOUNDARY (VLBB V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLVG, 0xff00000000ff0000, 0xe700000000220000, 0x0, // VECTOR LOAD VR ELEMENT FROM GR (VLVG V1,R3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VLVGP, 0xff00000000ff0000, 
0xe700000000620000, 0xffff0000000, // VECTOR LOAD VR FROM GRS DISJOINT (VLVGP V1,R2,R3) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_Reg_16_19, ap_ImmUnsigned_36_39}}, + {VLL, 0xff00000000ff0000, 0xe700000000370000, 0xf0000000, // VECTOR LOAD WITH LENGTH (VLL V1,R3,D2(B2)) + [8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VMX, 0xff00000000ff0000, 0xe700000000ff0000, 0xfff00000000, // VECTOR MAXIMUM (VMX V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMXL, 0xff00000000ff0000, 0xe700000000fd0000, 0xfff00000000, // VECTOR MAXIMUM LOGICAL (VMXL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMRH, 0xff00000000ff0000, 0xe700000000610000, 0xfff00000000, // VECTOR MERGE HIGH (VMRH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMRL, 0xff00000000ff0000, 0xe700000000600000, 0xfff00000000, // VECTOR MERGE LOW (VMRL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMN, 0xff00000000ff0000, 0xe700000000fe0000, 0xfff00000000, // VECTOR MINIMUM (VMN V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMNL, 0xff00000000ff0000, 0xe700000000fc0000, 0xfff00000000, // VECTOR MINIMUM LOGICAL (VMNL V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMAE, 0xff00000000ff0000, 0xe700000000ae0000, 0xff00000000, // VECTOR MULTIPLY AND ADD EVEN (VMAE V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMAH, 0xff00000000ff0000, 0xe700000000ab0000, 0xff00000000, // VECTOR MULTIPLY AND ADD HIGH (VMAH V1,V2,V3,V4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMALE, 0xff00000000ff0000, 0xe700000000ac0000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOGICAL EVEN (VMALE V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMALH, 0xff00000000ff0000, 0xe700000000a90000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOGICAL HIGH (VMALH V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMALO, 0xff00000000ff0000, 0xe700000000ad0000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOGICAL ODD (VMALO V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMAL, 0xff00000000ff0000, 0xe700000000aa0000, 0xff00000000, // VECTOR MULTIPLY AND ADD LOW (VMAL V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMAO, 0xff00000000ff0000, 0xe700000000af0000, 0xff00000000, // VECTOR MULTIPLY AND ADD ODD (VMAO V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VMSP, 0xff00000000ff0000, 0xe600000000790000, 0xf0000000000, // VECTOR MULTIPLY AND SHIFT DECIMAL (VMSP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VMP, 0xff00000000ff0000, 0xe600000000780000, 0xf0000000000, // VECTOR MULTIPLY DECIMAL (VMP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VME, 0xff00000000ff0000, 0xe700000000a60000, 0xfff00000000, // VECTOR MULTIPLY EVEN (VME V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, 
ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMH, 0xff00000000ff0000, 0xe700000000a30000, 0xfff00000000, // VECTOR MULTIPLY HIGH (VMH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMLE, 0xff00000000ff0000, 0xe700000000a40000, 0xfff00000000, // VECTOR MULTIPLY LOGICAL EVEN (VMLE V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMLH, 0xff00000000ff0000, 0xe700000000a10000, 0xfff00000000, // VECTOR MULTIPLY LOGICAL HIGH (VMLH V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMLO, 0xff00000000ff0000, 0xe700000000a50000, 0xfff00000000, // VECTOR MULTIPLY LOGICAL ODD (VMLO V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VML, 0xff00000000ff0000, 0xe700000000a20000, 0xfff00000000, // VECTOR MULTIPLY LOW (VML V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMO, 0xff00000000ff0000, 0xe700000000a70000, 0xfff00000000, // VECTOR MULTIPLY ODD (VMO V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VMSL, 0xff00000000ff0000, 0xe700000000b80000, 0xf00000000, // VECTOR MULTIPLY SUM LOGICAL (VMSL V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VNN, 0xff00000000ff0000, 0xe7000000006e0000, 0xffff0000000, // VECTOR NAND (VNN V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VNO, 0xff00000000ff0000, 0xe7000000006b0000, 0xffff0000000, // VECTOR NOR (VNO V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VNX, 0xff00000000ff0000, 0xe7000000006c0000, 0xffff0000000, 
// VECTOR NOT EXCLUSIVE OR (VNX V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VO, 0xff00000000ff0000, 0xe7000000006a0000, 0xffff0000000, // VECTOR OR (VO V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VOC, 0xff00000000ff0000, 0xe7000000006f0000, 0xffff0000000, // VECTOR OR WITH COMPLEMENT (VOC V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VPK, 0xff00000000ff0000, 0xe700000000940000, 0xfff00000000, // VECTOR PACK (VPK V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VPKLS, 0xff00000000ff0000, 0xe700000000950000, 0xf0f00000000, // VECTOR PACK LOGICAL SATURATE (VPKLS V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPKS, 0xff00000000ff0000, 0xe700000000970000, 0xf0f00000000, // VECTOR PACK SATURATE (VPKS V1,V2,V3,M4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPKZ, 0xff00000000ff0000, 0xe600000000340000, 0x0, // VECTOR PACK ZONED (VPKZ V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VPKZR, 0xff00000000ff0000, 0xe600000000700000, 0xf0000000000, // VECTOR PACK ZONED REGISTER (VPKZR V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPSOP, 0xff00000000ff0000, 0xe6000000005b0000, 0x0, // VECTOR PERFORM SIGN OPERATION DECIMAL (VPSOP V1,V2,I3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_28_35, ap_ImmUnsigned_16_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VPERM, 0xff00000000ff0000, 0xe7000000008c0000, 0xfff00000000, // VECTOR PERMUTE (VPERM V1,V2,V3,V4) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_ImmUnsigned_36_39}}, + {VPDI, 0xff00000000ff0000, 0xe700000000840000, 0xfff00000000, // VECTOR PERMUTE DOUBLEWORD IMMEDIATE (VPDI V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VPOPCT, 0xff00000000ff0000, 0xe700000000500000, 0xffff00000000, // VECTOR POPULATION COUNT (VPOPCT V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VRP, 0xff00000000ff0000, 0xe6000000007b0000, 0xf0000000000, // VECTOR REMAINDER DECIMAL (VRP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VREP, 0xff00000000ff0000, 0xe7000000004d0000, 0x0, // VECTOR REPLICATE (VREP V1,V3,I2,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VREPI, 0xff00000000ff0000, 0xe700000000450000, 0xf000000000000, // VECTOR REPLICATE IMMEDIATE (VREPI V1,I2,M3) + [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSCEF, 0xff00000000ff0000, 0xe7000000001b0000, 0x0, // VECTOR SCATTER ELEMENT (32) (VSCEF V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSCEG, 0xff00000000ff0000, 0xe7000000001a0000, 0x0, // VECTOR SCATTER ELEMENT (64) (VSCEG V1,D2(V2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_VecReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSEL, 0xff00000000ff0000, 0xe7000000008d0000, 0xfff00000000, // VECTOR SELECT (VSEL V1,V2,V3,V4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_ImmUnsigned_36_39}}, + {VSDP, 0xff00000000ff0000, 0xe6000000007e0000, 0xf0000000000, // VECTOR SHIFT AND DIVIDE DECIMAL (VSDP V1,V2,V3,I4,M5) + 
[8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSRP, 0xff00000000ff0000, 0xe600000000590000, 0x0, // VECTOR SHIFT AND ROUND DECIMAL (VSRP V1,V2,I3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_28_35, ap_ImmUnsigned_16_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSRPR, 0xff00000000ff0000, 0xe600000000720000, 0xf0000000000, // VECTOR SHIFT AND ROUND DECIMAL REGISTER (VSRPR V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSL, 0xff00000000ff0000, 0xe700000000740000, 0xffff0000000, // VECTOR SHIFT LEFT (VSL V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSLB, 0xff00000000ff0000, 0xe700000000750000, 0xffff0000000, // VECTOR SHIFT LEFT BY BYTE (VSLB V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSLD, 0xff00000000ff0000, 0xe700000000860000, 0xf00f0000000, // VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD V1,V2,V3,I4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_ImmUnsigned_36_39}}, + {VSLDB, 0xff00000000ff0000, 0xe700000000770000, 0xf00f0000000, // VECTOR SHIFT LEFT DOUBLE BY BYTE (VSLDB V1,V2,V3,I4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_ImmUnsigned_36_39}}, + {VSRA, 0xff00000000ff0000, 0xe7000000007e0000, 0xffff0000000, // VECTOR SHIFT RIGHT ARITHMETIC (VSRA V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSRAB, 0xff00000000ff0000, 0xe7000000007f0000, 0xffff0000000, // VECTOR SHIFT RIGHT ARITHMETIC BY BYTE (VSRAB V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSRD, 0xff00000000ff0000, 0xe700000000870000, 0xf00f0000000, // VECTOR SHIFT RIGHT DOUBLE BY BIT (VSRD 
V1,V2,V3,I4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_24_31, ap_ImmUnsigned_36_39}}, + {VSRL, 0xff00000000ff0000, 0xe7000000007c0000, 0xffff0000000, // VECTOR SHIFT RIGHT LOGICAL (VSRL V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSRLB, 0xff00000000ff0000, 0xe7000000007d0000, 0xffff0000000, // VECTOR SHIFT RIGHT LOGICAL BY BYTE (VSRLB V1,V2,V3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_36_39}}, + {VSEG, 0xff00000000ff0000, 0xe7000000005f0000, 0xffff00000000, // VECTOR SIGN EXTEND TO DOUBLEWORD (VSEG V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VST, 0xff00000000ff0000, 0xe7000000000e0000, 0x0, // VECTOR STORE (VST V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEBRH, 0xff00000000ff0000, 0xe600000000090000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENT(16) (VSTEBRH V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEBRF, 0xff00000000ff0000, 0xe6000000000b0000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENT(32) (VSTEBRF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEBRG, 0xff00000000ff0000, 0xe6000000000a0000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENT(64) (VSTEBRG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTBR, 0xff00000000ff0000, 0xe6000000000e0000, 0x0, // VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEH, 0xff00000000ff0000, 
0xe700000000090000, 0x0, // VECTOR STORE ELEMENT (16) (VSTEH V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEF, 0xff00000000ff0000, 0xe7000000000b0000, 0x0, // VECTOR STORE ELEMENT (32) (VSTEF V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEG, 0xff00000000ff0000, 0xe7000000000a0000, 0x0, // VECTOR STORE ELEMENT (64) (VSTEG V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTEB, 0xff00000000ff0000, 0xe700000000080000, 0x0, // VECTOR STORE ELEMENT (8) (VSTEB V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTER, 0xff00000000ff0000, 0xe6000000000f0000, 0x0, // VECTOR STORE ELEMENTS REVERSED (VSTER V1,D2(X2,B2),M3) + [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTM, 0xff00000000ff0000, 0xe7000000003e0000, 0x0, // VECTOR STORE MULTIPLE (VSTM V1,V3,D2(B2),M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSTRL, 0xff00000000ff0000, 0xe6000000003d0000, 0x0, // VECTOR STORE RIGHTMOST WITH LENGTH (VSTRL V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VSTRLR, 0xff00000000ff0000, 0xe6000000003f0000, 0xf0000000000000, // VECTOR STORE RIGHTMOST WITH LENGTH (VSTRLR V1,R3,D2(B2)) + [8]*argField{ap_VecReg_32_35, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VSTL, 0xff00000000ff0000, 0xe7000000003f0000, 0xf0000000, // VECTOR STORE WITH LENGTH (VSTL V1,R3,D2(B2)) + 
[8]*argField{ap_VecReg_8_11, ap_Reg_12_15, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_36_39}}, + {VSTRC, 0xff00000000ff0000, 0xe7000000008a0000, 0xf00000000, // VECTOR STRING RANGE COMPARE (VSTRC V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSTRS, 0xff00000000ff0000, 0xe7000000008b0000, 0xf00000000, // VECTOR STRING SEARCH (VSTRS V1,V2,V3,V4,M5,M6) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VS, 0xff00000000ff0000, 0xe700000000f70000, 0xfff00000000, // VECTOR SUBTRACT (VS V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSCBI, 0xff00000000ff0000, 0xe700000000f50000, 0xfff00000000, // VECTOR SUBTRACT COMPUTE BORROW INDICATION (VSCBI V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSP, 0xff00000000ff0000, 0xe600000000730000, 0xf0000000000, // VECTOR SUBTRACT DECIMAL (VSP V1,V2,V3,I4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_ImmUnsigned_28_35, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VSBCBI, 0xff00000000ff0000, 0xe700000000bd0000, 0xff00000000, // VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION (VSBCBI V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VSBI, 0xff00000000ff0000, 0xe700000000bf0000, 0xff00000000, // VECTOR SUBTRACT WITH BORROW INDICATION (VSBI V1,V2,V3,V4,M5) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_VecReg_32_35, ap_Mask_20_23, ap_ImmUnsigned_36_39}}, + {VSUMG, 0xff00000000ff0000, 0xe700000000650000, 0xfff00000000, // VECTOR SUM ACROSS DOUBLEWORD (VSUMG V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, 
ap_ImmUnsigned_36_39}}, + {VSUMQ, 0xff00000000ff0000, 0xe700000000670000, 0xfff00000000, // VECTOR SUM ACROSS QUADWORD (VSUMQ V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VSUM, 0xff00000000ff0000, 0xe700000000640000, 0xfff00000000, // VECTOR SUM ACROSS WORD (VSUM V1,V2,V3,M4) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_VecReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VTP, 0xff00000000ff0000, 0xe6000000005f0000, 0xf0fffff0000000, // VECTOR TEST DECIMAL (VTP V1) + [8]*argField{ap_VecReg_12_15, ap_ImmUnsigned_36_39}}, + {VTM, 0xff00000000ff0000, 0xe700000000d80000, 0xfffff0000000, // VECTOR TEST UNDER MASK (VTM V1,V2) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_ImmUnsigned_36_39}}, + {VUPH, 0xff00000000ff0000, 0xe700000000d70000, 0xffff00000000, // VECTOR UNPACK HIGH (VUPH V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPLH, 0xff00000000ff0000, 0xe700000000d50000, 0xffff00000000, // VECTOR UNPACK LOGICAL HIGH (VUPLH V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPLL, 0xff00000000ff0000, 0xe700000000d40000, 0xffff00000000, // VECTOR UNPACK LOGICAL LOW (VUPLL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPL, 0xff00000000ff0000, 0xe700000000d60000, 0xffff00000000, // VECTOR UNPACK LOW (VUPL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + {VUPKZ, 0xff00000000ff0000, 0xe6000000003c0000, 0x0, // VECTOR UNPACK ZONED (VUPKZ V1,D2(B2),I3) + [8]*argField{ap_VecReg_32_35, ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15, ap_ImmUnsigned_36_39}}, + {VUPKZH, 0xff00000000ff0000, 0xe600000000540000, 0xff0ff0000000, // VECTOR UNPACK ZONED HIGH (VUPKZH V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {VUPKZL, 
0xff00000000ff0000, 0xe6000000005c0000, 0xff0ff0000000, // VECTOR UNPACK ZONED LOW (VUPKZL V1,V2,M3) + [8]*argField{ap_VecReg_8_11, ap_VecReg_12_15, ap_Mask_24_27, ap_ImmUnsigned_36_39}}, + {ZAP, 0xff00000000000000, 0xf800000000000000, 0x0, // ZERO AND ADD (ZAP D1(L1,B1),D2(L2,B2)) + [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_11, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_Len_12_15, ap_BaseReg_32_35}}, +} diff --git a/s390x/s390xasm/testdata/decode_generated.txt b/s390x/s390xasm/testdata/decode_generated.txt new file mode 100644 index 00000000..b7b3f09f --- /dev/null +++ b/s390x/s390xasm/testdata/decode_generated.txt @@ -0,0 +1,1245 @@ + 5a82100b| gnu a %r8,11(%r2,%r1) + 1a80| gnu ar %r8,%r0 + b9f80080| gnu ark %r8,%r0,%r0 +e382100b005a| gnu ay %r8,11(%r2,%r1) +e382100b0008| gnu ag %r8,11(%r2,%r1) + b9080080| gnu agr %r8,%r0 + b9e80080| gnu agrk %r8,%r0,%r0 +e382100b0018| gnu agf %r8,11(%r2,%r1) + b9180080| gnu agfr %r8,%r0 + b34a0080| gnu axbr %f8,%f0 + b3da0080| gnu axtr %f8,%f0,%f0 + b3da0180| gnu axtra %f8,%f0,%f0,1 +ed82100b001a| gnu adb %f8,11(%r2,%r1) + b31a0080| gnu adbr %f8,%f0 + b3d20080| gnu adtr %f8,%f0,%f0 + b3d20180| gnu adtra %f8,%f0,%f0,1 +ed82100b000a| gnu aeb %f8,11(%r2,%r1) + b30a0080| gnu aebr %f8,%f0 +fa332006100b| gnu ap 6(4,%r2),11(4,%r1) + 4a82100b| gnu ah %r8,11(%r2,%r1) +e382100b007a| gnu ahy %r8,11(%r2,%r1) +e382100b0038| gnu agh %r8,11(%r2,%r1) + a78a0008| gnu ahi %r8,8 + a78b0008| gnu aghi %r8,8 + b9c80080| gnu ahhhr %r8,%r0,%r0 + b9d80080| gnu ahhlr %r8,%r0,%r0 +c28900000008| gnu afi %r8,8 +ec80000800d8| gnu ahik %r8,%r0,8 +eb082006006a| gnu asi 6(%r2),8 +ec80000800d9| gnu aghik %r8,%r0,8 +c28800000008| gnu agfi %r8,8 +eb082006007a| gnu agsi 6(%r2),8 +cc8800000008| gnu aih %r8,8 + 5e82100b| gnu al %r8,11(%r2,%r1) + 1e80| gnu alr %r8,%r0 + b9fa0080| gnu alrk %r8,%r0,%r0 +e382100b005e| gnu aly %r8,11(%r2,%r1) +e382100b000a| gnu alg %r8,11(%r2,%r1) + b90a0080| gnu algr %r8,%r0 + b9ea0080| gnu algrk %r8,%r0,%r0 +e382100b001a| gnu algf 
%r8,11(%r2,%r1) + b91a0080| gnu algfr %r8,%r0 + b9ca0080| gnu alhhhr %r8,%r0,%r0 + b9da0080| gnu alhhlr %r8,%r0,%r0 +c28b00000008| gnu alfi %r8,8 +c28a00000008| gnu algfi %r8,8 +e382100b0098| gnu alc %r8,11(%r2,%r1) + b9980080| gnu alcr %r8,%r0 +e382100b0088| gnu alcg %r8,11(%r2,%r1) + b9880080| gnu alcgr %r8,%r0 +ec80000800da| gnu alhsik %r8,%r0,8 +eb082006006e| gnu alsi 6(%r2),8 +ec80000800db| gnu alghsik %r8,%r0,8 +eb082006007e| gnu algsi 6(%r2),8 +cc8a00000008| gnu alsih %r8,8 +cc8b00000008| gnu alsihn %r8,8 + 3680| gnu axr %f8,%f0 + 6a82100b| gnu ad %f8,11(%r2,%r1) + 2a80| gnu adr %f8,%f0 + 7a82100b| gnu ae %f8,11(%r2,%r1) + 3a80| gnu aer %f8,%f0 + 6e82100b| gnu aw %f8,11(%r2,%r1) + 2e80| gnu awr %f8,%f0 + 7e82100b| gnu au %f8,11(%r2,%r1) + 3e80| gnu aur %f8,%f0 + 5482100b| gnu n %r8,11(%r2,%r1) + 1480| gnu nr %r8,%r0 + b9f40080| gnu nrk %r8,%r0,%r0 +e382100b0054| gnu ny %r8,11(%r2,%r1) +e382100b0080| gnu ng %r8,11(%r2,%r1) + b9800080| gnu ngr %r8,%r0 + b9e40080| gnu ngrk %r8,%r0,%r0 +d4032006100b| gnu nc 6(4,%r2),11(%r1) + 94082006| gnu ni 6(%r2),8 +eb0820060054| gnu niy 6(%r2),8 + a5840008| gnu nihh %r8,8 + a5850008| gnu nihl %r8,8 +c08a00000008| gnu nihf %r8,8 + a5860008| gnu nilh %r8,8 + a5870008| gnu nill %r8,8 +c08b00000008| gnu nilf %r8,8 + b9f50080| gnu ncrk %r8,%r0,%r0 + b9e50080| gnu ncgrk %r8,%r0,%r0 + 4582100b| gnu bal %r8,11(%r2,%r1) + 0580| gnu balr %r8,%r0 + 4d82100b| gnu bas %r8,11(%r2,%r1) + 0d80| gnu basr %r8,%r0 + 0c80| gnu bassm %r8,%r0 + b25a0080| gnu bsa %r8,%r0 + 0b80| gnu bsm %r8,%r0 + b2400080| gnu bakr %r8,%r0 + b2580080| gnu bsg %r8,%r0 +e372100b0047| gnu bine 11(%r2,%r1) + 4772100b| gnu bne 11(%r2,%r1) + 0770| gnu bner %r0 + 4682100b| gnu bct %r8,11(%r2,%r1) + 0680| gnu bctr %r8,%r0 +e382100b0046| gnu bctg %r8,11(%r2,%r1) + b9460080| gnu bctgr %r8,%r0 + 8680100b| gnu bxh %r8,%r0,11(%r1) +eb80100b0044| gnu bxhg %r8,%r0,11(%r1) + 8780100b| gnu bxle %r8,%r0,11(%r1) +eb80100b0045| gnu bxleg %r8,%r0,11(%r1) +c77060b60000| gnu bpp 
7,0x1cc,182(%r6) +c57000000093| gnu bprp 7,0x1d2,0x2f8 + a7850000| gnu bras %r8,0x1d8 +c08500000000| gnu brasl %r8,0x1dc + a7740000| gnu jne 0x1e2 +c07400000000| gnu jgne 0x1e6 + a7860000| gnu brct %r8,0x1ec + a7870000| gnu brctg %r8,0x1f0 +cc8600000000| gnu brcth %r8,0x1f4 + 84800000| gnu brxh %r8,%r0,0x1fa +ec8000000044| gnu brxhg %r8,%r0,0x1fe + 85800000| gnu brxle %r8,%r0,0x204 +ec8000000045| gnu brxlg %r8,%r0,0x208 + b2760000| gnu xsch + b2410080| gnu cksm %r8,%r0 + b92e0080| gnu km %r8,%r0 + b9290080| gnu kma %r8,%r0,%r0 + b92f0080| gnu kmc %r8,%r0 + b92a0080| gnu kmf %r8,%r0 + b92d0080| gnu kmctr %r8,%r0,%r0 + b92b0080| gnu kmo %r8,%r0 + b2300000| gnu csch + 5982100b| gnu c %r8,11(%r2,%r1) + 1980| gnu cr %r8,%r0 +e382100b0059| gnu cy %r8,11(%r2,%r1) +e382100b0020| gnu cg %r8,11(%r2,%r1) + b9200080| gnu cgr %r8,%r0 +e382100b0030| gnu cgf %r8,11(%r2,%r1) + b9300080| gnu cgfr %r8,%r0 + b3490080| gnu cxbr %f8,%f0 + b3ec0080| gnu cxtr %f8,%f0 + b3690080| gnu cxr %f8,%f0 +ed82100b0019| gnu cdb %f8,11(%r2,%r1) + b3190080| gnu cdbr %f8,%f0 + b3e40080| gnu cdtr %f8,%f0 + 6982100b| gnu cd %f8,11(%r2,%r1) + 2980| gnu cdr %f8,%f0 +ed82100b0009| gnu ceb %f8,11(%r2,%r1) + b3090080| gnu cebr %f8,%f0 + 7982100b| gnu ce %f8,11(%r2,%r1) + 3980| gnu cer %f8,%f0 +ec8080cd30f6| gnu crb %r8,%r0,3,205(%r8) +ec8080cd30e4| gnu cgrb %r8,%r0,3,205(%r8) +ec80ffac3076| gnu crj %r8,%r0,3,0x1e6 +ec80ffac3064| gnu cgrj %r8,%r0,3,0x1ec + b21a100b| gnu cfc 11(%r1) + b98f0180| gnu crdte %r8,%r0,%r0,1 + b3480080| gnu kxbr %f8,%f0 + b3e80080| gnu kxtr %f8,%f0 +ed82100b0018| gnu kdb %f8,11(%r2,%r1) + b3180080| gnu kdbr %f8,%f0 + b3e00080| gnu kdtr %f8,%f0 +ed82100b0008| gnu keb %f8,11(%r2,%r1) + b3080080| gnu kebr %f8,%f0 + ba80100b| gnu cs %r8,%r0,11(%r1) +eb80100b0014| gnu csy %r8,%r0,11(%r1) +eb80100b0030| gnu csg %r8,%r0,11(%r1) + b2500080| gnu csp %r8,%r0 + b98a0080| gnu cspg %r8,%r0 +c8022006100b| gnu csst 6(%r2),11(%r1),%r0 + b9723080| gnu crt %r8,%r0,3 + b9603080| gnu cgrt %r8,%r0,3 + 
b3fc0080| gnu cextr %f8,%f0 + b3f40080| gnu cedtr %f8,%f0 +f9332006100b| gnu cp 6(4,%r2),11(4,%r1) + bb80100b| gnu cds %r8,%r0,11(%r1) +eb80100b0031| gnu cdsy %r8,%r0,11(%r1) +eb80100b003e| gnu cdsg %r8,%r0,11(%r1) + 4982100b| gnu ch %r8,11(%r2,%r1) +e382100b0079| gnu chy %r8,11(%r2,%r1) +e382100b0034| gnu cgh %r8,11(%r2,%r1) +e55420060008| gnu chhsi 6(%r2),8 + a78e0008| gnu chi %r8,8 +e55c20060008| gnu chsi 6(%r2),8 + a78f0008| gnu cghi %r8,8 +e55820060008| gnu cghsi 6(%r2),8 +c68500000000| gnu chrl %r8,0x330 +c68400000000| gnu cghrl %r8,0x336 +e382100b00cd| gnu chf %r8,11(%r2,%r1) + b9cd0080| gnu chhr %r8,%r0 + b9dd0080| gnu chlr %r8,%r0 +c28d00000008| gnu cfi %r8,8 +c28c00000008| gnu cgfi %r8,8 +ec8380cd08fe| gnu cib %r8,8,3,205(%r8) +ec8380cd08fc| gnu cgib %r8,8,3,205(%r8) +ec83ffac087e| gnu cij %r8,8,3,0x2ba +ec83ffac087c| gnu cgij %r8,8,3,0x2c0 +ec8000083072| gnu cit %r8,8,3 +ec8000083070| gnu cgit %r8,8,3 +cc8d00000008| gnu cih %r8,8 + 5582100b| gnu cl %r8,11(%r2,%r1) + 1580| gnu clr %r8,%r0 +e382100b0055| gnu cly %r8,11(%r2,%r1) +e382100b0021| gnu clg %r8,11(%r2,%r1) + b9210080| gnu clgr %r8,%r0 +e382100b0031| gnu clgf %r8,11(%r2,%r1) + b9310080| gnu clgfr %r8,%r0 +d5032006100b| gnu clc 6(4,%r2),11(%r1) + 95082006| gnu cli 6(%r2),8 +eb0820060055| gnu cliy 6(%r2),8 +ec8080cd30f7| gnu clrb %r8,%r0,3,205(%r8) +ec8080cd30e5| gnu clgrb %r8,%r0,3,205(%r8) +ec80ffac3077| gnu clrj %r8,%r0,3,0x314 +ec80ffac3065| gnu clgrj %r8,%r0,3,0x31a + b9733080| gnu clrt %r8,%r0,3 +eb83100b0023| gnu clt %r8,3,11(%r1) + b9613080| gnu clgrt %r8,%r0,3 +eb83100b002b| gnu clgt %r8,3,11(%r1) +eb83100b0020| gnu clmh %r8,3,11(%r1) + bd83100b| gnu clm %r8,3,11(%r1) +eb83100b0021| gnu clmy %r8,3,11(%r1) +e382100b00cf| gnu clhf %r8,11(%r2,%r1) + b9cf0080| gnu clhhr %r8,%r0 + b9df0080| gnu clhlr %r8,%r0 +e55520060008| gnu clhhsi 6(%r2),8 +c28f00000008| gnu clfi %r8,8 +e55d20060008| gnu clfhsi 6(%r2),8 +e55920060008| gnu clghsi 6(%r2),8 +c28e00000008| gnu clgfi %r8,8 +ec8380cd08ff| gnu clib 
%r8,8,3,205(%r8) +ec8380cd08fd| gnu clgib %r8,8,3,205(%r8) +ec83ffac087f| gnu clij %r8,8,3,0x37c +ec83ffac087d| gnu clgij %r8,8,3,0x382 +ec8000083073| gnu clfit %r8,8,3 +ec8000083071| gnu clgit %r8,8,3 +cc8f00000008| gnu clih %r8,8 + 0f80| gnu clcl %r8,%r0 + a980100b| gnu clcle %r8,%r0,11(%r1) +eb80100b008f| gnu clclu %r8,%r0,11(%r1) +c68f00000000| gnu clrl %r8,0x44e +c68700000000| gnu clhrl %r8,0x454 +c68a00000000| gnu clgrl %r8,0x45a +c68600000000| gnu clghrl %r8,0x460 +c68e00000000| gnu clgfrl %r8,0x466 + b25d0080| gnu clst %r8,%r0 +c68d00000000| gnu crl %r8,0x470 +c68800000000| gnu cgrl %r8,0x476 +c68c00000000| gnu cgfrl %r8,0x47c + b2570080| gnu cuse %r8,%r0 + b2630080| gnu cmpsc %r8,%r0 + b93a0080| gnu kdsa %r8,%r0 + b93e0080| gnu kimd %r8,%r0 + b93f0080| gnu klmd %r8,%r0 + b91e0080| gnu kmac %r8,%r0 + b3590080| gnu thdr %f8,%f0 + b3580080| gnu thder %f8,%f0 + b3960080| gnu cxfbr %f8,%r0 + b3963180| gnu cxfbra %f8,3,%r0,1 + b9593180| gnu cxftr %f8,3,%r0,1 + b3b60080| gnu cxfr %f8,%r0 + b3950080| gnu cdfbr %f8,%r0 + b3953180| gnu cdfbra %f8,3,%r0,1 + b9513180| gnu cdftr %f8,3,%r0,1 + b3b50080| gnu cdfr %f8,%r0 + b3940080| gnu cefbr %f8,%r0 + b3943180| gnu cefbra %f8,3,%r0,1 + b3b40080| gnu cefr %f8,%r0 + b3a60080| gnu cxgbr %f8,%r0 + b3a63180| gnu cxgbra %f8,3,%r0,1 + b3f90080| gnu cxgtr %f8,%r0 + b3f93180| gnu cxgtra %f8,3,%r0,1 + b3c60080| gnu cxgr %f8,%r0 + b3a50080| gnu cdgbr %f8,%r0 + b3a53180| gnu cdgbra %f8,3,%r0,1 + b3f10080| gnu cdgtr %f8,%r0 + b3f13180| gnu cdgtra %f8,3,%r0,1 + b3c50080| gnu cdgr %f8,%r0 + b3a40080| gnu cegbr %f8,%r0 + b3a43180| gnu cegbra %f8,3,%r0,1 + b3c40080| gnu cegr %f8,%r0 + b3923180| gnu cxlfbr %f8,3,%r0,1 + b95b3180| gnu cxlftr %f8,3,%r0,1 + b3913180| gnu cdlfbr %f8,3,%r0,1 + b9533180| gnu cdlftr %f8,3,%r0,1 + b3903180| gnu celfbr %f8,3,%r0,1 + b3a23180| gnu cxlgbr %f8,3,%r0,1 + b95a3180| gnu cxlgtr %f8,3,%r0,1 + b3a13180| gnu cdlgbr %f8,3,%r0,1 + b9523180| gnu cdlgtr %f8,3,%r0,1 + b3a03180| gnu celgbr %f8,3,%r0,1 
+ed03100b83af| gnu cxpt %f8,11(4,%r1),3 +ed03100b83ae| gnu cdpt %f8,11(4,%r1),3 + b3fb0080| gnu cxstr %f8,%r0 + b3f30080| gnu cdstr %f8,%r0 + b3fa0080| gnu cxutr %f8,%r0 + b3f20080| gnu cdutr %f8,%r0 +ed03100b83ab| gnu cxzt %f8,11(4,%r1),3 +ed03100b83aa| gnu cdzt %f8,11(4,%r1),3 + b3503080| gnu tbedr %f8,3,%f0 + b3513080| gnu tbdr %f8,3,%f0 + 4f82100b| gnu cvb %r8,11(%r2,%r1) +e382100b0006| gnu cvby %r8,11(%r2,%r1) +e382100b000e| gnu cvbg %r8,11(%r2,%r1) + 4e82100b| gnu cvd %r8,11(%r2,%r1) +e382100b0026| gnu cvdy %r8,11(%r2,%r1) +e382100b002e| gnu cvdg %r8,11(%r2,%r1) + b39a3080| gnu cfxbr %r8,3,%f0 + b39a3180| gnu cfxbra %r8,3,%f0,1 + b3aa3080| gnu cgxbr %r8,3,%f0 + b3aa3180| gnu cgxbra %r8,3,%f0,1 + b9493180| gnu cfxtr %r8,3,%f0,1 + b3e93080| gnu cgxtr %r8,3,%f0 + b3e93180| gnu cgxtra %r8,3,%f0,1 + b3ba3080| gnu cfxr %r8,3,%f0 + b3ca3080| gnu cgxr %r8,3,%f0 + b3993080| gnu cfdbr %r8,3,%f0 + b3993180| gnu cfdbra %r8,3,%f0,1 + b3a93080| gnu cgdbr %r8,3,%f0 + b3a93180| gnu cgdbra %r8,3,%f0,1 + b9413180| gnu cfdtr %r8,3,%f0,1 + b3e13080| gnu cgdtr %r8,3,%f0 + b3e13180| gnu cgdtra %r8,3,%f0,1 + b3b93080| gnu cfdr %r8,3,%f0 + b3c93080| gnu cgdr %r8,3,%f0 + b3983080| gnu cfebr %r8,3,%f0 + b3983180| gnu cfebra %r8,3,%f0,1 + b3a83080| gnu cgebr %r8,3,%f0 + b3a83180| gnu cgebra %r8,3,%f0,1 + b3b83080| gnu cfer %r8,3,%f0 + b3c83080| gnu cger %r8,3,%f0 + b39e3180| gnu clfxbr %r8,3,%f0,1 + b3ae3180| gnu clgxbr %r8,3,%f0,1 + b94b3180| gnu clfxtr %r8,3,%f0,1 + b94a3180| gnu clgxtr %r8,3,%f0,1 + b39d3180| gnu clfdbr %r8,3,%f0,1 + b3ad3180| gnu clgdbr %r8,3,%f0,1 + b9433180| gnu clfdtr %r8,3,%f0,1 + b9423180| gnu clgdtr %r8,3,%f0,1 + b39c3180| gnu clfebr %r8,3,%f0,1 + b3ac3180| gnu clgebr %r8,3,%f0,1 +ed03100b83ad| gnu cpxt %f8,11(4,%r1),3 +ed03100b83ac| gnu cpdt %f8,11(4,%r1),3 + b3eb0180| gnu csxtr %r8,%f0,1 + b3e30180| gnu csdtr %r8,%f0,1 + b3ea0080| gnu cuxtr %r8,%f0 + b3e20080| gnu cudtr %r8,%f0 +ed03100b83a9| gnu czxt %f8,11(4,%r1),3 +ed03100b83a8| gnu czdt %f8,11(4,%r1),3 
+ b2a63080| gnu cu21 %r8,%r0,3 + b9b13080| gnu cu24 %r8,%r0,3 + b2a63080| gnu cu21 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b9b03080| gnu cu14 %r8,%r0,3 + b9b30080| gnu cu42 %r8,%r0 + b9b20080| gnu cu41 %r8,%r0 + b24d0080| gnu cpya %a8,%a0 + b3720080| gnu cpsdr %f8,%f0,%f0 +e6235000087c| gnu vscshp %v18,%v3,%v5 +e62350901874| gnu vschp %v18,%v3,%v5,1,9 + b9390080| gnu dfltcc %r8,%r0,%r0 + 5d82100b| gnu d %r8,11(%r2,%r1) + 1d80| gnu dr %r8,%r0 + b34d0080| gnu dxbr %f8,%f0 + b3d90080| gnu dxtr %f8,%f0,%f0 + b3d90180| gnu dxtra %f8,%f0,%f0,1 + b22d0080| gnu dxr %f8,%f0 +ed82100b001d| gnu ddb %f8,11(%r2,%r1) + b31d0080| gnu ddbr %f8,%f0 + b3d10080| gnu ddtr %f8,%f0,%f0 + b3d10180| gnu ddtra %f8,%f0,%f0,1 + 6d82100b| gnu dd %f8,11(%r2,%r1) + 2d80| gnu ddr %f8,%f0 +ed82100b000d| gnu deb %f8,11(%r2,%r1) + b30d0080| gnu debr %f8,%f0 + 7d82100b| gnu de %f8,11(%r2,%r1) + 3d80| gnu der %f8,%f0 +fd332006100b| gnu dp 6(4,%r2),11(4,%r1) +e382100b0097| gnu dl %r8,11(%r2,%r1) + b9970080| gnu dlr %r8,%r0 +e382100b0087| gnu dlg %r8,11(%r2,%r1) + b9870080| gnu dlgr %r8,%r0 +e382100b000d| gnu dsg %r8,11(%r2,%r1) + b90d0080| gnu dsgr %r8,%r0 +e382100b001d| gnu dsgf %r8,11(%r2,%r1) + b91d0080| gnu dsgfr %r8,%r0 + b35b0180| gnu didbr %f8,%f0,%f0,1 + b3530180| gnu diebr %f8,%f0,%f0,1 +de032006100b| gnu ed 6(4,%r2),11(%r1) +df032006100b| gnu edmk 6(4,%r2),11(%r1) + 5782100b| gnu x %r8,11(%r2,%r1) + 1780| gnu xr %r8,%r0 + b9f70080| gnu xrk %r8,%r0,%r0 +e382100b0057| gnu xy %r8,11(%r2,%r1) +e382100b0082| gnu xg %r8,11(%r2,%r1) + b9820080| gnu xgr %r8,%r0 + b9e70080| gnu xgrk %r8,%r0,%r0 +d7032006100b| gnu xc 6(4,%r2),11(%r1) + 97082006| gnu xi 6(%r2),8 +eb0820060057| gnu xiy 6(%r2),8 +c08600000008| gnu xihf %r8,8 +c08700000008| gnu xilf %r8,8 + 4482100b| gnu ex %r8,11(%r2,%r1) +c68000000000| gnu exrl %r8,0x720 + b24f0080| gnu ear %r8,%a0 + b99d0080| gnu esea %r8 + b3ed0080| gnu eextr %r8,%f0 + b3e50080| gnu eedtr %r8,%f0 +eb80100b004c| gnu ecag 
%r8,%r0,11(%r1) +c8012006100b| gnu ectg 6(%r2),11(%r1),%r0 + b38c0080| gnu efpc %r8 + b2260080| gnu epar %r8 + b99a0080| gnu epair %r8 + b98d0080| gnu epsw %r8,%r0 + b2270080| gnu esar %r8 + b99b0080| gnu esair %r8 + b3ef0080| gnu esxtr %r8,%f0 + b3e70080| gnu esdtr %r8,%f0 + b2490080| gnu ereg %r8,%r0 + b90e0080| gnu eregg %r8,%r0 + b24a0080| gnu esta %r8,%r0 + b2ec0080| gnu etnd %r8 + b9830080| gnu flogr %r8,%r0 + b2310000| gnu hsch + 2480| gnu hdr %f8,%f0 + 3480| gnu her %f8,%f0 + b2240080| gnu iac %r8 + b3fe0080| gnu iextr %f8,%f0,%r0 + b3f60080| gnu iedtr %f8,%f0,%r0 + 4382100b| gnu ic %r8,11(%r2,%r1) +e382100b0073| gnu icy %r8,11(%r2,%r1) +eb83100b0080| gnu icmh %r8,3,11(%r1) + bf83100b| gnu icm %r8,3,11(%r1) +eb83100b0081| gnu icmy %r8,3,11(%r1) + a5800008| gnu iihh %r8,8 + a5810008| gnu iihl %r8,8 +c08800000008| gnu iihf %r8,8 + a5820008| gnu iilh %r8,8 + a5830008| gnu iill %r8,8 +c08900000008| gnu iilf %r8,8 + b2220080| gnu ipm %r8 + b20b0000| gnu ipk + b9ac0080| gnu irbm %r8,%r0 + b2290080| gnu iske %r8,%r0 + b2230080| gnu ivsk %r8,%r0 + b98e0180| gnu idte %r8,%r0,%r0,1 + b2210180| gnu ipte %r8,%r0,%r0,1 + 5882100b| gnu l %r8,11(%r2,%r1) + 1880| gnu lr %r8,%r0 +e382100b0058| gnu ly %r8,11(%r2,%r1) +e382100b0004| gnu lg %r8,11(%r2,%r1) + b9040080| gnu lgr %r8,%r0 +e382100b0014| gnu lgf %r8,11(%r2,%r1) + b9140080| gnu lgfr %r8,%r0 + b3650080| gnu lxr %f8,%f0 + 6882100b| gnu ld %f8,11(%r2,%r1) + 2880| gnu ldr %f8,%f0 +ed82100b0065| gnu ldy %f8,11(%r2,%r1) + 7882100b| gnu le %f8,11(%r2,%r1) + 3880| gnu ler %f8,%f0 +ed82100b0064| gnu ley %f8,11(%r2,%r1) + 9a80100b| gnu lam %a8,%a0,11(%r1) +eb80100b009a| gnu lamy %a8,%a0,11(%r1) + 4182100b| gnu la %r8,11(%r2,%r1) +e382100b0071| gnu lay %r8,11(%r2,%r1) + 5182100b| gnu lae %r8,11(%r2,%r1) +e382100b0075| gnu laey %r8,11(%r2,%r1) +c08000000000| gnu larl %r8,0x836 +e5002006100b| gnu lasp 6(%r2),11(%r1) +eb80100b00f8| gnu laa %r8,%r0,11(%r1) +eb80100b00e8| gnu laag %r8,%r0,11(%r1) +eb80100b00fa| gnu laal 
%r8,%r0,11(%r1) +eb80100b00ea| gnu laalg %r8,%r0,11(%r1) +eb80100b00f4| gnu lan %r8,%r0,11(%r1) +eb80100b00e4| gnu lang %r8,%r0,11(%r1) +eb80100b00f7| gnu lax %r8,%r0,11(%r1) +eb80100b00e7| gnu laxg %r8,%r0,11(%r1) +eb80100b00f6| gnu lao %r8,%r0,11(%r1) +eb80100b00e6| gnu laog %r8,%r0,11(%r1) +e382100b0012| gnu lt %r8,11(%r2,%r1) + 1280| gnu ltr %r8,%r0 +e382100b0002| gnu ltg %r8,11(%r2,%r1) + b9020080| gnu ltgr %r8,%r0 +e382100b0032| gnu ltgf %r8,11(%r2,%r1) + b9120080| gnu ltgfr %r8,%r0 + b3420080| gnu ltxbr %f8,%f0 + b3de0080| gnu ltxtr %f8,%f0 + b3620080| gnu ltxr %f8,%f0 + b3120080| gnu ltdbr %f8,%f0 + b3d60080| gnu ltdtr %f8,%f0 + 2280| gnu ltdr %f8,%f0 + b3020080| gnu ltebr %f8,%f0 + 3280| gnu lter %f8,%f0 +e382100b009f| gnu lat %r8,11(%r2,%r1) +e382100b0085| gnu lgat %r8,11(%r2,%r1) +e382100b003b| gnu lzrf %r8,11(%r2,%r1) +e382100b002a| gnu lzrg %r8,11(%r2,%r1) + b200100b| gnu lbear 11(%r1) +e382100b0076| gnu lb %r8,11(%r2,%r1) + b9260080| gnu lbr %r8,%r0 +e382100b0077| gnu lgb %r8,11(%r2,%r1) + b9060080| gnu lgbr %r8,%r0 +e382100b00c0| gnu lbh %r8,11(%r2,%r1) + 1380| gnu lcr %r8,%r0 + b9030080| gnu lcgr %r8,%r0 + b9130080| gnu lcgfr %r8,%r0 + b3430080| gnu lcxbr %f8,%f0 + b3630080| gnu lcxr %f8,%f0 + b3130080| gnu lcdbr %f8,%f0 + 2380| gnu lcdr %f8,%f0 + b3730080| gnu lcdfr %f8,%f0 + b3030080| gnu lcebr %f8,%f0 + 3380| gnu lcer %f8,%f0 + b780100b| gnu lctl %c8,%c0,11(%r1) +eb80100b002f| gnu lctlg %c8,%c0,11(%r1) +e782100b3027| gnu lcbb %r8,11(%r2,%r1),3 + b3473080| gnu fixbr %f8,3,%f0 + b3473180| gnu fixbra %f8,3,%f0,1 + b3df3180| gnu fixtr %f8,3,%f0,1 + b3670080| gnu fixr %f8,%f0 + b35f3080| gnu fidbr %f8,3,%f0 + b35f3180| gnu fidbra %f8,3,%f0,1 + b3d73180| gnu fidtr %f8,3,%f0,1 + b37f0080| gnu fidr %f8,%f0 + b3573080| gnu fiebr %f8,3,%f0 + b3573180| gnu fiebra %f8,3,%f0,1 + b3770080| gnu fier %f8,%f0 + b29d100b| gnu lfpc 11(%r1) + b2bd100b| gnu lfas 11(%r1) + b3c10080| gnu ldgr %f8,%r0 + b3cd0080| gnu lgdr %r8,%f0 +e382100b004c| gnu lgg %r8,11(%r2,%r1) 
+e382100b004d| gnu lgsc %r8,11(%r2,%r1) + 4882100b| gnu lh %r8,11(%r2,%r1) + b9270080| gnu lhr %r8,%r0 +e382100b0078| gnu lhy %r8,11(%r2,%r1) +e382100b0015| gnu lgh %r8,11(%r2,%r1) + b9070080| gnu lghr %r8,%r0 +e382100b00c4| gnu lhh %r8,11(%r2,%r1) +ec830008004e| gnu lochhinle %r8,8 + a7880008| gnu lhi %r8,8 + a7890008| gnu lghi %r8,8 +ec8300080042| gnu lochinle %r8,8 +ec8300080046| gnu locghinle %r8,8 +c48500000000| gnu lhrl %r8,0x99e +c48400000000| gnu lghrl %r8,0x9a4 +e382100b00ca| gnu lfh %r8,11(%r2,%r1) +e382100b00c8| gnu lfhat %r8,11(%r2,%r1) +eb83100b00e0| gnu locfhnle %r8,11(%r1) + b9e03080| gnu locfhrnle %r8,%r0 +c08100000008| gnu lgfi %r8,8 +ed82100b0005| gnu lxdb %f8,11(%r2,%r1) + b3050080| gnu lxdbr %f8,%f0 + b3dc0180| gnu lxdtr %f8,%f0,1 +ed82100b0025| gnu lxd %f8,11(%r2,%r1) + b3250080| gnu lxdr %f8,%f0 +ed82100b0006| gnu lxeb %f8,11(%r2,%r1) + b3060080| gnu lxebr %f8,%f0 +ed82100b0026| gnu lxe %f8,11(%r2,%r1) + b3260080| gnu lxer %f8,%f0 +ed82100b0004| gnu ldeb %f8,11(%r2,%r1) + b3040080| gnu ldebr %f8,%f0 + b3d40180| gnu ldetr %f8,%f0,1 +ed82100b0024| gnu lde %f8,11(%r2,%r1) + b3240080| gnu lder %f8,%f0 +e382100b0016| gnu llgf %r8,11(%r2,%r1) + b9160080| gnu llgfr %r8,%r0 +e382100b0048| gnu llgfsg %r8,11(%r2,%r1) +e382100b009d| gnu llgfat %r8,11(%r2,%r1) +e382100b003a| gnu llzrgf %r8,11(%r2,%r1) +e382100b0094| gnu llc %r8,11(%r2,%r1) + b9940080| gnu llcr %r8,%r0 +e382100b0090| gnu llgc %r8,11(%r2,%r1) + b9840080| gnu llgcr %r8,%r0 +e382100b00c2| gnu llch %r8,11(%r2,%r1) +e382100b0095| gnu llh %r8,11(%r2,%r1) + b9950080| gnu llhr %r8,%r0 +e382100b0091| gnu llgh %r8,11(%r2,%r1) + b9850080| gnu llghr %r8,%r0 +e382100b00c6| gnu llhh %r8,11(%r2,%r1) +c48200000000| gnu llhrl %r8,0xa5a +c48600000000| gnu llghrl %r8,0xa60 + a58c0008| gnu llihh %r8,8 + a58d0008| gnu llihl %r8,8 +c08e00000008| gnu llihf %r8,8 + a58e0008| gnu llilh %r8,8 + a58f0008| gnu llill %r8,8 +c08f00000008| gnu llilf %r8,8 +c48e00000000| gnu llgfrl %r8,0xa82 +e382100b0017| gnu llgt 
%r8,11(%r2,%r1) + b9170080| gnu llgtr %r8,%r0 +e382100b009c| gnu llgtat %r8,11(%r2,%r1) + 9880100b| gnu lm %r8,%r0,11(%r1) +eb80100b0098| gnu lmy %r8,%r0,11(%r1) +eb80100b0004| gnu lmg %r8,%r0,11(%r1) +ef80100b80cd| gnu lmd %r8,%r0,11(%r1),205(%r8) +eb80100b0096| gnu lmh %r8,%r0,11(%r1) + 1180| gnu lnr %r8,%r0 + b9010080| gnu lngr %r8,%r0 + b9110080| gnu lngfr %r8,%r0 + b3410080| gnu lnxbr %f8,%f0 + b3610080| gnu lnxr %f8,%f0 + b3110080| gnu lndbr %f8,%f0 + 2180| gnu lndr %f8,%f0 + b3710080| gnu lndfr %f8,%f0 + b3010080| gnu lnebr %f8,%f0 + 3180| gnu lner %f8,%f0 +eb83100b00f2| gnu locnle %r8,11(%r1) + b9f23080| gnu locrnle %r8,%r0 +eb83100b00e2| gnu locgnle %r8,11(%r1) + b9e23080| gnu locgrnle %r8,%r0 + b9aa0180| gnu lptea %r8,%r0,%r0,1 +c8042006100b| gnu lpd %r0,6(%r2),11(%r1) +c8052006100b| gnu lpdg %r0,6(%r2),11(%r1) +e382100b008f| gnu lpq %r8,11(%r2,%r1) + 1080| gnu lpr %r8,%r0 + b9000080| gnu lpgr %r8,%r0 + b9100080| gnu lpgfr %r8,%r0 + b3400080| gnu lpxbr %f8,%f0 + b3600080| gnu lpxr %f8,%f0 + b3100080| gnu lpdbr %f8,%f0 + 2080| gnu lpdr %f8,%f0 + b3700080| gnu lpdfr %f8,%f0 + b3000080| gnu lpebr %f8,%f0 + 3080| gnu lper %f8,%f0 + 82002006| gnu lpsw 6(%r2) + b2b2100b| gnu lpswe 11(%r1) +eb0020060071| gnu lpswey 6(%r2) + b182100b| gnu lra %r8,11(%r2,%r1) +e382100b0013| gnu lray %r8,11(%r2,%r1) +e382100b0003| gnu lrag %r8,11(%r2,%r1) +c48d00000000| gnu lrl %r8,0xb40 +c48800000000| gnu lgrl %r8,0xb46 +c48c00000000| gnu lgfrl %r8,0xb4c +e382100b001f| gnu lrvh %r8,11(%r2,%r1) +e382100b001e| gnu lrv %r8,11(%r2,%r1) + b91f0080| gnu lrvr %r8,%r0 +e382100b000f| gnu lrvg %r8,11(%r2,%r1) + b90f0080| gnu lrvgr %r8,%r0 + b3450080| gnu ldxbr %f8,%f0 + b3453180| gnu ldxbra %f8,3,%f0,1 + b3dd3180| gnu ldxtr %f8,3,%f0,1 + 2580| gnu ldxr %f8,%f0 + 2580| gnu ldxr %f8,%f0 + b3460080| gnu lexbr %f8,%f0 + b3463180| gnu lexbra %f8,3,%f0,1 + b3660080| gnu lexr %f8,%f0 + b3440080| gnu ledbr %f8,%f0 + b3443180| gnu ledbra %f8,3,%f0,1 + b3d53180| gnu ledtr %f8,3,%f0,1 + 3580| gnu ledr 
%f8,%f0 + 3580| gnu ledr %f8,%f0 + b24b0080| gnu lura %r8,%r0 + b9050080| gnu lurag %r8,%r0 + b3760080| gnu lzxr %f8 + b3750080| gnu lzdr %f8 + b3740080| gnu lzer %f8 + b2470080| gnu msta %r8 + b232100b| gnu msch 11(%r1) + af082006| gnu mc 6(%r2),8 +e54420060008| gnu mvhhi 6(%r2),8 +e54c20060008| gnu mvhi 6(%r2),8 +e54820060008| gnu mvghi 6(%r2),8 +d2032006100b| gnu mvc 6(4,%r2),11(%r1) + 92082006| gnu mvi 6(%r2),8 +eb0820060052| gnu mviy 6(%r2),8 +e8032006100b| gnu mvcin 6(4,%r2),11(%r1) + 0e80| gnu mvcl %r8,%r0 + a880100b| gnu mvcle %r8,%r0,11(%r1) +eb80100b008e| gnu mvclu %r8,%r0,11(%r1) +d1032006100b| gnu mvn 6(4,%r2),11(%r1) + b2540080| gnu mvpg %r8,%r0 +e50a2006100b| gnu mvcrl 6(%r2),11(%r1) + b2550080| gnu mvst %r8,%r0 +da802006100b| gnu mvcp 6(%r8,%r2),11(%r1),%r0 +db802006100b| gnu mvcs 6(%r8,%r2),11(%r1),%r0 +e50f2006100b| gnu mvcdk 6(%r2),11(%r1) +d9802006100b| gnu mvck 6(%r8,%r2),11(%r1),%r0 +f1332006100b| gnu mvo 6(4,%r2),11(4,%r1) +c8002006100b| gnu mvcos 6(%r2),11(%r1),%r0 +e50e2006100b| gnu mvcsk 6(%r2),11(%r1) +d3032006100b| gnu mvz 6(4,%r2),11(%r1) +e382100b0084| gnu mg %r8,11(%r2,%r1) + b9ec0080| gnu mgrk %r8,%r0,%r0 + 5c82100b| gnu m %r8,11(%r2,%r1) +e382100b005c| gnu mfy %r8,11(%r2,%r1) + 1c80| gnu mr %r8,%r0 + b34c0080| gnu mxbr %f8,%f0 + b3d80080| gnu mxtr %f8,%f0,%f0 + b3d80180| gnu mxtra %f8,%f0,%f0,1 + 2680| gnu mxr %f8,%f0 +ed82100b001c| gnu mdb %f8,11(%r2,%r1) + b31c0080| gnu mdbr %f8,%f0 + b3d00080| gnu mdtr %f8,%f0,%f0 + b3d00180| gnu mdtra %f8,%f0,%f0,1 + 6c82100b| gnu md %f8,11(%r2,%r1) + 2c80| gnu mdr %f8,%f0 +ed82100b0007| gnu mxdb %f8,11(%r2,%r1) + b3070080| gnu mxdbr %f8,%f0 + 6782100b| gnu mxd %f8,11(%r2,%r1) + 2780| gnu mxdr %f8,%f0 +ed82100b0017| gnu meeb %f8,11(%r2,%r1) + b3170080| gnu meebr %f8,%f0 +ed82100b0037| gnu mee %f8,11(%r2,%r1) + b3370080| gnu meer %f8,%f0 +ed82100b000c| gnu mdeb %f8,11(%r2,%r1) + b30c0080| gnu mdebr %f8,%f0 + 7c82100b| gnu mde %f8,11(%r2,%r1) + 3c80| gnu mder %f8,%f0 + 7c82100b| gnu mde 
%f8,11(%r2,%r1) + 3c80| gnu mder %f8,%f0 +ed02100b803a| gnu may %f8,%f0,11(%r2,%r1) + b33a8000| gnu mayr %f8,%f0,%f0 +ed02100b801e| gnu madb %f8,%f0,11(%r2,%r1) + b31e8000| gnu madbr %f8,%f0,%f0 +ed02100b803e| gnu mad %f8,%f0,11(%r2,%r1) + b33e8000| gnu madr %f8,%f0,%f0 +ed02100b800e| gnu maeb %f8,%f0,11(%r2,%r1) + b30e8000| gnu maebr %f8,%f0,%f0 +ed02100b802e| gnu mae %f8,%f0,11(%r2,%r1) + b32e8000| gnu maer %f8,%f0,%f0 +ed02100b803c| gnu mayh %f8,%f0,11(%r2,%r1) + b33c8000| gnu mayhr %f8,%f0,%f0 +ed02100b8038| gnu mayl %f8,%f0,11(%r2,%r1) + b3388000| gnu maylr %f8,%f0,%f0 +ed02100b801f| gnu msdb %f8,%f0,11(%r2,%r1) + b31f8000| gnu msdbr %f8,%f0,%f0 +ed02100b803f| gnu msd %f8,%f0,11(%r2,%r1) + b33f8000| gnu msdr %f8,%f0,%f0 +ed02100b800f| gnu mseb %f8,%f0,11(%r2,%r1) + b30f8000| gnu msebr %f8,%f0,%f0 +ed02100b802f| gnu mse %f8,%f0,11(%r2,%r1) + b32f8000| gnu mser %f8,%f0,%f0 +fc332006100b| gnu mp 6(4,%r2),11(4,%r1) + 4c82100b| gnu mh %r8,11(%r2,%r1) +e382100b007c| gnu mhy %r8,11(%r2,%r1) +e382100b003c| gnu mgh %r8,11(%r2,%r1) + a78c0008| gnu mhi %r8,8 + a78d0008| gnu mghi %r8,8 +e382100b0086| gnu mlg %r8,11(%r2,%r1) + b9860080| gnu mlgr %r8,%r0 +e382100b0096| gnu ml %r8,11(%r2,%r1) + b9960080| gnu mlr %r8,%r0 + 7182100b| gnu ms %r8,11(%r2,%r1) +e382100b0053| gnu msc %r8,11(%r2,%r1) + b2520080| gnu msr %r8,%r0 + b9fd0080| gnu msrkc %r8,%r0,%r0 +e382100b0051| gnu msy %r8,11(%r2,%r1) +e382100b000c| gnu msg %r8,11(%r2,%r1) +e382100b0083| gnu msgc %r8,11(%r2,%r1) + b90c0080| gnu msgr %r8,%r0 + b9ed0080| gnu msgrkc %r8,%r0,%r0 +e382100b001c| gnu msgf %r8,11(%r2,%r1) + b91c0080| gnu msgfr %r8,%r0 +c28100000008| gnu msfi %r8,8 +c28000000008| gnu msgfi %r8,8 +ed02100b803d| gnu myh %f8,%f0,11(%r2,%r1) + b33d8000| gnu myhr %f8,%f0,%f0 +ed02100b8039| gnu myl %f8,%f0,11(%r2,%r1) + b3398000| gnu mylr %f8,%f0,%f0 +ed02100b803b| gnu my %f8,%f0,11(%r2,%r1) + b33b8000| gnu myr %f8,%f0,%f0 + b9740080| gnu nnrk %r8,%r0,%r0 + b9640080| gnu nngrk %r8,%r0,%r0 + b93b0000| gnu nnpa + 
b2fa00c8| gnu niai 12,8 +e382100b0025| gnu ntstg %r8,11(%r2,%r1) + b9760080| gnu nork %r8,%r0,%r0 + b9660080| gnu nogrk %r8,%r0,%r0 + b9770080| gnu nxrk %r8,%r0,%r0 + b9670080| gnu nxgrk %r8,%r0,%r0 + 5682100b| gnu o %r8,11(%r2,%r1) + 1680| gnu or %r8,%r0 + b9f60080| gnu ork %r8,%r0,%r0 +e382100b0056| gnu oy %r8,11(%r2,%r1) +e382100b0081| gnu og %r8,11(%r2,%r1) + b9810080| gnu ogr %r8,%r0 + b9e60080| gnu ogrk %r8,%r0,%r0 +d6032006100b| gnu oc 6(4,%r2),11(%r1) + 96082006| gnu oi 6(%r2),8 +eb0820060056| gnu oiy 6(%r2),8 + a5880008| gnu oihh %r8,8 + a5890008| gnu oihl %r8,8 +c08c00000008| gnu oihf %r8,8 + a58a0008| gnu oilh %r8,8 + a58b0008| gnu oill %r8,8 +c08d00000008| gnu oilf %r8,8 + b9750080| gnu ocrk %r8,%r0,%r0 + b9650080| gnu ocgrk %r8,%r0,%r0 +f2332006100b| gnu pack 6(4,%r2),11(4,%r1) +e9032006100b| gnu pka 6(%r2),11(4,%r1) +e1032006100b| gnu pku 6(%r2),11(4,%r1) + b22e0080| gnu pgin %r8,%r0 + b22f0080| gnu pgout %r8,%r0 + b92c0000| gnu pcc + b9280000| gnu pckmo + 010a| gnu pfpo + b9af0080| gnu pfmf %r8,%r0 +ee80100b80cd| gnu plo %r8,11(%r1),%r0,205(%r8) + b2e83080| gnu ppa %r8,%r0,3 + b93c0080| gnu prno %r8,%r0 + b93c0080| gnu prno %r8,%r0 + 0104| gnu ptff + b9a20080| gnu ptf %r8 + b9e13080| gnu popcnt %r8,%r0,3 +e372100b0036| gnu pfd 7,11(%r2,%r1) +c67200000000| gnu pfdrl 7,0xe68 + b218100b| gnu pc 11(%r1) + 0101| gnu pr + b2280080| gnu pt %r8,%r0 + b99e0080| gnu pti %r8,%r0 + b2480000| gnu palb + b20d0000| gnu ptlb + b3fd0180| gnu qaxtr %f8,%f0,%f0,1 + b3f50180| gnu qadtr %f8,%f0,%f0,1 + b28f100b| gnu qpaci 11(%r1) + b3ff0180| gnu rrxtr %f8,%f0,%r0,1 + b3f70180| gnu rrdtr %f8,%f0,%r0,1 + b23b0000| gnu rchp + b98b0180| gnu rdp %r8,%r0,%r0,1 + b22a0080| gnu rrbe %r8,%r0 + b9ae0080| gnu rrbm %r8,%r0 + b277100b| gnu rp 11(%r1) + b2380000| gnu rsch +eb80100b001d| gnu rll %r8,%r0,11(%r1) +eb80100b001c| gnu rllg %r8,%r0,11(%r1) +ec8009691254| gnu rnsbg %r8,%r0,9,105,18 +ec8009691257| gnu rxsbg %r8,%r0,9,105,18 +ec8009691255| gnu risbg %r8,%r0,9,105,18 
+ec8009691259| gnu risbgn %r8,%r0,9,105,18 +ec800969125d| gnu risbhg %r8,%r0,9,105,18 +ec8009691251| gnu risblg %r8,%r0,9,105,18 +ec8009691256| gnu rosbg %r8,%r0,9,105,18 + b25e0080| gnu srst %r8,%r0 + b9be0080| gnu srstu %r8,%r0 + b9f00180| gnu selro %r8,%r0,%r0 + b9e30180| gnu selgro %r8,%r0,%r0 + b9c00180| gnu selfhro %r8,%r0,%r0 + b24e0080| gnu sar %a8,%r0 + b2370000| gnu sal + b219100b| gnu sac 11(%r1) + b279100b| gnu sacf 11(%r1) + 010c| gnu sam24 + 010d| gnu sam31 + 010e| gnu sam64 + b299100b| gnu srnm 11(%r1) + b2b8100b| gnu srnmb 11(%r1) + b23c0000| gnu schm + b204100b| gnu sck 11(%r1) + b206100b| gnu sckc 11(%r1) + 0107| gnu sckpf + b208100b| gnu spt 11(%r1) + b2b9100b| gnu srnmt 11(%r1) + b3840080| gnu sfpc %r8 + b3850080| gnu sfasr %r8 + b210100b| gnu spx 11(%r1) + 0480| gnu spm %r8 + b20a100b| gnu spka 11(%r1) + b2250080| gnu ssar %r8 + b99f0080| gnu ssair %r8 + b22b3080| gnu sske %r8,%r0,3 + 80002006| gnu ssm 6(%r2) +f0392006100b| gnu srp 6(4,%r2),11(%r1),9 + 8f80100b| gnu slda %r8,11(%r1) + 8d80100b| gnu sldl %r8,11(%r1) + 8b80100b| gnu sla %r8,11(%r1) +eb80100b00dd| gnu slak %r8,%r0,11(%r1) +eb80100b000b| gnu slag %r8,%r0,11(%r1) + 8980100b| gnu sll %r8,11(%r1) +eb80100b00df| gnu sllk %r8,%r0,11(%r1) +eb80100b000d| gnu sllg %r8,%r0,11(%r1) + 8e80100b| gnu srda %r8,11(%r1) + 8c80100b| gnu srdl %r8,11(%r1) + 8a80100b| gnu sra %r8,11(%r1) +eb80100b00dc| gnu srak %r8,%r0,11(%r1) +eb80100b000a| gnu srag %r8,%r0,11(%r1) + 8880100b| gnu srl %r8,11(%r1) +eb80100b00de| gnu srlk %r8,%r0,11(%r1) +eb80100b000c| gnu srlg %r8,%r0,11(%r1) +ed02100b8048| gnu slxt %f8,%f0,11(%r2,%r1) +ed02100b8040| gnu sldt %f8,%f0,11(%r2,%r1) +ed02100b8049| gnu srxt %f8,%f0,11(%r2,%r1) +ed02100b8041| gnu srdt %f8,%f0,11(%r2,%r1) + ae80100b| gnu sigp %r8,%r0,11(%r1) + b9380080| gnu sortl %r8,%r0 + b3160080| gnu sqxbr %f8,%f0 + b3360080| gnu sqxr %f8,%f0 +ed82100b0015| gnu sqdb %f8,11(%r2,%r1) + b3150080| gnu sqdbr %f8,%f0 +ed82100b0035| gnu sqd %f8,11(%r2,%r1) + b2440080| gnu sqdr 
%f8,%f0 +ed82100b0014| gnu sqeb %f8,11(%r2,%r1) + b3140080| gnu sqebr %f8,%f0 +ed82100b0034| gnu sqe %f8,11(%r2,%r1) + b2450080| gnu sqer %f8,%f0 + b233100b| gnu ssch 11(%r1) + 5082100b| gnu st %r8,11(%r2,%r1) +e382100b0050| gnu sty %r8,11(%r2,%r1) +e382100b0024| gnu stg %r8,11(%r2,%r1) + 6082100b| gnu std %f8,11(%r2,%r1) +ed82100b0067| gnu stdy %f8,11(%r2,%r1) + 7082100b| gnu ste %f8,11(%r2,%r1) +ed82100b0066| gnu stey %f8,11(%r2,%r1) + 9b80100b| gnu stam %a8,%a0,11(%r1) +eb80100b009b| gnu stamy %a8,%a0,11(%r1) + b201100b| gnu stbear 11(%r1) + b23a100b| gnu stcps 11(%r1) + b239100b| gnu stcrw 11(%r1) + 4282100b| gnu stc %r8,11(%r2,%r1) +e382100b0072| gnu stcy %r8,11(%r2,%r1) +e382100b00c3| gnu stch %r8,11(%r2,%r1) +eb83100b002c| gnu stcmh %r8,3,11(%r1) + be83100b| gnu stcm %r8,3,11(%r1) +eb83100b002d| gnu stcmy %r8,3,11(%r1) + b205100b| gnu stck 11(%r1) + b207100b| gnu stckc 11(%r1) + b278100b| gnu stcke 11(%r1) + b27c100b| gnu stckf 11(%r1) + b680100b| gnu stctl %c8,%c0,11(%r1) +eb80100b0025| gnu stctg %c8,%c0,11(%r1) + b212100b| gnu stap 11(%r1) + b202100b| gnu stidp 11(%r1) + b209100b| gnu stpt 11(%r1) + b2b1100b| gnu stfl 11(%r1) + b2b0100b| gnu stfle 11(%r1) + b29c100b| gnu stfpc 11(%r1) +e382100b0049| gnu stgsc %r8,11(%r2,%r1) + 4082100b| gnu sth %r8,11(%r2,%r1) +e382100b0070| gnu sthy %r8,11(%r2,%r1) +e382100b00c7| gnu sthh %r8,11(%r2,%r1) +c48700000000| gnu sthrl %r8,0x109c +e382100b00cb| gnu stfh %r8,11(%r2,%r1) +eb83100b00e1| gnu stocfhnle %r8,11(%r1) + 9080100b| gnu stm %r8,%r0,11(%r1) +eb80100b0090| gnu stmy %r8,%r0,11(%r1) +eb80100b0024| gnu stmg %r8,%r0,11(%r1) +eb80100b0026| gnu stmh %r8,%r0,11(%r1) +eb83100b00f3| gnu stocnle %r8,11(%r1) +eb83100b00e3| gnu stocgnle %r8,11(%r1) +e382100b008e| gnu stpq %r8,11(%r2,%r1) + b211100b| gnu stpx 11(%r1) +e5022006100b| gnu strag 6(%r2),11(%r1) +c48f00000000| gnu strl %r8,0x10e0 +c48b00000000| gnu stgrl %r8,0x10e6 +e382100b003f| gnu strvh %r8,11(%r2,%r1) +e382100b003e| gnu strv %r8,11(%r2,%r1) +e382100b002f| 
gnu strvg %r8,11(%r2,%r1) + b234100b| gnu stsch 11(%r1) + b27d100b| gnu stsi 11(%r1) + ac082006| gnu stnsm 6(%r2),8 + ad082006| gnu stosm 6(%r2),8 + b2460080| gnu stura %r8,%r0 + b9250080| gnu sturg %r8,%r0 + 5b82100b| gnu s %r8,11(%r2,%r1) + 1b80| gnu sr %r8,%r0 + b9f90080| gnu srk %r8,%r0,%r0 +e382100b005b| gnu sy %r8,11(%r2,%r1) +e382100b0009| gnu sg %r8,11(%r2,%r1) + b9090080| gnu sgr %r8,%r0 + b9e90080| gnu sgrk %r8,%r0,%r0 +e382100b0019| gnu sgf %r8,11(%r2,%r1) + b9190080| gnu sgfr %r8,%r0 + b34b0080| gnu sxbr %f8,%f0 + b3db0080| gnu sxtr %f8,%f0,%f0 + b3db0180| gnu sxtra %f8,%f0,%f0,1 +ed82100b001b| gnu sdb %f8,11(%r2,%r1) + b31b0080| gnu sdbr %f8,%f0 + b3d30080| gnu sdtr %f8,%f0,%f0 + b3d30180| gnu sdtra %f8,%f0,%f0,1 +ed82100b000b| gnu seb %f8,11(%r2,%r1) + b30b0080| gnu sebr %f8,%f0 +fb332006100b| gnu sp 6(4,%r2),11(4,%r1) + 4b82100b| gnu sh %r8,11(%r2,%r1) +e382100b007b| gnu shy %r8,11(%r2,%r1) +e382100b0039| gnu sgh %r8,11(%r2,%r1) + b9c90080| gnu shhhr %r8,%r0,%r0 + b9d90080| gnu shhlr %r8,%r0,%r0 + 5f82100b| gnu sl %r8,11(%r2,%r1) + 1f80| gnu slr %r8,%r0 + b9fb0080| gnu slrk %r8,%r0,%r0 +e382100b005f| gnu sly %r8,11(%r2,%r1) +e382100b000b| gnu slg %r8,11(%r2,%r1) + b90b0080| gnu slgr %r8,%r0 + b9eb0080| gnu slgrk %r8,%r0,%r0 +e382100b001b| gnu slgf %r8,11(%r2,%r1) + b91b0080| gnu slgfr %r8,%r0 + b9cb0080| gnu slhhhr %r8,%r0,%r0 + b9db0080| gnu slhhlr %r8,%r0,%r0 +c28500000008| gnu slfi %r8,8 +c28400000008| gnu slgfi %r8,8 +e382100b0099| gnu slb %r8,11(%r2,%r1) + b9990080| gnu slbr %r8,%r0 +e382100b0089| gnu slbg %r8,11(%r2,%r1) + b9890080| gnu slbgr %r8,%r0 + 3780| gnu sxr %f8,%f0 + 6b82100b| gnu sd %f8,11(%r2,%r1) + 2b80| gnu sdr %f8,%f0 + 7b82100b| gnu se %f8,11(%r2,%r1) + 3b80| gnu ser %f8,%f0 + 6f82100b| gnu sw %f8,11(%r2,%r1) + 2f80| gnu swr %f8,%f0 + 7f82100b| gnu su %f8,11(%r2,%r1) + 3f80| gnu sur %f8,%f0 + 0a7c| gnu svc 124 + b24c0080| gnu tar %a8,%r0 + 010b| gnu tam + 93002006| gnu ts 6(%r2) + b22c0080| gnu tb %r8,%r0 +ed82100b0012| gnu tcxb 
%f8,11(%r2,%r1) +ed82100b0058| gnu tdcxt %f8,11(%r2,%r1) +ed82100b0011| gnu tcdb %f8,11(%r2,%r1) +ed82100b0054| gnu tdcdt %f8,11(%r2,%r1) +ed82100b0010| gnu tceb %f8,11(%r2,%r1) +ed82100b0050| gnu tdcet %f8,11(%r2,%r1) +ed82100b0059| gnu tdgxt %f8,11(%r2,%r1) +ed82100b0055| gnu tdgdt %f8,11(%r2,%r1) +ed82100b0051| gnu tdget %f8,11(%r2,%r1) +eb30200600c0| gnu tp 6(4,%r2) + b9a10080| gnu tpei %r8,%r0 + b236100b| gnu tpi 11(%r1) +e5012006100b| gnu tprot 6(%r2),11(%r1) + b235100b| gnu tsch 11(%r1) + 91082006| gnu tm 6(%r2),8 +eb0820060051| gnu tmy 6(%r2),8 + a7820008| gnu tmhh %r8,8 + a7830008| gnu tmhl %r8,8 + a7800008| gnu tmlh %r8,8 + a7810008| gnu tmll %r8,8 + a7800008| gnu tmlh %r8,8 + a7810008| gnu tmll %r8,8 + 9980100b| gnu trace %r8,%r0,11(%r1) +eb80100b000f| gnu tracg %r8,%r0,11(%r1) + b2fc100b| gnu tabort 11(%r1) +e56120060008| gnu tbeginc 6(%r2),8 +e56020060008| gnu tbegin 6(%r2),8 + b2f80000| gnu tend +dc032006100b| gnu tr 6(4,%r2),11(%r1) +dd032006100b| gnu trt 6(4,%r2),11(%r1) + b9bf3080| gnu trte %r8,%r0,3 +d0032006100b| gnu trtr 6(4,%r2),11(%r1) + b9bd3080| gnu trtre %r8,%r0,3 + b2a50080| gnu tre %r8,%r0 + b9933080| gnu troo %r8,%r0,3 + b9923080| gnu trot %r8,%r0,3 + b9913080| gnu trto %r8,%r0,3 + b9903080| gnu trtt %r8,%r0,3 + 01ff| gnu trap2 + b2ff100b| gnu trap4 11(%r1) +f3332006100b| gnu unpk 6(4,%r2),11(4,%r1) +ea032006100b| gnu unpka 6(4,%r2),11(%r1) +e2032006100b| gnu unpku 6(4,%r2),11(%r1) + 0102| gnu upt +e723500018f3| gnu vah %v18,%v3,%v5 +e723500018f1| gnu vacch %v18,%v3,%v5 +e62350969871| gnu vap %v18,%v3,%v5,105,9 +e723590088bb| gnu vac %v18,%v3,%v5,%v8,9 +e723590088b9| gnu vaccc %v18,%v3,%v5,%v8,9 +e72350000868| gnu vn %v18,%v3,%v5 +e72350000869| gnu vnc %v18,%v3,%v5 +e723500018f2| gnu vavgh %v18,%v3,%v5 +e723500018f0| gnu vavglh %v18,%v3,%v5 +e72350000885| gnu vbperm %v18,%v3,%v5 +e72350000866| gnu vcksm %v18,%v3,%v5 +e60230300477| gnu vcp %v18,%v3,3 +e723509018f8| gnu vceq %v18,%v3,%v5,1,9 +e723509018fb| gnu vch %v18,%v3,%v5,1,9 
+e723509018f9| gnu vchl %v18,%v3,%v5,1,9 +e6235010087d| gnu vcsph %v18,%v3,%v5,1 +e68300310050| gnu vcvb %r8,%v3,3,1 +e68300310052| gnu vcvbg %r8,%v3,3,1 +e62000109858| gnu vcvd %v18,%r0,9,1 +e6200010985a| gnu vcvdg %v18,%r0,9,1 +e62300300851| gnu vclzdp %v18,%v3,3 +e72300003853| gnu vclzg %v18,%v3 +e72300003852| gnu vctzg %v18,%v3 +e6235096987a| gnu vdp %v18,%v3,%v5,105,9 +e723000038db| gnu vecg %v18,%v3 +e723000038d9| gnu veclg %v18,%v3 +e72350699872| gnu verim %v18,%v3,%v5,105,9 +e725100b1833| gnu verllh %v18,%v5,11(%r1) +e72350001873| gnu verllvh %v18,%v3,%v5 +e72350001870| gnu veslvh %v18,%v3,%v5 +e725100b1830| gnu veslh %v18,%v5,11(%r1) +e725100b183a| gnu vesrah %v18,%v5,11(%r1) +e7235000187a| gnu vesravh %v18,%v3,%v5 +e725100b1838| gnu vesrlh %v18,%v5,11(%r1) +e72350001878| gnu vesrlvh %v18,%v3,%v5 +e7235000086d| gnu vx %v18,%v3,%v5 +e72350901882| gnu vfaehs %v18,%v3,%v5,8 +e72350901880| gnu vfeeh %v18,%v3,%v5,9 +e72350901881| gnu vfeneh %v18,%v3,%v5,9 +e723500918e3| gnu vfa %v18,%v3,%v5,1,9 +e723000138ca| gnu wfk %v18,%v3,3,1 +e72350b918e8| gnu vfce %v18,%v3,%v5,1,9,11 +e72350b918eb| gnu vfch %v18,%v3,%v5,1,9,11 +e72350b918ea| gnu vfche %v18,%v3,%v5,1,9,11 +e723000138cb| gnu wfc %v18,%v3,3,1 +e62300013856| gnu vclfnh %v18,%v3,3,1 +e6230001385e| gnu vclfnl %v18,%v3,3,1 +e62350091875| gnu vcrnf %v18,%v3,%v5,1,9 +e723009138c3| gnu vcdgb %v18,%v3,1,9 +e723009138c3| gnu vcdgb %v18,%v3,1,9 +e723009138c1| gnu vcdlgb %v18,%v3,1,9 +e723009138c1| gnu vcdlgb %v18,%v3,1,9 +e6230001385d| gnu vcfn %v18,%v3,3,1 +e723009138c2| gnu vcgdb %v18,%v3,1,9 +e723009138c2| gnu vcgdb %v18,%v3,1,9 +e723009138c0| gnu vclgdb %v18,%v3,1,9 +e723009138c0| gnu vclgdb %v18,%v3,1,9 +e62300013855| gnu vcnf %v18,%v3,3,1 +e723500918e5| gnu vfd %v18,%v3,%v5,1,9 +e723000138c4| gnu vfll %v18,%v3,3,1 +e723009138c5| gnu vflrd %v18,%v3,1,9 +e72350b918ef| gnu vfmax %v18,%v3,%v5,1,9,11 +e72350b918ee| gnu vfmin %v18,%v3,%v5,1,9,11 +e723500918e7| gnu vfm %v18,%v3,%v5,1,9 +e7235b09888f| gnu vfma 
%v18,%v3,%v5,%v8,9,11 +e7235b09888e| gnu vfms %v18,%v3,%v5,%v8,9,11 +e7235b09889f| gnu vfnma %v18,%v3,%v5,%v8,9,11 +e7235b09889e| gnu vfnms %v18,%v3,%v5,%v8,9,11 +e723009138cc| gnu vfpso %v18,%v3,3,1,9 +e723000138ce| gnu vfsq %v18,%v3,3,1 +e723500918e2| gnu vfs %v18,%v3,%v5,1,9 +e7230099184a| gnu vftci %v18,%v3,9,1,9 +e723500018b4| gnu vgfmh %v18,%v3,%v5 +e723590088bc| gnu vgfma %v18,%v3,%v5,%v8,9 +e723100b3813| gnu vgef %v18,11(%v3,%r1),3 +e723100b3812| gnu vgeg %v18,11(%v3,%r1),3 +e72000080844| gnu vgbm %v18,8 +e72008091846| gnu vgmh %v18,8,9 +e7230090385c| gnu vistr %v18,%v3,3,9 +e722100b3806| gnu vl %v18,11(%r2,%r1),3 +e72300000856| gnu vlr %v18,%v3 +e722100b3805| gnu vlrepg %v18,11(%r2,%r1) +e622100b3801| gnu vlebrh %v18,11(%r2,%r1),3 +e622100b3803| gnu vlebrf %v18,11(%r2,%r1),3 +e622100b3802| gnu vlebrg %v18,11(%r2,%r1),3 +e622100b3805| gnu vlbrrepg %v18,11(%r2,%r1) +e622100b3804| gnu ldrv %v18,11(%r2,%r1) +e622100b3806| gnu vlbrg %v18,11(%r2,%r1) +e723000038de| gnu vlcg %v18,%v3 +e722100b3801| gnu vleh %v18,11(%r2,%r1),3 +e722100b3803| gnu vlef %v18,11(%r2,%r1),3 +e722100b3802| gnu vleg %v18,11(%r2,%r1),3 +e722100b3800| gnu vleb %v18,11(%r2,%r1),3 +e72000083841| gnu vleih %v18,8,3 +e72000083843| gnu vleif %v18,8,3 +e72000083842| gnu vleig %v18,8,3 +e72000083840| gnu vleib %v18,8,3 +e622100b3807| gnu vlerg %v18,11(%r2,%r1) +e723009138c7| gnu vfidb %v18,%v3,1,9 +e785100b1021| gnu vlgvh %r8,%v5,11(%r1) +e62000089849| gnu vlip %v18,8,9 +e722100b3804| gnu vllezg %v18,11(%r2,%r1) +e725100b1836| gnu vlm %v18,%v5,11(%r1),1 +e723000038df| gnu vlpg %v18,%v3 +e609100b2135| gnu vlrl %v18,11(%r1),9 +e600100b2137| gnu vlrlr %v18,%r0,11(%r1) +e722100b3807| gnu vlbb %v18,11(%r2,%r1),3 +e720100b1822| gnu vlvgh %v18,%r0,11(%r1) +e72000000862| gnu vlvgp %v18,%r0,%r0 +e720100b0837| gnu vll %v18,%r0,11(%r1) +e723500018ff| gnu vmxh %v18,%v3,%v5 +e723500018fd| gnu vmxlh %v18,%v3,%v5 +e72350001861| gnu vmrhh %v18,%v3,%v5 +e72350001860| gnu vmrlh %v18,%v3,%v5 +e723500018fe| gnu vmnh 
%v18,%v3,%v5 +e723500018fc| gnu vmnlh %v18,%v3,%v5 +e723590088ae| gnu vmae %v18,%v3,%v5,%v8,9 +e723590088ab| gnu vmah %v18,%v3,%v5,%v8,9 +e723590088ac| gnu vmale %v18,%v3,%v5,%v8,9 +e723590088a9| gnu vmalh %v18,%v3,%v5,%v8,9 +e723590088ad| gnu vmalo %v18,%v3,%v5,%v8,9 +e723590088aa| gnu vmal %v18,%v3,%v5,%v8,9 +e723590088af| gnu vmao %v18,%v3,%v5,%v8,9 +e62350969879| gnu vmsp %v18,%v3,%v5,105,9 +e62350969878| gnu vmp %v18,%v3,%v5,105,9 +e723500018a6| gnu vmeh %v18,%v3,%v5 +e723500018a3| gnu vmhh %v18,%v3,%v5 +e723500018a4| gnu vmleh %v18,%v3,%v5 +e723500018a1| gnu vmlhh %v18,%v3,%v5 +e723500018a5| gnu vmloh %v18,%v3,%v5 +e723500018a2| gnu vmlhw %v18,%v3,%v5 +e723500018a7| gnu vmoh %v18,%v3,%v5 +e72359b088b8| gnu vmsl %v18,%v3,%v5,%v8,9,11 +e7235000086e| gnu vnn %v18,%v3,%v5 +e7235000086b| gnu vno %v18,%v3,%v5 +e7235000086c| gnu vnx %v18,%v3,%v5 +e7235000086a| gnu vo %v18,%v3,%v5 +e7235000086f| gnu voc %v18,%v3,%v5 +e72350001894| gnu vpkh %v18,%v3,%v5 +e72350901895| gnu vpkls %v18,%v3,%v5,1,9 +e72350901897| gnu vpks %v18,%v3,%v5,1,9 +e609100b2134| gnu vpkz %v18,11(%r1),9 +e62350969870| gnu vpkzr %v18,%v3,%v5,105,9 +e6236990985b| gnu vpsop %v18,%v3,9,105,9 +e7235000888c| gnu vperm %v18,%v3,%v5,%v8 +e72350001884| gnu vpdi %v18,%v3,%v5,1 +e72300003850| gnu vpopctg %v18,%v3 +e6235096987b| gnu vrp %v18,%v3,%v5,105,9 +e7250008184d| gnu vreph %v18,%v5,8 +e72000083845| gnu vrepig %v18,8 +e723100b381b| gnu vscef %v18,11(%v3,%r1),3 +e723100b381a| gnu vsceg %v18,11(%v3,%r1),3 +e7235000888d| gnu vsel %v18,%v3,%v5,%v8 +e6235096987e| gnu vsdp %v18,%v3,%v5,105,9 +e62369909859| gnu vsrp %v18,%v3,9,105,9 +e62350969872| gnu vsrpr %v18,%v3,%v5,105,9 +e72350000874| gnu vsl %v18,%v3,%v5 +e72350000875| gnu vslb %v18,%v3,%v5 +e72350690886| gnu vsld %v18,%v3,%v5,105 +e72350690877| gnu vsldb %v18,%v3,%v5,105 +e7235000087e| gnu vsra %v18,%v3,%v5 +e7235000087f| gnu vsrab %v18,%v3,%v5 +e72350690887| gnu vsrd %v18,%v3,%v5,105 +e7235000087c| gnu vsrl %v18,%v3,%v5 +e7235000087d| gnu vsrlb 
%v18,%v3,%v5 +e7230000385f| gnu vseg %v18,%v3,3 +e722100b380e| gnu vst %v18,11(%r2,%r1),3 +e622100b3809| gnu vstebrh %v18,11(%r2,%r1),3 +e622100b380b| gnu vstebrf %v18,11(%r2,%r1),3 +e622100b380a| gnu vstebrg %v18,11(%r2,%r1),3 +e622100b380e| gnu vstbrg %v18,11(%r2,%r1) +e722100b3809| gnu vsteh %v18,11(%r2,%r1),3 +e722100b380b| gnu vstef %v18,11(%r2,%r1),3 +e722100b380a| gnu vsteg %v18,11(%r2,%r1),3 +e722100b3808| gnu vsteb %v18,11(%r2,%r1),3 +e622100b380f| gnu vsterg %v18,11(%r2,%r1) +e725100b183e| gnu vstm %v18,%v5,11(%r1),1 +e609100b213d| gnu vstrl %v18,11(%r1),9 +e600100b213f| gnu vstrlr %v18,%r0,11(%r1) +e720100b083f| gnu vstl %v18,%r0,11(%r1) +e72359b0888a| gnu vstrc %v18,%v3,%v5,%v8,9,11 +e72359b0888b| gnu vstrs %v18,%v3,%v5,%v8,9,11 +e723500018f7| gnu vsh %v18,%v3,%v5 +e723500018f5| gnu vscbih %v18,%v3,%v5 +e62350969873| gnu vsp %v18,%v3,%v5,105,9 +e723590088bd| gnu vsbcbi %v18,%v3,%v5,%v8,9 +e723590088bf| gnu vsbi %v18,%v3,%v5,%v8,9 +e72350001865| gnu vsumgh %v18,%v3,%v5 +e72350001867| gnu vsumq %v18,%v3,%v5,1 +e72350001864| gnu vsumh %v18,%v3,%v5 +e6020000045f| gnu vtp %v18 +e723000008d8| gnu vtm %v18,%v3 +e723000038d7| gnu vuph %v18,%v3,3 +e723000038d5| gnu vuplh %v18,%v3,3 +e723000038d4| gnu vupll %v18,%v3,3 +e723000038d6| gnu vupl %v18,%v3,3 +e609100b213c| gnu vupkz %v18,11(%r1),9 +e62300300854| gnu vupkzh %v18,%v3,3 +e6230030085c| gnu vupkzl %v18,%v3,3 +f8332006100b| gnu zap 6(4,%r2),11(4,%r1) diff --git a/s390x/s390xmap/map.go b/s390x/s390xmap/map.go new file mode 100644 index 00000000..9ba698f4 --- /dev/null +++ b/s390x/s390xmap/map.go @@ -0,0 +1,636 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// s390xmap constructs the s390x opcode map from the instruction set CSV file. 
+// +// Usage: +// +// s390map [-fmt=format] s390x.csv +// +// The known output formats are: +// +// text (default) - print decoding tree in text form +// decoder - print decoding tables for the s390xasm package +// encoder - generate a self-contained file which can be used to encode +// go obj.Progs into machine code +// asm - generate a GNU asm file which can be compiled by gcc containing +// all opcodes discovered in s390x.csv using macro friendly arguments. +package main + +import ( + "bytes" + "encoding/csv" + "flag" + "fmt" + gofmt "go/format" + asm "golang.org/x/arch/s390x/s390xasm" + "log" + "os" + "regexp" + "strconv" + "strings" +) + +var format = flag.String("fmt", "text", "output format: text, decoder, asm") +var debug = flag.Bool("debug", false, "enable debugging output") + +var inputFile string + +func usage() { + fmt.Fprintf(os.Stderr, "usage: s390xmap [-fmt=format] s390x.csv\n") + os.Exit(2) +} + +func main() { + log.SetFlags(0) + log.SetPrefix("s390xmap: ") + + flag.Usage = usage + flag.Parse() + if flag.NArg() != 1 { + usage() + } + + inputFile = flag.Arg(0) + + var printTyp func(*Prog) + switch *format { + default: + log.Fatalf("unknown output format %q", *format) + case "text": + printTyp = printText + case "decoder": + printTyp = printDecoder + case "asm": + printTyp = printASM + case "encoder": + printTyp = printEncoder + } + + p, err := readCSV(flag.Arg(0)) + if err != nil { + log.Fatal(err) + } + log.Printf("Parsed %d instruction forms.", len(p.Insts)) + printTyp(p) +} + +// readCSV reads the CSV file and returns the corresponding Prog. +// It may print details about problems to standard error using the log package. +func readCSV(file string) (*Prog, error) { + // Read input. + // Skip leading blank and # comment lines. 
+ f, err := os.Open(file) + if err != nil { + return nil, err + } + csvReader := csv.NewReader(f) + csvReader.Comment = '#' + table, err := csvReader.ReadAll() + if err != nil { + return nil, fmt.Errorf("parsing %s: %v", file, err) + } + if len(table) == 0 { + return nil, fmt.Errorf("empty csv input") + } + if len(table[0]) < 3 { + return nil, fmt.Errorf("csv too narrow: need at least four columns") + } + + p := &Prog{} + for _, row := range table { + add(p, row[0], row[1], row[2], row[3]) + } + return p, nil +} + +type Prog struct { + Insts []Inst + OpRanges map[string]string + nextOrder int // Next position value (used for Insts[x].order) +} + +type Field struct { + Name string + BitField asm.BitField + Type asm.ArgType + flags uint16 +} + +func (f Field) String() string { + return fmt.Sprintf("%v(%s%v)", f.Type, f.Name, f.BitField) +} + +type Inst struct { + Text string + Encoding string + Op string + Mask uint64 + Value uint64 + DontCare uint64 + Len uint16 + Fields []Field +} + +func (i Inst) String() string { + return fmt.Sprintf("%s (%s) %08x/%08x %v (%s)", i.Op, i.Encoding, i.Value, i.Mask, i.Fields, i.Text) +} + +type Arg struct { + Name string + Bits int8 + Offs int8 +} + +func (a Arg) String() string { + return fmt.Sprintf("%s[%d:%d]", a.Name, a.Offs, a.Offs+a.Bits-1) +} + +func (a Arg) Maximum() int { + return 1< 0 { + args[len(args)-1].Bits += int8(off) + } + if name != "" && name != "??" 
{ + arg := Arg{Name: name, Offs: int8(off), Bits: int8(-off)} + args.Append(arg) + } + } + return args +} + +// Compute the Mask (usually Opcode + secondary Opcode bitfields), +// the Value (the expected value under the mask), and +// reserved bits (i.e the // fields which should be set to 0) +func computeMaskValueReserved(args Args, text string) (mask, value, reserved uint64) { + for i := 0; i < len(args); i++ { + arg := args[i] + v, err := strconv.Atoi(arg.Name) + switch { + case err == nil && v >= 0: // is a numbered field + if v < 0 || v > arg.Maximum() { + fmt.Fprintf(os.Stderr, "%s: field %s value (%d) is out of range (%d-bit)\n", text, arg, v, arg.Bits) + } + mask |= arg.BitMask() + value |= uint64(v) << arg.Shift() + args.Delete(i) + i-- + case arg.Name[0] == '/': // don't care + if arg.Name != strings.Repeat("/", len(arg.Name)) { + log.Fatalf("%s: arg %v named like a don't care bit, but it's not", text, arg) + } + reserved |= arg.BitMask() + args.Delete(i) + i-- + default: + continue + } + } + // sanity checks + if mask&reserved != 0 { + log.Fatalf("%s: mask (%08x) and don't care (%08x) collide", text, mask, reserved) + } + if value&^mask != 0 { + log.Fatalf("%s: value (%08x) out of range of mask (%08x)", text, value, mask) + } + return +} + +func Imm_signed_8bit_check(op string) bool { + imm_8 := []string{"ASI", "AGSI", "ALSI", "ALGSI", "CIB", "CGIB", "CIJ", "CGIJ"} + var ret bool + ret = false + for _, str := range imm_8 { + if strings.Compare(op, str) == 0 { + ret = true + break + } + } + return ret +} + +func Imm_signed_16bit_check(op string) bool { + imm_16 := []string{"AHI", "AGHI", "ALHSIK", "ALGHSIK", "AHIK", "AGHIK", "LHI", "LGHI", "MVGHI", "CIT", "CGIT", "CGHI", "CGHSI", "CHHSI", "CHI", "CHSI", "CRJ", "CGRJ"} + var ret bool + ret = false + for _, str := range imm_16 { + if strings.Compare(op, str) == 0 { + ret = true + break + } + } + return ret +} + +func Imm_signed_32bit_check(op string) bool { + imm_32 := []string{"AFI", "AGFI", "AIH", "CIH", 
"CFI", "CGFI", "CRL", "STRL", "STGRL"} + var ret bool + ret = false + for _, str := range imm_32 { + if strings.Compare(op, str) == 0 { + ret = true + break + } + } + return ret +} + +func check_flags(flags string) bool { + if strings.Contains(flags, "Da") { + return true + } else if strings.Contains(flags, "Db") { + return true + } else if strings.Contains(flags, "Dt") { + return true + } else { + return false + } +} + +// Parse a row from the CSV describing the instructions, and place the +// detected instructions into p. One entry may generate multiple intruction +// entries as each extended mnemonic listed in text is treated like a unique +// instruction. +// func add(p *Prog, text, mnemonics, encoding, format string) { +func add(p *Prog, text, mnemonics, encoding, flags string) { + // Parse encoding, building size and offset of each field. + // The first field in the encoding is the smallest offset. + // And note the MSB is bit 0, not bit 31. + // Example: "31@0|RS@6|RA@11|///@16|26@21|Rc@31|" + var args Args + + args = parseFields(encoding, text) + mask, value, dontCare := computeMaskValueReserved(args, text) + + // split mnemonics into individual instructions + // example: "b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)" + inst := Inst{Text: text, Encoding: mnemonics, Value: value, Mask: mask, DontCare: dontCare} + + // order inst.Args according to mnemonics order + for i, opr := range operandRe.FindAllString(mnemonics, -1) { + if i == 0 { // operation + inst.Op = opr + continue + } + field := Field{Name: opr} + typ := asm.TypeUnknown + flag := uint16(0) + switch opr { + case "R1", "R2", "R3": + s := strings.Split(mnemonics, " ") + switch opr { + case "R1": + switch s[0] { + case "CPDT", "CPXT", "CDXT", "CZXT", "CZDT": + typ = asm.TypeFPReg + flag = 0x2 + case "CUXTR", "EEXTR", "EEDTR", "EFPC", "ESXTR", "ESDTR", "LGDR", "SFPC", "SFASR": + typ = asm.TypeReg + flag = 0x1 + case "CPYA", "LAM", "LAMY", 
"STAM", "STAMY", "SAR", "TAR": + typ = asm.TypeACReg + flag = 0x3 + case "LCTL", "LCTLG", "STCTL", "STCTG": + typ = asm.TypeCReg + flag = 0x4 + default: + if check_flags(flags) { + if strings.Contains(text, "CONVERT TO") { + typ = asm.TypeReg + flag = 0x1 + } else { + typ = asm.TypeFPReg + flag = 0x2 + } + } else { + typ = asm.TypeReg + flag = 0x1 + } + } + case "R2": + switch s[0] { + case "IEXTR", "IEDTR", "LDGR", "RRXTR", "RRDTR": + typ = asm.TypeReg + flag = 0x1 + case "CPYA", "EAR": + typ = asm.TypeACReg + flag = 0x3 + default: + if check_flags(flags) { + if strings.Contains(text, "CONVERT FROM") { + typ = asm.TypeReg + flag = 0x1 + } else { + typ = asm.TypeFPReg + flag = 0x2 + } + } else { + typ = asm.TypeReg + flag = 0x1 + } + } + case "R3": + switch s[0] { + case "LAM", "LAMY", "STAM", "STAMY": + typ = asm.TypeACReg + flag = 0x3 + case "LCTL", "LCTLG", "STCTL", "STCTG": + typ = asm.TypeCReg + flag = 0x4 + default: + if check_flags(flags) { + typ = asm.TypeFPReg + flag = 0x2 + } else { + typ = asm.TypeReg + flag = 0x1 + } + } + } + + case "I", "I1", "I2", "I3", "I4", "I5": + flag = 0x0 + switch opr { + case "I", "I1": + typ = asm.TypeImmUnsigned + + case "I2": + if Imm_signed_8bit_check(inst.Op) { + typ = asm.TypeImmSigned8 + break + } else if Imm_signed_16bit_check(inst.Op) { // "ASI", "AGSI", "ALSI", "ALGSI" + typ = asm.TypeImmSigned16 + break + } else if Imm_signed_32bit_check(inst.Op) { // "AHI", "AGHI", "AHIK", "AGHIK", "LHI", "LGHI" + typ = asm.TypeImmSigned32 + break + } else { + typ = asm.TypeImmUnsigned + break + } + + case "I3", "I4", "I5": + typ = asm.TypeImmUnsigned + + } + + case "RI2", "RI3", "RI4": + flag = 0x80 + i := args.Find(opr) + count := uint8(args[i].Bits) + if count == 12 { + typ = asm.TypeRegImSigned12 + break + } else if count == 16 { + typ = asm.TypeRegImSigned16 + break + } else if count == 24 { + typ = asm.TypeRegImSigned24 + break + } else if count == 32 { + typ = asm.TypeRegImSigned32 + break + } + + case "M1", "M3", "M4", 
"M5", "M6": + flag = 0x800 + typ = asm.TypeMask + + case "B1", "B2", "B3", "B4": + typ = asm.TypeBaseReg + flag = 0x20 | 0x01 + + case "X2": + typ = asm.TypeIndexReg + flag = 0x40 | 0x01 + + case "D1", "D2", "D3", "D4": + flag = 0x10 + i := args.Find(opr) + if uint8(args[i].Bits) == 20 { + typ = asm.TypeDispSigned20 + break + } else { + typ = asm.TypeDispUnsigned + break + } + + case "L1", "L2": + typ = asm.TypeLen + flag = 0x10 + case "V1", "V2", "V3", "V4", "V5", "V6": + typ = asm.TypeVecReg + flag = 0x08 + } + + if typ == asm.TypeUnknown { + log.Fatalf("%s %s unknown type for opr %s", text, inst, opr) + } + field.Type = typ + field.flags = flag + var f1 asm.BitField + i := args.Find(opr) + if i < 0 { + log.Fatalf("%s: couldn't find %s in %s", text, opr, args) + } + f1.Offs, f1.Bits = uint8(args[i].Offs), uint8(args[i].Bits) + field.BitField = f1 + inst.Fields = append(inst.Fields, field) + } + if strings.HasPrefix(inst.Op, "V") || strings.Contains(inst.Op, "WFC") || strings.Contains(inst.Op, "WFK") { //Check Vector Instructions + Bits := asm.BitField{Offs: 36, Bits: 4} + field := Field{Name: "RXB", BitField: Bits, Type: asm.TypeImmUnsigned, flags: 0xC00} + inst.Fields = append(inst.Fields, field) + } + if *debug { + fmt.Printf("%v\n", inst) + } + p.Insts = append(p.Insts, inst) +} + +// operandRe matches each operand (including opcode) in instruction mnemonics +var operandRe = regexp.MustCompile(`([[:alpha:]][[:alnum:]_]*\.?)`) + +// printText implements the -fmt=text mode, which is not implemented (yet?). +func printText(p *Prog) { + log.Fatal("-fmt=text not implemented") +} + +// printEncoder implements the -fmt=encoder mode. which is not implemented (yet?). 
+func printEncoder(p *Prog) { + log.Fatal("-fmt=encoder not implemented") +} + +func printASM(p *Prog) { + fmt.Printf("#include \"hack.h\"\n") + fmt.Printf(".text\n") + for _, inst := range p.Insts { + fmt.Printf("\t%s\n", inst.Encoding) + } +} + +// argFieldName constructs a name for the argField +func argFieldName(f Field) string { + ns := []string{"ap", f.Type.String()} + b := f.BitField + ns = append(ns, fmt.Sprintf("%d_%d", b.Offs, b.Offs+b.Bits-1)) + return strings.Join(ns, "_") +} + +// printDecoder implements the -fmt=decoder mode. +// It emits the tables.go for package armasm's decoder. +func printDecoder(p *Prog) { + var buf bytes.Buffer + + fmt.Fprintf(&buf, "// Code generated by s390xmap -fmt=decoder %s DO NOT EDIT.\n", inputFile) + fmt.Fprintf(&buf, "\n") + + fmt.Fprintf(&buf, "package s390xasm\n\n") + + // Build list of opcodes, using the csv order (which corresponds to ISA docs order) + m := map[string]bool{} + fmt.Fprintf(&buf, "const (\n\t_ Op = iota\n") + for i := 0; i < len(p.Insts); i++ { + name := p.Insts[i].Op + switch name { + case "CUUTF", "CUTFU", "PPNO": + m[name] = false + p.Insts = append(p.Insts[:i], p.Insts[i+1:]...) + i-- + default: + m[name] = true + } + if ok := m[name]; !ok { + continue + } + fmt.Fprintf(&buf, "\t%s\n", name) + } + fmt.Fprint(&buf, ")\n\n\n") + + // Emit slice mapping opcode number to name string. 
+ m = map[string]bool{} + fmt.Fprintf(&buf, "var opstr = [...]string{\n") + for _, inst := range p.Insts { + name := inst.Op + if ok := m[name]; ok { + continue + } + m[name] = true + fmt.Fprintf(&buf, "\t%s: %q,\n", inst.Op, strings.ToLower(inst.Op)) + } + fmt.Fprint(&buf, "}\n\n\n") + + // print out argFields + fmt.Fprintf(&buf, "var (\n") + m = map[string]bool{} + for _, inst := range p.Insts { + for _, f := range inst.Fields { + name := argFieldName(f) + if ok := m[name]; ok { + continue + } + m[name] = true + fmt.Fprintf(&buf, "\t%s = &argField{Type: %#v, flags: %#x, BitField: BitField", name, f.Type, f.flags) + b := f.BitField + fmt.Fprintf(&buf, "{%d, %d }", b.Offs, b.Bits) + fmt.Fprintf(&buf, "}\n") + } + } + fmt.Fprint(&buf, ")\n\n\n") + + // Emit decoding table. + fmt.Fprintf(&buf, "var instFormats = [...]instFormat{\n") + for _, inst := range p.Insts { + m, v, dc := inst.Mask, inst.Value, inst.DontCare + fmt.Fprintf(&buf, "\t{ %s, %#x, %#x, %#x,", inst.Op, m, v, dc) + fmt.Fprintf(&buf, " // %s (%s)\n\t\t[8]*argField{", inst.Text, inst.Encoding) + for _, f := range inst.Fields { + fmt.Fprintf(&buf, "%s, ", argFieldName(f)) + } + fmt.Fprintf(&buf, "}},\n") + } + fmt.Fprint(&buf, "}\n\n") + + out, err := gofmt.Source(buf.Bytes()) + if err != nil { + log.Fatalf("gofmt error: %v", err) + fmt.Printf("%s", buf.Bytes()) + } else { + fmt.Printf("%s", out) + } +} diff --git a/s390x/s390xspec/spec.go b/s390x/s390xspec/spec.go new file mode 100644 index 00000000..cc0ebade --- /dev/null +++ b/s390x/s390xspec/spec.go @@ -0,0 +1,1059 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// S390xspec reads the Principles of Operation PDF Manual +// to collect instruction encoding details and writes those details to standard output +// in CSV format. 
+// +// Usage: +// +// s390xspec z_Architecture_Principles_of_Operation.pdf > s390x.csv +// +// Each CSV line contains three fields: +// +// instruction +// The instruction heading, such as "BRANCH AND LINK". +// mnemonic +// The instruction mnemonics, such as "BAL R1,D2(X2,B2)". +// encoding +// The instruction encoding, a sequence of opcode and operands encoding in respective bit positions +// such as operand@bitposition each separated by | +// Ex: "45@0|R1@8|X2@12|B2@16|D2@20|" +// +// For more on the exact meaning of these fields, see the Principle of Operations IBM-Z Architecture PDF Manual. +package main + +import ( + "bufio" + "fmt" + "log" + "math" + "os" + "rsc.io/pdf" + "sort" + "strconv" + "strings" +) + +type Inst struct { + Name string + Text string + Enc string + Flags string +} + +var stdout *bufio.Writer + +func main() { + log.SetFlags(0) + log.SetPrefix("s390xspec: ") + + if len(os.Args) != 2 { + fmt.Fprintf(os.Stderr, "usage: s390xspec file.pdf\n") + os.Exit(2) + } + + f, err := pdf.Open(os.Args[1]) + if err != nil { + log.Fatal(err) + } + + // Split across multiple columns and pages! + var all = []Inst{} + + // Scan document looking for instructions. + // Must find exactly the ones in the outline. + n := f.NumPage() + for pageNum := 1; pageNum <= n; pageNum++ { + page := f.Page(pageNum) + t1 := getPageContent(page) + if len(t1) > 0 && match(t1[0], "Helvetica-Bold", 13.98, "Instructions Arranged by Name") { + for n := pageNum; n < pageNum+24; n++ { + page := f.Page(n) + table := parsePage(n, page) + all = append(all, table...) 
+ } + break + } else { + continue + } + } + stdout = bufio.NewWriter(os.Stdout) + for _, inst := range all { + if strings.Contains(inst.Name, "\x00I") { + r := rune(0x2190) + inst.Name = strings.Replace(inst.Name, "\x00I", string(r), -1) + } else if strings.Contains(inst.Name, "I\x00") { + r := rune(0x2192) + inst.Name = strings.Replace(inst.Name, "I\x00", string(r), -1) + } + fmt.Fprintf(stdout, "%q,%q,%q,%q\n", inst.Name, inst.Text, inst.Enc, inst.Flags) + } + stdout.Flush() + +} + +// getPageContent gets the page content of a single PDF page +func getPageContent(p pdf.Page) []pdf.Text { + var text []pdf.Text + + content := p.Content() + for _, t := range content.Text { + text = append(text, t) + } + + text = findWords(text) + return text +} + +// parsePage parses single PDF page and returns the instructions content +func parsePage(num int, p pdf.Page) []Inst { + var insts []Inst + text := getPageContent(p) + + for { + var heading, mnemonic, format string + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. + for len(text) > 0 && !(match(text[0], "Helvetica-Narrow", 8, "") && (matchXCord(text[0], 73.9) || matchXCord(text[0], 55.9))) { + text = text[1:] + } + if len(text) == 0 { + break + } + heading = text[0].S + text = text[1:] + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. 
+ for !(matchXCord(text[0], 212.2) || matchXCord(text[0], 230.1) || matchXCord(text[0], 246.2) || matchXCord(text[0], 264.2)) { + heading += text[0].S + if match(text[0], "Wingdings3", 0, "") { + heading += text[1].S + text = text[1:] + } + text = text[1:] + } + if strings.Compare(heading, "DIAGNOSE") == 0 { + text = text[1:] + continue + } + heading, check, m := checkHeading(heading) + if check { + mnemonic = m + } else { + mnemonic = text[0].S + text = text[1:] + } + index := strings.Index(mnemonic, " ") + if index != -1 { + format = mnemonic[index+1:] + mnemonic = mnemonic[:index] + } else { + format = text[0].S + } + text = text[1:] + if strings.Compare(format, "SS") == 0 { + format += text[0].S + } + before, _, _ := strings.Cut(format, " ") + format = before + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. + for len(text) > 0 && !(match(text[0], "Helvetica-Narrow", 8, "") && (matchXCord(text[0], 350.82) || matchXCord(text[0], 363.84) || matchXCord(text[0], 332.82) || matchXCord(text[0], 345.84))) { + if text[0].X > 405.48 { + break + } + text = text[1:] + } + flags := text[0].S + // The float numbers below are the horizontal X-coordinate values to be parsed out of the Z-ISA PDF book. 
+ for len(text) > 0 && !(match(text[0], "Helvetica-Narrow", 8, "") && ((matchXCord(text[0], 481.7) && (!matchXCord(text[1], 496.1))) || matchXCord(text[0], 496.1) || (matchXCord(text[0], 499.6) && (!matchXCord(text[1], 514))) || (matchXCord(text[0], 514)))) { + text = text[1:] + } + if len(text) == 0 { + break + } + opcode := text[0].S + b1, b2, _ := strings.Cut(opcode, " ") + if matchXCord(text[0], 481.7) || matchXCord(text[0], 499.6) { + opcode = b2 + } else { + opcode = b1 + } + if strings.Compare(text[0].S, b1) == 0 { + text = text[2:] + } else { + text = text[1:] + } + mnemonic1, encoding := frameMnemonic(mnemonic, format, opcode) + for match(text[0], "Helvetica-Narrow", 5.1, "") { + text = text[1:] + } + if match(text[0], "Helvetica-Oblique", 9, "") { + text = text[2:] + insts = append(insts, Inst{heading, mnemonic1, encoding, flags}) + continue + } + if strings.HasPrefix(text[0].S, "(") { + y123 := text[0].Y + for text[0].Y == y123 && !matchXCord(text[0], 5.1) { + heading += text[0].S + text = text[1:] + } + } else if !(math.Abs(text[0].Y-text[1].Y) < 0.3) { + heading += " " + text[0].S + text = text[1:] + } + insts = append(insts, Inst{heading, mnemonic1, encoding, flags}) + if match(text[0], "Helvetica-Oblique", 9, "") { + break + } + } + return insts +} + +func checkHeading(heading string) (string, bool, string) { + substr := []string{"ALSI", "ALGSI", "CHRL", "CGHRL", "CUXTR", "IEXTR", "RXSBG", "RISBLG", "VERIM", "VPSOP"} + b := false + for _, s := range substr { + r1 := strings.Index(heading, s) + if r1 != -1 { + heading = heading[:r1-1] + b = true + return heading, b, s + } + } + return heading, b, "" +} + +func frameMnemonic(mnemonic, format, opcode string) (string, string) { + + var mn, enc string + + switch format { + case "E": + mn, enc = mnemonic_E(mnemonic, opcode) + case "I": + mn, enc = mnemonic_I(mnemonic, opcode) + case "IE": + mn, enc = mnemonic_IE(mnemonic, opcode) + case "MII": + mn, enc = mnemonic_MII(mnemonic, opcode) + case "RI-a", 
"RI-b", "RI-c": + mn, enc = mnemonic_RI(mnemonic, format, opcode) + case "RIE-a", "RIE-b", "RIE-c", "RIE-d", "RIE-e", "RIE-f", "RIE-g": + mn, enc = mnemonic_RIE(mnemonic, format, opcode) + case "RIL-a", "RIL-b", "RIL-c": + mn, enc = mnemonic_RIL(mnemonic, format, opcode) + case "RIS": + mn, enc = mnemonic_RIS(mnemonic, opcode) + case "RR": + mn, enc = mnemonic_RR(mnemonic, opcode) + case "RRD": + mn, enc = mnemonic_RRD(mnemonic, opcode) + case "RRE": + mn, enc = mnemonic_RRE(mnemonic, opcode) + case "RRF-a", "RRF-b", "RRF-c", "RRF-d", "RRF-e": + mn, enc = mnemonic_RRF(mnemonic, format, opcode) + case "RRS": + mn, enc = mnemonic_RRS(mnemonic, opcode) + case "RS-a", "RS-b": + mn, enc = mnemonic_RS(mnemonic, format, opcode) + case "RSI": + mn, enc = mnemonic_RSI(mnemonic, opcode) + case "RSL-a", "RSL-b": + mn, enc = mnemonic_RSL(mnemonic, format, opcode) + case "RSY-a", "RSY-b": + mn, enc = mnemonic_RSY(mnemonic, format, opcode) + case "RX-a", "RX-b": + mn, enc = mnemonic_RX(mnemonic, format, opcode) + case "RXE": + mn, enc = mnemonic_RXE(mnemonic, opcode) + case "RXF": + mn, enc = mnemonic_RXF(mnemonic, opcode) + case "RXY-a", "RXY-b": + mn, enc = mnemonic_RXY(mnemonic, format, opcode) + case "S": + mn, enc = mnemonic_S(mnemonic, opcode) + case "SI": + mn, enc = mnemonic_SI(mnemonic, opcode) + case "SIL": + mn, enc = mnemonic_SIL(mnemonic, opcode) + case "SIY": + mn, enc = mnemonic_SIY(mnemonic, opcode) + case "SMI": + mn, enc = mnemonic_SMI(mnemonic, opcode) + case "SS-a", "SS-b", "SS-c", "SS-d", "SS-e", "SS-f": + mn, enc = mnemonic_SS(mnemonic, format, opcode) + case "SSE": + mn, enc = mnemonic_SSE(mnemonic, opcode) + case "SSF": + mn, enc = mnemonic_SSF(mnemonic, opcode) + case "VRI-a", "VRI-b", "VRI-c", "VRI-d", "VRI-e", "VRI-f", "VRI-g", "VRI-h", "VRI-i": + mn, enc = mnemonic_VRI(mnemonic, format, opcode) + case "VRR-a", "VRR-b", "VRR-c", "VRR-d", "VRR-e", "VRR-f", "VRR-g", "VRR-h", "VRR-i", "VRR-j", "VRR-k": + mn, enc = mnemonic_VRR(mnemonic, format, opcode) + 
case "VRS-a", "VRS-b", "VRS-c", "VRS-d": + mn, enc = mnemonic_VRS(mnemonic, format, opcode) + case "VRV": + mn, enc = mnemonic_VRV(mnemonic, opcode) + case "VRX": + mn, enc = mnemonic_VRX(mnemonic, opcode) + case "VSI": + mn, enc = mnemonic_VSI(mnemonic, opcode) + default: + mn = mnemonic + } + return mn, enc +} + +func mnemonic_E(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|??@16" + return mnemonic, enc +} + +func mnemonic_I(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " I" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|I@8|??@16" + return mnemonic, enc +} + +func mnemonic_IE(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " I1,I2" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|//@16|I1@24|I2@28|??@32" + return mnemonic, enc +} + +func mnemonic_MII(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " M1,RI2,RI3" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|M1@8|RI2@12|RI3@24|??@48" + return mnemonic, enc +} + +func mnemonic_RI(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:3], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RI-a": + mnemonic += " R1,I2" + enc = str1 + "@0|R1@8|" + str2 + "@12|I2@16|??@32" + case "RI-b": + mnemonic += " R1,RI2" + enc = str1 + "@0|R1@8|" + str2 + "@12|RI2@16|??@32" + case "RI-c": + mnemonic += " M1,RI2" + enc = str1 + "@0|M1@8|" + str2 + "@12|RI2@16|??@32" + } + return mnemonic, enc +} + +func mnemonic_RIE(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := 
strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RIE-a": + mnemonic += " R1,I2,M3" + enc = str1 + "@0|R1@8|//@12|I2@16|M3@32|//@36|" + str2 + "@40|??@48" + case "RIE-b": + mnemonic += " R1,R2,M3,RI4" + enc = str1 + "@0|R1@8|R2@12|RI4@16|M3@32|//@36|" + str2 + "@40|??@48" + case "RIE-c": + mnemonic += " R1,I2,M3,RI4" + enc = str1 + "@0|R1@8|M3@12|RI4@16|I2@32|" + str2 + "@40|??@48" + case "RIE-d": + mnemonic += " R1,R3,I2" + enc = str1 + "@0|R1@8|R3@12|I2@16|//@32|" + str2 + "@40|??@48" + case "RIE-e": + mnemonic += " R1,R3,RI2" + enc = str1 + "@0|R1@8|R3@12|RI2@16|//@32|" + str2 + "@40|??@48" + case "RIE-f": + mnemonic += " R1,R2,I3,I4,I5" + enc = str1 + "@0|R1@8|R2@12|I3@16|I4@24|I5@32|" + str2 + "@40|??@48" + case "RIE-g": + mnemonic += " R1,I2,M3" + enc = str1 + "@0|R1@8|M3@12|I2@16|//@32|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RIL(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RIL-a": + mnemonic += " R1,I2" + enc = str1 + "@0|R1@8|" + str2 + "@12|I2@16|??@48" + case "RIL-b": + mnemonic += " R1,RI2" + enc = str1 + "@0|R1@8|" + str2 + "@12|RI2@16|??@48" + case "RIL-c": + mnemonic += " M1,RI2" + enc = str1 + "@0|M1@8|" + str2 + "@12|RI2@16|??@48" + } + return mnemonic, enc +} + +func mnemonic_RIS(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + mnemonic += " R1,I2,M3,D4(B4)" + enc = str1 + "@0|R1@8|M3@12|B4@16|D4@20|I2@32|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_RR(mnemonic, opcode string) (string, string) { + var enc string + val, 
_ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "BCR": + mnemonic += " M1,R2" + enc = str + "@0|M1@8|R2@12|??@16" + case "SPM": + mnemonic += " R1" + enc = str + "@0|R1@8|//@12|??@16" + default: + mnemonic += " R1,R2" + enc = str + "@0|R1@8|R2@12|??@16" + } + return mnemonic, enc +} + +func mnemonic_RRD(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R3,R2" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|R1@16|//@20|R3@24|R2@28|??@32" + return mnemonic, enc +} + +func mnemonic_RRE(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "LZER", "LZDR", "LZXR", "EFPC", "EPAR", "EPAIR", "ESEA", "ESAIR", "ESAR", "ETND", "IAC", "IPM", "MSTA", "PTF", "SFASR", "SFPC", "SSAR", "SSAIR": + mnemonic += " R1" + enc = str + "@0|//@16|R1@24|//@28|??@32" + case "NNPA", "PALB", "PCC", "PCKMO": + enc = str + "@0|//@16|??@32" + default: + mnemonic += " R1,R2" + enc = str + "@0|//@16|R1@24|R2@28|??@32" + } + return mnemonic, enc +} + +func mnemonic_RRF(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "RRF-a": + switch mnemonic { + case "SELR", "SELGR", "SELFHR", "IPTE", "AXTRA", "ADTRA", + "DDTRA", "DXTRA", "MDTRA", "MXTRA", "SDTRA", "SXTRA": + mnemonic += " R1,R2,R3,M4" + enc = str + "@0|R3@16|M4@20|R1@24|R2@28|??@32" + default: + mnemonic += " R1,R2,R3" + enc = str + "@0|R3@16|//@20|R1@24|R2@28|??@32" + } + case "RRF-b": + switch mnemonic { + case "CRDTE", "IDTE", "LPTEA", "RDP", "DIEBR", "DIDBR", + "QADTR", "QAXTR", "RRDTR", "RRXTR": + mnemonic += " R1,R3,R2,M4" + enc = str + "@0|R3@16|M4@20|R1@24|R2@28|??@32" + default: + mnemonic += " R1,R3,R2" + enc = str + "@0|R3@16|//@20|R1@24|R2@28|??@32" + } + case "RRF-c": + mnemonic 
+= " R1,R2,M3" + enc = str + "@0|M3@16|//@20|R1@24|R2@28|??@32" + case "RRF-d": + mnemonic += " R1,R2,M4" + enc = str + "@0|//@16|M4@20|R1@24|R2@28|??@32" + case "RRF-e": + switch mnemonic { + case "CXFBRA", "CXFTR", "CDFBRA", "CDFTR", "CEFBRA", "CXGBRA", "CXGTRA", "CDGBRA", "CDGTRA", "CEGBRA", "CXLFBR", "CXLFTR", "CDLFBR", "CDLFTR", "CELFBR", + "CXLGBR", "CXLGTR", "CDLGBR", "CDLGTR", "CELGBR", "CFXBRA", "CGXBRA", "CFXTR", "CGXTRA", "CFDBRA", "CGDBRA", "CFDTR", "CGDTRA", "CFEBRA", "CGEBRA", + "CLFEBR", "CLFDBR", "CLFXBR", "CLGEBR", "CLGDBR", "CLGXBR", "CLFXTR", "CLFDTR", "CLGXTR", "CLGDTR", "FIEBRA", "FIDBRA", "FIXBRA", "FIDTR", "FIXTR", + "LDXBRA", "LEDBRA", "LEXBRA", "LEDTR", "LDXTR": + mnemonic += " R1,M3,R2,M4" + enc = str + "@0|M3@16|M4@20|R1@24|R2@28|??@32" + default: + mnemonic += " R1,M3,R2" + enc = str + "@0|M3@16|//@20|R1@24|R2@28|??@32" + } + } + return mnemonic, enc +} + +func mnemonic_RRS(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R2,M3,D4(B4)" + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + enc = str1 + "@0|R1@8|R2@12|B4@16|D4@20|M3@32|//@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_RS(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "RS-a": + switch mnemonic { + case "SLDA", "SLDL", "SLA", "SLL", "SRA", "SRDA", "SRDL", "SRL": + mnemonic += " R1,D2(B2)" + enc = str + "@0|R1@8|//@12|B2@16|D2@20|??@32" + default: + mnemonic += " R1,R3,D2(B2)" + enc = str + "@0|R1@8|R3@12|B2@16|D2@20|??@32" + } + case "RS-b": + mnemonic += " R1,M3,D2(B2)" + enc = str + "@0|R1@8|M3@12|B2@16|D2@20|??@32" + } + return mnemonic, enc +} + +func mnemonic_RSI(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R3,RI2" + val, _ := 
strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|R1@8|R3@12|RI2@16|??@32" + return mnemonic, enc +} + +func mnemonic_RSL(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RSL-a": + mnemonic += " D1(L1,B1)" + enc = str1 + "@0|L1@8|//@12|B1@16|D1@20|//@32|" + str2 + "@40|??@48" + case "RSL-b": + mnemonic += " R1,D2(L2,B2),M3" + enc = str1 + "@0|L2@8|B2@16|D2@20|R1@32|M3@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RSY(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RSY-a": + mnemonic += " R1,R3,D2(B2)" + enc = str1 + "@0|R1@8|R3@12|B2@16|D2@20|" + str2 + "@40|??@48" + case "RSY-b": + switch mnemonic { + case "LOC", "LOCFH", "LOCG", "STOCFH", "STOC", "STOCG": + mnemonic += " R1,D2(B2),M3" + default: + mnemonic += " R1,M3,D2(B2)" + } + enc = str1 + "@0|R1@8|M3@12|B2@16|D2@20|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RX(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseInt(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "RX-a": + mnemonic += " R1,D2(X2,B2)" + enc = str + "@0|R1@8|X2@12|B2@16|D2@20|??@32" + case "RX-b": + mnemonic += " M1,D2(X2,B2)" + enc = str + "@0|M1@8|X2@12|B2@16|D2@20|??@32" + } + return mnemonic, enc +} + +func mnemonic_RXE(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch mnemonic 
{ + case "LCBB": + mnemonic += " R1,D2(X2,B2),M3" + enc = str1 + "@0|R1@8|X2@12|B2@16|D2@20|M3@32|//@36|" + str2 + "@40|??@48" + default: + mnemonic += " R1,D2(X2,B2)" + enc = str1 + "@0|R1@8|X2@12|B2@16|D2@20|//@32|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_RXF(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " R1,R3,D2(X2,B2)" + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + enc = str1 + "@0|R3@8|X2@12|B2@16|D2@20|R1@32|//@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_RXY(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "RXY-a": + mnemonic += " R1,D2(X2,B2)" + enc = str1 + "@0|R1@8|X2@12|B2@16|D2@20|" + str2 + "@40|??@48" + case "RXY-b": + mnemonic += " M1,D2(X2,B2)" + enc = str1 + "@0|M1@8|X2@12|B2@16|D2@20|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_S(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "PTLB", "TEND", "XSCH", "CSCH", "HSCH", "IPK", "RCHP", "RSCH", "SAL", "SCHM": + enc = str + "@0|//@16|??@32" + default: + mnemonic += " D2(B2)" + enc = str + "@0|B2@16|D2@20|??@32" + } + return mnemonic, enc +} + +func mnemonic_SI(mnemonic, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch mnemonic { + case "TS", "SSM", "LPSW": + mnemonic += " D1(B1)" + default: + mnemonic += " D1(B1),I2" + } + enc = str + "@0|I2@8|B1@16|D1@20|??@32" + return mnemonic, enc +} + +func mnemonic_SIL(mnemonic, opcode string) (string, string) { + var enc 
string + mnemonic += " D1(B1),I2" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|B1@16|D1@20|I2@32|??@48" + return mnemonic, enc +} + +func mnemonic_SIY(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch mnemonic { + case "LPSWEY": + mnemonic += " D1(B1)" + enc = str1 + "@0|//@8|B1@16|D1@20|" + str2 + "@40|??@48" + default: + mnemonic += " D1(B1),I2" + enc = str1 + "@0|I2@8|B1@16|D1@20|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_SMI(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " M1,RI2,D3(B3)" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|M1@8|//@12|B3@16|D3@20|RI2@32|??@48" + return mnemonic, enc +} + +func mnemonic_SS(mnemonic, format, opcode string) (string, string) { + var enc string + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + switch format { + case "SS-a": + mnemonic += " D1(L1,B1),D2(B2)" + enc = str + "@0|L1@8|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-b": + mnemonic += " D1(L1,B1),D2(L2,B2)" + enc = str + "@0|L1@8|L2@12|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-c": + mnemonic += " D1(L1,B1),D2(B2),I3" + enc = str + "@0|L1@8|I3@12|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-d": + mnemonic += " D1(R1,B1),D2(B2),R3" + enc = str + "@0|R1@8|R3@12|B1@16|D1@20|B2@32|D2@36|??@48" + case "SS-e": + switch mnemonic { + case "LMD": + mnemonic += " R1,R3,D2(B2),D4(B4)" + default: + mnemonic += " R1,D2(B2),R3,D4(B4)" + } + enc = str + "@0|R1@8|R3@12|B2@16|D2@20|B4@32|D4@36|??@48" + case "SS-f": + mnemonic += " D1(B1),D2(L2,B2)" + enc = str + "@0|L2@8|B1@16|D1@20|B2@32|D2@36|??@48" + } + return mnemonic, enc + +} + +func mnemonic_SSE(mnemonic, opcode string) (string, string) { + var enc string + 
mnemonic += " D1(B1),D2(B2)" + val, _ := strconv.ParseUint(opcode, 16, 16) + str := strconv.Itoa(int(val)) + enc = str + "@0|B1@16|D1@20|B2@32|D2@36|??@48" + return mnemonic, enc +} + +func mnemonic_SSF(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch mnemonic { + case "LPD", "LPDG": + mnemonic += " R3,D1(B1),D2(B2)" + default: + mnemonic += " D1(B1),D2(B2),R3" + } + enc = str1 + "@0|R3@8|" + str2 + "@12|B1@16|D1@20|B2@32|D2@36|??@48" + return mnemonic, enc +} + +func mnemonic_VRI(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "VRI-a": + if strings.Contains(mnemonic, "VGBM") { // Check for M3 field + mnemonic += " V1,I2" + enc = str1 + "@0|V1@8|//@12|I2@16|//@32|RXB@36|" + str2 + "@40|??@48" + } else { + mnemonic += " V1,I2,M3" + enc = str1 + "@0|V1@8|//@12|I2@16|M3@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRI-b": + mnemonic += " V1,I2,I3,M4" + enc = str1 + "@0|V1@8|//@12|I2@16|I3@24|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-c": + mnemonic += " V1,V3,I2,M4" + enc = str1 + "@0|V1@8|V3@12|I2@16|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-d": + if strings.Contains(mnemonic, "VERIM") { // Check for M5 field + mnemonic += " V1,V2,V3,I4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|I4@24|M5@32|RXB@36|" + str2 + "@40|??@48" + } else { + mnemonic += " V1,V2,V3,I4" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|I4@24|//@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRI-e": + mnemonic += " V1,V2,I3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|I3@16|M5@28|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-f": + mnemonic += " V1,V2,V3,I4,M5" + enc = str1 + 
"@0|V1@8|V2@12|V3@16|//@20|M5@24|I4@28|RXB@36|" + str2 + "@40|??@48" + case "VRI-g": + mnemonic += " V1,V2,I3,I4,M5" + enc = str1 + "@0|V1@8|V2@12|I4@16|M5@24|I3@28|RXB@36|" + str2 + "@40|??@48" + case "VRI-h": + mnemonic += " V1,I2,I3" + enc = str1 + "@0|V1@8|//@12|I2@16|I3@32|RXB@36|" + str2 + "@40|??@48" + case "VRI-i": + mnemonic += " V1,R2,I3,M4" + enc = str1 + "@0|V1@8|R2@12|//@16|M4@24|I3@28|RXB@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_VRR(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "VRR-a": + switch mnemonic { + case "VLR", "VTM": // V1,V2 + mnemonic += " V1,V2" + enc = str1 + "@0|V1@8|V2@12|//@16|RXB@36|" + str2 + "@40|??@48" + + case "VSEG", "VUPH", "VUPLH", "VUPL", "VUPLL", "VCLZ", "VCTZ", "VEC", "VECL", "VLC", "VLP", "VPOPCT": // V1,V2,M3 + mnemonic += " V1,V2,M3" + enc = str1 + "@0|V1@8|V2@12|//@16|M3@32|RXB@36|" + str2 + "@40|??@48" + + case "VISTR": // V1,V2,M3,M5 + mnemonic += " V1,V2,M3,M5" + enc = str1 + "@0|V1@8|V2@12|//@16|M5@24|//@28|M3@32|RXB@36|" + str2 + "@40|??@48" + + case "WFC", "WFK", "VFLL", "VFSQ", "VCLFNH", "VCLFNL", "VCFN", "VCNF": // V1,V2,M3,M4 + mnemonic += " V1,V2,M3,M4" + enc = str1 + "@0|V1@8|V2@12|//@16|M4@28|M3@32|RXB@36|" + str2 + "@40|??@48" + + case "VCFPS", "VCDG", "VCDLG", "VCGD", "VCFPL", "VCSFP", "VCLFP", "VCLGD", "VFI", "VFLR", "VFPSO": // V1,V2,M3,M4,M5 + mnemonic += " V1,V2,M3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|//@16|M5@24|M4@28|M3@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-b": + switch mnemonic { + case "VSCSHP": + mnemonic += " V1,V2,V3" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|RXB@36|" + str2 + "@40|??@48" + default: + mnemonic += " V1,V2,V3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M5@24|//@28|M4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-c": 
+ switch mnemonic { + case "VFA", "VFD", "VFM", "VFS", "VCRNF": // V1,V2,V3,M4,M5 + mnemonic += " V1,V2,V3,M4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M5@28|M4@32|RXB@36|" + str2 + "@40|??@48" + + case "VFCE", "VFCH", "VFCHE", "VFMAX", "VFMIN": // V1,V2,V3,M4,M5,M6 + mnemonic += " V1,V2,V3,M4,M5,M6" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M6@24|M5@28|M4@32|RXB@36|" + str2 + "@40|??@48" + + case "VBPERM", "VN", "VNC", "VCKSM", "VX", "VNN", "VNO", "VNX", + "VO", "VOC", "VSL", "VSLB", "VSRA", "VSRAB", "VSRL", "VSRLB": // V1,V2,V3 + mnemonic += " V1,V2,V3" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|RXB@36|" + str2 + "@40|??@48" + default: // V1,V2,V3,M4 + mnemonic += " V1,V2,V3,M4" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|M4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-d": + switch mnemonic { + case "VMSL", "VSTRC", "VSTRS": // V1,V2,V3,V4,M5,M6 + mnemonic += " V1,V2,V3,V4,M5,M6" + enc = str1 + "@0|V1@8|V2@12|V3@16|M5@20|M6@24|//@28|V4@32|RXB@36|" + str2 + "@40|??@48" + default: + mnemonic += " V1,V2,V3,V4,M5" + enc = str1 + "@0|V1@8|V2@12|V3@16|M5@20|//@24|V4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-e": + switch mnemonic { + case "VPERM", "VSEL": + mnemonic += " V1,V2,V3,V4" + enc = str1 + "@0|V1@8|V2@12|V3@16|//@20|V4@32|RXB@36|" + str2 + "@40|??@48" + default: + mnemonic += " V1,V2,V3,V4,M5,M6" + enc = str1 + "@0|V1@8|V2@12|V3@16|M6@20|//@24|M5@28|V4@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRR-f": + mnemonic += " V1,R2,R3" + enc = str1 + "@0|V1@8|R2@12|R3@16|//@20|RXB@36|" + str2 + "@40|??@48" + case "VRR-g": + mnemonic += " V1" + enc = str1 + "@0|//@8|V1@12|//@16|RXB@36|" + str2 + "@40|??@48" + case "VRR-h": + mnemonic += " V1,V2,M3" + enc = str1 + "@0|//@8|V1@12|V2@16|//@20|M3@24|//@28|RXB@36|" + str2 + "@40|??@48" + case "VRR-i": + mnemonic += " R1,V2,M3,M4" + enc = str1 + "@0|R1@8|V2@12|//@16|M3@24|M4@28|//@32|RXB@36|" + str2 + "@40|??@48" + case "VRR-j": + mnemonic += " V1,V2,V3,M4" + enc = str1 + 
"@0|V1@8|V2@12|V3@16|//@20|M4@24|//@28|RXB@36|" + str2 + "@40|??@48" + case "VRR-k": + mnemonic += " V1,V2,M3" + enc = str1 + "@0|V1@8|V2@12|//@16|M3@24|//@28|RXB@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_VRS(mnemonic, format, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + switch format { + case "VRS-a": + mnemonic += " V1,V3,D2(B2),M4" + enc = str1 + "@0|V1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRS-b": + if strings.Contains(mnemonic, "VLVG") { + mnemonic += " V1,R3,D2(B2),M4" + enc = str1 + "@0|V1@8|R3@12|B2@16|D2@20|M4@32|RXB@36|" + str2 + "@40|??@48" + } else { + mnemonic += " V1,R3,D2(B2)" + enc = str1 + "@0|V1@8|R3@12|B2@16|D2@20|//@32|RXB@36|" + str2 + "@40|??@48" + } + case "VRS-c": + mnemonic += " R1,V3,D2(B2),M4" + enc = str1 + "@0|R1@8|V3@12|B2@16|D2@20|M4@32|RXB@36|" + str2 + "@40|??@48" + case "VRS-d": + mnemonic += " V1,R3,D2(B2)" + enc = str1 + "@0|//@8|R3@12|B2@16|D2@20|V1@32|RXB@36|" + str2 + "@40|??@48" + } + return mnemonic, enc +} + +func mnemonic_VRV(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + mnemonic += " V1,D2(V2,B2),M3" + enc = str1 + "@0|V1@8|V2@12|B2@16|D2@20|M3@32|RXB@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func mnemonic_VRX(mnemonic, opcode string) (string, string) { + var enc string + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + mnemonic += " V1,D2(X2,B2),M3" + enc = str1 + "@0|V1@8|X2@12|B2@16|D2@20|M3@32|RXB@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func 
mnemonic_VSI(mnemonic, opcode string) (string, string) { + var enc string + mnemonic += " V1,D2(B2),I3" + val1, _ := strconv.ParseUint(opcode[:2], 16, 16) + str1 := strconv.Itoa(int(val1)) + val2, _ := strconv.ParseUint(opcode[2:], 16, 16) + str2 := strconv.Itoa(int(val2)) + enc = str1 + "@0|I3@8|B2@16|D2@20|V1@32|RXB@36|" + str2 + "@40|??@48" + return mnemonic, enc +} + +func matchXCord(t pdf.Text, Xcord float64) bool { + return math.Abs(t.X-Xcord) < 0.9 +} + +func match(t pdf.Text, font string, size float64, substr string) bool { + return t.Font == font && (size == 0 || math.Abs(t.FontSize-size) < 0.2) && strings.Contains(t.S, substr) +} + +func findWords(chars []pdf.Text) (words []pdf.Text) { + // Sort by Y coordinate and normalize. + const nudge = 1.5 + sort.Sort(pdf.TextVertical(chars)) + old := -100000.0 + for i, c := range chars { + if c.Y != old && math.Abs(old-c.Y) < nudge { + chars[i].Y = old + } else { + old = c.Y + } + } + + // Sort by Y coordinate, breaking ties with X. + // This will bring letters in a single word together. + sort.Sort(pdf.TextVertical(chars)) + + // Loop over chars. + for i := 0; i < len(chars); { + // Find all chars on line. + j := i + 1 + for j < len(chars) && chars[j].Y == chars[i].Y { + j++ + } + var end float64 + // Split line into words (really, phrases). + for k := i; k < j; { + ck := &chars[k] + s := ck.S + end = ck.X + ck.W + charSpace := ck.FontSize / 6 + wordSpace := ck.FontSize * 2 / 3 + l := k + 1 + for l < j { + // Grow word. + cl := &chars[l] + if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { + s += cl.S + end = cl.X + cl.W + l++ + continue + } + // Add space to phrase before next word. 
+				if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
+					s += " " + cl.S
+					end = cl.X + cl.W
+					l++
+					continue
+				}
+				break
+			}
+			f := ck.Font
+			f = strings.TrimSuffix(f, ",Italic")
+			f = strings.TrimSuffix(f, "-Italic")
+			words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s})
+			k = l
+		}
+		i = j
+	}
+	return words
+}
+
+func sameFont(f1, f2 string) bool {
+	f1 = strings.TrimSuffix(f1, ",Italic")
+	f1 = strings.TrimSuffix(f1, "-Italic")
+	f2 = strings.TrimSuffix(f2, ",Italic")
+	f2 = strings.TrimSuffix(f2, "-Italic")
+	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
+}
diff --git a/s390x/s390xutil/hack.h b/s390x/s390xutil/hack.h
new file mode 100644
index 00000000..22ef049f
--- /dev/null
+++ b/s390x/s390xutil/hack.h
@@ -0,0 +1,56 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file requires gcc and binutils with -march=z16 support.
+// s390xutil runs a series of commands like:
+// go run map.go -fmt=asm ../s390x.csv > asm.S
+// /usr/bin/gcc -c asm.S -march=z16
+// /usr/bin/objdump -d asm.o
+// to create the file decode_generated.txt used to verify the disassembler.
+//
+// Note, the Go disassembler is not expected to support every extended
+// mnemonic, but it should support those which frequently show up in object
+// files compiled by the Go toolchain.
+ + +#define R1 8 +#define R2 0 +#define R3 0 + +#define X2 2 + +#define L1 4 +#define L2 4 + +#define B1 2 +#define B2 1 +#define B3 6 +#define B4 8 + +#define D1 6 +#define D2 11 +#define D3 182 +#define D4 205 + +#define V1 18 +#define V2 3 +#define V3 5 +#define V4 8 + +#define I 124 +#define I1 12 +#define I2 8 +#define I3 9 +#define I4 105 +#define I5 18 + +#define RI2 0 +#define RI3 294 +#define RI4 -168 + +#define M1 7 +#define M3 3 +#define M4 1 +#define M5 9 +#define M6 11 diff --git a/s390x/s390xutil/util.go b/s390x/s390xutil/util.go new file mode 100644 index 00000000..003ce5df --- /dev/null +++ b/s390x/s390xutil/util.go @@ -0,0 +1,90 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// Generate interesting test cases from s390x objdump via +// go run util.go +// +// This requires "/usr/bin/gcc" and "objdump" be in the PATH this command is run. +// +// These tools can be acquired from the IBM advance toolchain for amd64 hosts too. + +package main + +import ( + "bufio" + "fmt" + "io" + "os" + "os/exec" + "regexp" + "strconv" + "strings" +) + +// Emit a test file using the generator called name.txt. This requires +// a GCC toolchain which supports -march=z16. 
+func genOutput(name, tcPfx string, generator func(io.Writer)) { + // Generate object code from gcc + cmd := exec.Command(tcPfx+"gcc", "-c", "-march=z16", "-x", "assembler-with-cpp", "-o", name+".o", "-") + input, _ := cmd.StdinPipe() + cmd.Stderr = os.Stderr + go func() { + defer input.Close() + generator(input.(io.Writer)) + }() + if cmd.Run() != nil { + fmt.Printf("Failed running gcc for: %s\n", name) + return + } + defer os.Remove(name + ".o") + cmd = exec.Command(tcPfx+"objdump", "-d", name+".o") + + // Run objdump and parse output into test format + output, _ := cmd.StdoutPipe() + defer output.Close() + scanner := bufio.NewScanner(output) + spacere := regexp.MustCompile("[[:space:]]+") + outf, _ := os.Create(name + ".txt") + defer outf.Close() + if cmd.Start() != nil { + fmt.Printf("Failed running objdump for: %s\n", name) + return + } + + for scanner.Scan() { + ln := spacere.Split(scanner.Text(), -1) + var cnt int16 + if len(ln) >= 5 { + v, _ := strconv.ParseInt(ln[2], 16, 16) + if (v >> 6 & 0x3) == 0 { + cnt = 2 + } else if v>>6&0x3 == 1 || v>>6&0x3 == 2 { + cnt = 4 + } else { + cnt = 6 + } + opc := strings.Join(ln[2:cnt+2], "") + dec := strings.Join(ln[cnt+2:], " ") + fmt.Fprintf(outf, "%12s|\tgnu\t%-18s\n", opc, dec) + } + } + cmd.Wait() +} + +// Generate representative instructions for all[1] instructions in s390x.csv. +// +// [1] See hack.h for a few minor, exceptional workarounds. +func emitGenerated(out io.Writer) { + cmd := exec.Command("go", "run", "../s390xmap/map.go", "-fmt=asm", "../s390x.csv") + cmdout, _ := cmd.Output() + out.Write(cmdout) +} + +// Produce generated test outputs. This should be run every so often with +// new versions of objdump to ensure we stay up to date. 
+func main() { + genOutput("decode_generated", "/usr/bin/", emitGenerated) +} From 655f7a06f2d1c86bad93313e6431199c99c2daf2 Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Sun, 4 Aug 2024 18:27:19 +0800 Subject: [PATCH 026/200] riscv64: implement riscv64spec for instruction table generation Support generate all riscv extensions in $GOROOT/src/src/cmd/internal/obj/riscv/inst.go, also including "C" Standard Extension for Compressed Instructions, used to support instruction decoding on riscv64 target. riscv64spec relies on the riscv-opcodes project: https://github.com/riscv/riscv-opcodes Change-Id: Ib0589a87d1ba31fe431162d1f2d44a42bdb2ae06 Reviewed-on: https://go-review.googlesource.com/c/arch/+/602875 Reviewed-by: Mark Ryan LUCI-TryBot-Result: Go LUCI Reviewed-by: Carlos Amedee Reviewed-by: Cherry Mui Reviewed-by: Joel Sing Reviewed-by: Meng Zhuo --- riscv64/riscv64spec/spec.go | 476 ++++++++++++++++++++++++++++++++++++ 1 file changed, 476 insertions(+) create mode 100644 riscv64/riscv64spec/spec.go diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go new file mode 100644 index 00000000..53c0f1de --- /dev/null +++ b/riscv64/riscv64spec/spec.go @@ -0,0 +1,476 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// riscv64spec reads the files contained in riscv-opcodes repo +// to collect instruction encoding details. 
+// repo url: https://github.com/riscv/riscv-opcodes +// usage: go run spec.go + +package main + +import ( + "bufio" + "fmt" + "log" + "os" + "path/filepath" + "sort" + "strconv" + "strings" +) + +// RV64GC_zba_zbb_zbs Extensions Listing +// Reference: $GOROOT/src/src/cmd/internal/obj/riscv/inst.go +var extensions = []string{ + "rv_a", + "rv_c", + "rv_c_d", + "rv_d", + "rv_f", + "rv_i", + "rv_m", + "rv_q", + "rv_zba", + "rv_zbb", + "rv_zbs", + "rv_zfh", + "rv_zicsr", + "rv_zifencei", + "rv64_a", + "rv64_c", + "rv64_d", + "rv64_f", + "rv64_i", + "rv64_m", + "rv64_q", + "rv64_zba", + "rv64_zbb", + "rv64_zbs", + "rv64_zfh", +} + +const ( + prologueSec = "// Generated by riscv64spec riscv-opcodes\n// DO NOT EDIT\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage riscv64asm\n\n" + opSec = "const (\n\t_ Op = iota\n" + opstrSec = "var opstr = [...]string{\n" + instFormatsSec = "var instFormats = [...]instFormat{\n" +) + +var ( + ops []string + opstrs = make(map[string]string) + instFormatComments = make(map[string]string) + instFormats = make(map[string]string) +) + +func main() { + log.SetFlags(0) + log.SetPrefix("riscv64spec: ") + + var repoPath string + if len(os.Args) < 1 { + log.Fatal("usage: go run spec.go ") + } + repoPath = os.Args[1] + + fileTables, err := os.Create("tables.go") + if err != nil { + log.Fatal(err) + } + + buf := bufio.NewWriter(fileTables) + _, err = buf.Write([]byte(prologueSec)) + if err != nil { + log.Fatal(err) + } + + for _, ext := range extensions { + f, err := os.Open(filepath.Join(repoPath, ext)) + if err != nil { + log.Fatal(err) + } + defer f.Close() + + buf := bufio.NewScanner(f) + for buf.Scan() { + line := buf.Text() + if len(line) == 0 { + continue + } + words := strings.Fields(line) + if len(words) == 0 || words[0][0] == '#' { + continue + } + + // skip $pseudo_op except rv_zbb/rv64_zbb + if words[0][0] == '$' 
{ + if ext != "rv_zbb" && ext != "rv64_zbb" { + continue + } + words = words[2:] + } + + genInst(words) + } + } + + // c.unimp wasn't in riscv-opcodes, so add it there + c_unimp := "c.unimp 15..0=0" + genInst(strings.Fields(c_unimp)) + + sort.Strings(ops) + + // 1. write op + if _, err := buf.Write([]byte(opSec)); err != nil { + log.Fatal(err) + } + for _, op := range ops { + if _, err := fmt.Fprintf(buf, "\t%s\n", op); err != nil { + log.Fatal(err) + } + } + if _, err := buf.Write([]byte(")\n\n")); err != nil { + log.Fatal(err) + } + + // 2. write opstr + if _, err := buf.Write([]byte(opstrSec)); err != nil { + log.Fatal(err) + } + for _, op := range ops { + if _, err := fmt.Fprintf(buf, "\t%s\n", opstrs[op]); err != nil { + log.Fatal(err) + } + } + if _, err := buf.Write([]byte("}\n\n")); err != nil { + log.Fatal(err) + } + + // 3. write instFormatComment and instFormat + if _, err := buf.Write([]byte(instFormatsSec)); err != nil { + log.Fatal(err) + } + for _, op := range ops { + if _, err := fmt.Fprintf(buf, "\t%s\n\t%s\n", instFormatComments[op], instFormats[op]); err != nil { + log.Fatal(err) + } + } + if _, err = buf.Write([]byte("}\n")); err != nil { + log.Fatal(err) + } + + if err := buf.Flush(); err != nil { + log.Fatal(err) + } + + if err := fileTables.Close(); err != nil { + log.Fatal(err) + } +} + +func genInst(words []string) { + op := strings.ToUpper(strings.Replace(words[0], ".", "_", -1)) + opstr := fmt.Sprintf("%s:\t\"%s\",", op, strings.ToUpper(words[0])) + + var value uint32 + var mask uint32 + var instArgs []string + + for i := 1; i < len(words); i++ { + if strings.Contains(words[i], "=") { + val := strings.Split(words[i], "=") + sec := strings.Split(val[0], "..") + if len(sec) < 2 { + sec[0] = val[0] + } + subval, submsk := genValueAndMask(val, sec) + value |= subval + mask |= submsk + } else if len(words[i]) > 0 { + instArgs = append(instArgs, words[i]) + } + } + + instArgsStr := inferFormats(instArgs, op) + instFormatComment := "// " + 
strings.Replace(op, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) + instFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", mask, value, op, instArgsStr) + + // Handle the suffix of atomic instruction. + if isAtomic(op) { + suffix := []string{"", ".RL", ".AQ", ".AQRL"} + // Re-generate the opcode string, opcode value and mask. + for i, suf := range suffix { + aop := op + strings.Replace(suf, ".", "_", -1) + aopstr := fmt.Sprintf("%s:\t\"%s\",", aop, strings.ToUpper(words[0])+suf) + avalue := value | (uint32(i) << 25) + amask := mask | 0x06000000 + ainstFormatComment := "// " + strings.Replace(aop, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) + ainstFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", amask, avalue, aop, instArgsStr) + ops = append(ops, aop) + opstrs[aop] = aopstr + instFormats[aop] = ainstFormat + instFormatComments[aop] = ainstFormatComment + } + } else { + ops = append(ops, op) + opstrs[op] = opstr + instFormats[op] = instFormat + instFormatComments[op] = instFormatComment + } +} + +// inferFormats identifies inst format: +// R-Type (inst rd, rs1, rs2), +// I-Type (inst rd, rs1, imm / inst rd, offset(rs1)), +// UJ-Type (inst rd, imm), +// U-Type (inst rd, imm), +// SB-Type (inst rs1, rs2, offset) +// S-Type (inst rs2, offset(rs1)) +func inferFormats(instArgs []string, op string) string { + switch { + case strings.Contains(op, "AMO") || strings.Contains(op, "SC_"): + return "arg_rd, arg_rs2, arg_rs1_amo" + + case strings.Contains(op, "LR_"): + return "arg_rd, arg_rs1_amo" + + case op == "LB" || op == "LBU" || op == "LD" || + op == "LH" || op == "LHU" || op == "LW" || op == "LWU": + return "arg_rd, arg_rs1_mem" + + case op == "FLD" || op == "FLW" || op == "FLH" || op == "FLQ": + return "arg_fd, arg_rs1_mem" + + case op == "FSD" || op == "FSW" || op == "FSH" || op == "FSQ": + return "arg_fs2, arg_rs1_store" + + case op == "SD" || op == "SB" || op == 
"SW" || op == "SH": + return "arg_rs2, arg_rs1_store" + + case op == "CSRRW" || op == "CSRRS" || op == "CSRRC": + return "arg_rd, arg_csr, arg_rs1" + + case op == "CSRRWI" || op == "CSRRSI" || op == "CSRRCI": + return "arg_rd, arg_csr, arg_zimm" + + case op == "JALR": + return "arg_rd, arg_rs1_mem" + + case op == "FENCE_I": + return "" + + case op == "FENCE": + return "arg_pred, arg_succ" + + default: + var instStr []string + for _, arg := range instArgs { + if decodeArgs(arg, op) != "" { + instStr = append(instStr, decodeArgs(arg, op)) + } + } + return strings.Join(instStr, ", ") + } +} + +// decodeArgs turns the args into formats defined in arg.go +func decodeArgs(arg string, op string) string { + switch { + case strings.Contains("arg_rd", arg): + if isFloatReg(op, "rd") || strings.Contains(op, "C_FLDSP") { + return "arg_fd" + } + return "arg_rd" + + case strings.Contains("arg_rs1", arg): + if isFloatReg(op, "rs") { + return "arg_fs1" + } + return "arg_rs1" + + case strings.Contains("arg_rs2", arg): + if isFloatReg(op, "rs") { + return "arg_fs2" + } + return "arg_rs2" + + case strings.Contains("arg_rs3", arg): + if isFloatReg(op, "rs") { + return "arg_fs3" + } + return "arg_rs3" + + case arg == "imm12": + return "arg_imm12" + + case arg == "imm20": + return "arg_imm20" + + case arg == "jimm20": + return "arg_jimm20" + + case arg == "bimm12lo": + return "arg_bimm12" + + case arg == "imm12lo": + return "arg_simm12" + + case arg == "shamtw": + return "arg_shamt5" + + case arg == "shamtd": + return "arg_shamt6" + + case arg == "rd_p": + if strings.Contains(op, "C_FLD") { + return "arg_fd_p" + } + return "arg_rd_p" + + case arg == "rs1_p": + return "arg_rs1_p" + + case arg == "rd_rs1_p": + return "arg_rd_rs1_p" + + case arg == "rs2_p": + if strings.Contains(op, "C_FSD") { + return "arg_fs2_p" + } + return "arg_rs2_p" + + case arg == "rd_n0": + return "arg_rd_n0" + + case arg == "rs1_n0": + return "arg_rs1_n0" + + case arg == "rd_rs1_n0": + return "arg_rd_rs1_n0" + + 
case arg == "c_rs1_n0": + return "arg_c_rs1_n0" + + case arg == "c_rs2_n0": + return "arg_c_rs2_n0" + + case arg == "c_rs2": + if strings.Contains(op, "C_FSDSP") { + return "arg_c_fs2" + } + return "arg_c_rs2" + + case arg == "rd_n2": + return "arg_rd_n2" + + case arg == "c_imm6lo": + return "arg_c_imm6" + + case arg == "c_nzimm6lo": + return "arg_c_nzimm6" + + case arg == "c_nzuimm6lo": + return "arg_c_nzuimm6" + + case arg == "c_uimm7lo": + return "arg_c_uimm7" + + case arg == "c_uimm8lo": + return "arg_c_uimm8" + + case arg == "c_uimm8sp_s": + return "arg_c_uimm8sp_s" + + case arg == "c_uimm8splo": + return "arg_c_uimm8sp" + + case arg == "c_uimm9sp_s": + return "arg_c_uimm9sp_s" + + case arg == "c_uimm9splo": + return "arg_c_uimm9sp" + + case arg == "c_bimm9lo": + return "arg_c_bimm9" + + case arg == "c_nzimm10lo": + return "arg_c_nzimm10" + + case arg == "c_nzuimm10": + return "arg_c_nzuimm10" + + case arg == "c_imm12": + return "arg_c_imm12" + + case arg == "c_nzimm18lo": + return "arg_c_nzimm18" + } + return "" +} + +// genValueAndMask generates instruction value and relative mask. +func genValueAndMask(valStr []string, secStr []string) (uint32, uint32) { + var val int64 + + val, err := strconv.ParseInt(valStr[1], 0, 32) + if err != nil { + log.Fatal(err) + } + + l, err := strconv.Atoi(secStr[0]) + if err != nil { + log.Fatal(err) + } + var r int + if len(secStr) == 1 { + r = l + } else { + r, err = strconv.Atoi(secStr[1]) + if err != nil { + log.Fatal(err) + } + } + + subval := uint32(val << r) + submsk := ^uint32(0) << (31 - l) >> (31 - l + r) << r + return subval, submsk +} + +// isAtomic reports whether the instruction is atomic. +func isAtomic(op string) bool { + return strings.HasPrefix(op, "AMO") || strings.HasPrefix(op, "LR_") || strings.HasPrefix(op, "SC_") +} + +// isFloatReg reports whether the register of a floating point instruction is a floating point register. 
+func isFloatReg(op string, reg string) bool { + switch { + case strings.Contains(op, "FADD") || strings.Contains(op, "FSUB") || + strings.Contains(op, "FDIV") || strings.Contains(op, "FMUL") || + strings.Contains(op, "FMIN") || strings.Contains(op, "FMAX") || + strings.Contains(op, "FMADD") || strings.Contains(op, "FMSUB") || + strings.Contains(op, "FCVT_D_S") || strings.Contains(op, "FCVT_S_D") || + strings.Contains(op, "FCVT_D_Q") || strings.Contains(op, "FCVT_Q_D") || + strings.Contains(op, "FCVT_S_Q") || strings.Contains(op, "FCVT_Q_S") || + strings.Contains(op, "FCVT_H_S") || strings.Contains(op, "FCVT_S_H") || + strings.Contains(op, "FNM") || strings.Contains(op, "FNEG") || + strings.Contains(op, "FSQRT") || strings.Contains(op, "FSGNJ"): + return true + + case strings.Contains(op, "FCLASS") || strings.Contains(op, "FCVT_L") || + strings.Contains(op, "FCVT_W") || strings.Contains(op, "FEQ") || + strings.Contains(op, "FLE") || strings.Contains(op, "FLT") || + strings.Contains(op, "FMV_X_H") || strings.Contains(op, "FMV_X_D") || + strings.Contains(op, "FMV_X_W"): + return reg != "rd" + + case strings.Contains(op, "FCVT_D") || strings.Contains(op, "FCVT_S") || + strings.Contains(op, "FCVT_H") || strings.Contains(op, "FCVT_Q") || + strings.Contains(op, "FMV_H_X") || strings.Contains(op, "FMV_D_X") || + strings.Contains(op, "FMV_W_X"): + return reg != "rs" + + default: + return false + } +} From 93cb9f839e50a5d87c483533133abe1488cc1c7a Mon Sep 17 00:00:00 2001 From: limeidan Date: Sat, 16 Oct 2021 15:24:32 +0800 Subject: [PATCH 027/200] loong64: Implement loong64 GNU and plan9 format disassembler Loong64 documentation: https://github.com/loongson/LoongArch-Documentation.git Change-Id: Iff47bdcfc787f69361be510bc4784fe91e10431c Co-authored-by: huangqiqi Co-authored-by: chenguoqi Reviewed-on: https://go-review.googlesource.com/c/arch/+/358854 LUCI-TryBot-Result: Go LUCI Reviewed-by: sophie zhao Reviewed-by: abner chenc Reviewed-by: Michael Pratt Reviewed-by: Qiqi 
Huang Reviewed-by: Zxilly Chou Reviewed-by: Cherry Mui --- loong64/loong64asm/arg.go | 93 ++ loong64/loong64asm/decode.go | 269 ++++ loong64/loong64asm/decode_test.go | 76 + loong64/loong64asm/ext_test.go | 405 +++++ loong64/loong64asm/gnu.go | 16 + loong64/loong64asm/inst.go | 298 ++++ loong64/loong64asm/objdump_test.go | 145 ++ loong64/loong64asm/objdumpext_test.go | 249 +++ loong64/loong64asm/plan9x.go | 536 +++++++ loong64/loong64asm/tables.go | 1613 ++++++++++++++++++++ loong64/loong64asm/testdata/gnucases.txt | 415 +++++ loong64/loong64asm/testdata/plan9cases.txt | 365 +++++ loong64/loong64spec/spec.go | 528 +++++++ 13 files changed, 5008 insertions(+) create mode 100644 loong64/loong64asm/arg.go create mode 100644 loong64/loong64asm/decode.go create mode 100644 loong64/loong64asm/decode_test.go create mode 100644 loong64/loong64asm/ext_test.go create mode 100644 loong64/loong64asm/gnu.go create mode 100644 loong64/loong64asm/inst.go create mode 100644 loong64/loong64asm/objdump_test.go create mode 100644 loong64/loong64asm/objdumpext_test.go create mode 100644 loong64/loong64asm/plan9x.go create mode 100644 loong64/loong64asm/tables.go create mode 100644 loong64/loong64asm/testdata/gnucases.txt create mode 100644 loong64/loong64asm/testdata/plan9cases.txt create mode 100644 loong64/loong64spec/spec.go diff --git a/loong64/loong64asm/arg.go b/loong64/loong64asm/arg.go new file mode 100644 index 00000000..460af3d1 --- /dev/null +++ b/loong64/loong64asm/arg.go @@ -0,0 +1,93 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package loong64asm + +// Naming for Go decoder arguments: +// +// - arg_fd: a Floating Point operand register fd encoded in the fd[4:0] field +// +// - arg_fj: a Floating Point operand register fj encoded in the fj[9:5] field +// +// - arg_fk: a Floating Point operand register fk encoded in the fk[14:10] field +// +// - arg_fa: a Floating Point operand register fa encoded in the fa[19:15] field +// +// - arg_rd: a general-purpose register rd encoded in the rd[4:0] field +// +// - arg_rj: a general-purpose register rj encoded in the rj[9:5] field +// +// - arg_rk: a general-purpose register rk encoded in the rk[14:10] field +// +// - arg_fcsr_4_0: float control status register encoded in [4:0] field +// +// - arg_cd_2_0: condition flag register encoded in [2:0] field +// +// - arg_sa2_16_15: shift bits constant encoded in [16:15] field +// +// - arg_code_14_0: arg for exception process routine encoded in [14:0] field +// +// - arg_ui5_14_10: 5bits unsigned immediate +// +// - arg_lsbw: For details, please refer to chapter 2.2.3.8 of instruction manual +// +// - arg_msbw: For details, please refer to chapter 2.2.3.9 of instruction manual +// +// - arg_hint_4_0: hint field implied the prefetch type and the data should fetch to cache's level +// 0: load to data cache level 1 +// 8: store to data cache level 1 +// other: no define +// +// - arg_si12_21_10: 12bits signed immediate + +type instArg uint16 + +const ( + _ instArg = iota + // 1-5 + arg_fd + arg_fj + arg_fk + arg_fa + arg_rd + // 6-10 + arg_rj + arg_rk + arg_op_4_0 + arg_fcsr_4_0 + arg_fcsr_9_5 + // 11-15 + arg_csr_23_10 + arg_cd + arg_cj + arg_ca + arg_sa2_16_15 + // 16-20 + arg_sa3_17_15 + arg_code_4_0 + arg_code_14_0 + arg_ui5_14_10 + arg_ui6_15_10 + // 21-25 + arg_ui12_21_10 + arg_lsbw + arg_msbw + arg_lsbd + arg_msbd + // 26-30 + arg_hint_4_0 + arg_hint_14_0 + arg_level_14_0 + arg_level_17_10 + arg_seq_17_10 + // 31-35 + arg_si12_21_10 + arg_si14_23_10 + arg_si16_25_10 + arg_si20_24_5 + arg_offset_20_0 
+ // 36~ + arg_offset_25_0 + arg_offset_15_0 +) diff --git a/loong64/loong64asm/decode.go b/loong64/loong64asm/decode.go new file mode 100644 index 00000000..3aca0074 --- /dev/null +++ b/loong64/loong64asm/decode.go @@ -0,0 +1,269 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "encoding/binary" + "fmt" +) + +type instArgs [5]instArg + +// An instFormat describes the format of an instruction encoding. +type instFormat struct { + mask uint32 + value uint32 + op Op + // args describe how to decode the instruction arguments. + // args is stored as a fixed-size array. + // if there are fewer than len(args) arguments, args[i] == 0 marks + // the end of the argument list. + args instArgs +} + +var ( + errShort = fmt.Errorf("truncated instruction") + errUnknown = fmt.Errorf("unknown instruction") +) + +var decoderCover []bool + +func init() { + decoderCover = make([]bool, len(instFormats)) +} + +// Decode decodes the 4 bytes in src as a single instruction. +func Decode(src []byte) (inst Inst, err error) { + if len(src) < 4 { + return Inst{}, errShort + } + + x := binary.LittleEndian.Uint32(src) + +Search: + for i := range instFormats { + f := &instFormats[i] + + if (x & f.mask) != f.value { + continue + } + + // Decode args. + var args Args + for j, aop := range f.args { + if aop == 0 { + break + } + + arg := decodeArg(aop, x, i) + if arg == nil { + // Cannot decode argument + continue Search + } + + args[j] = arg + } + + decoderCover[i] = true + inst = Inst{ + Op: f.op, + Args: args, + Enc: x, + } + return inst, nil + } + + return Inst{}, errUnknown +} + +// decodeArg decodes the arg described by aop from the instruction bits x. +// It returns nil if x cannot be decoded according to aop. 
+func decodeArg(aop instArg, x uint32, index int) Arg { + switch aop { + case arg_fd: + return F0 + Reg(x&((1<<5)-1)) + + case arg_fj: + return F0 + Reg((x>>5)&((1<<5)-1)) + + case arg_fk: + return F0 + Reg((x>>10)&((1<<5)-1)) + + case arg_fa: + return F0 + Reg((x>>15)&((1<<5)-1)) + + case arg_rd: + return R0 + Reg(x&((1<<5)-1)) + + case arg_rj: + return R0 + Reg((x>>5)&((1<<5)-1)) + + case arg_rk: + return R0 + Reg((x>>10)&((1<<5)-1)) + + case arg_fcsr_4_0: + return FCSR0 + Fcsr(x&((1<<5)-1)) + + case arg_fcsr_9_5: + return FCSR0 + Fcsr((x>>5)&((1<<5)-1)) + + case arg_cd: + return FCC0 + Fcc(x&((1<<3)-1)) + + case arg_cj: + return FCC0 + Fcc((x>>5)&((1<<3)-1)) + + case arg_ca: + return FCC0 + Fcc((x>>15)&((1<<3)-1)) + + case arg_op_4_0: + tmp := x & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_csr_23_10: + tmp := (x >> 10) & ((1 << 14) - 1) + return Uimm{tmp, false} + + case arg_sa2_16_15: + f := &instFormats[index] + tmp := SaSimm((x >> 15) & ((1 << 2) - 1)) + if (f.op == ALSL_D) || (f.op == ALSL_W) || (f.op == ALSL_WU) { + return tmp + 1 + } else { + return tmp + 0 + } + + case arg_sa3_17_15: + return SaSimm((x >> 15) & ((1 << 3) - 1)) + + case arg_code_4_0: + return CodeSimm(x & ((1 << 5) - 1)) + + case arg_code_14_0: + return CodeSimm(x & ((1 << 15) - 1)) + + case arg_ui5_14_10: + tmp := (x >> 10) & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_ui6_15_10: + tmp := (x >> 10) & ((1 << 6) - 1) + return Uimm{tmp, false} + + case arg_ui12_21_10: + tmp := ((x >> 10) & ((1 << 12) - 1) & 0xfff) + return Uimm{tmp, false} + + case arg_lsbw: + tmp := (x >> 10) & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_msbw: + tmp := (x >> 16) & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_lsbd: + tmp := (x >> 10) & ((1 << 6) - 1) + return Uimm{tmp, false} + + case arg_msbd: + tmp := (x >> 16) & ((1 << 6) - 1) + return Uimm{tmp, false} + + case arg_hint_4_0: + tmp := x & ((1 << 5) - 1) + return Uimm{tmp, false} + + case arg_hint_14_0: + tmp := x & 
((1 << 15) - 1) + return Uimm{tmp, false} + + case arg_level_14_0: + tmp := x & ((1 << 15) - 1) + return Uimm{tmp, false} + + case arg_level_17_10: + tmp := (x >> 10) & ((1 << 8) - 1) + return Uimm{tmp, false} + + case arg_seq_17_10: + tmp := (x >> 10) & ((1 << 8) - 1) + return Uimm{tmp, false} + + case arg_si12_21_10: + var tmp int16 + + // no int12, so sign-extend a 12-bit signed to 16-bit signed + if (x & 0x200000) == 0x200000 { + tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0xf000) + } else { + tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0x0000) + } + return Simm16{tmp, 12} + + case arg_si14_23_10: + var tmp int32 + if (x & 0x800000) == 0x800000 { + tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0xffff0000) + } else { + tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0x00000000) + } + return Simm32{tmp, 14} + + case arg_si16_25_10: + var tmp int32 + + if (x & 0x2000000) == 0x2000000 { + tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0xffff0000) + } else { + tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0x00000000) + } + + return Simm32{tmp, 16} + + case arg_si20_24_5: + var tmp int32 + if (x & 0x1000000) == 0x1000000 { + tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0xfff00000) + } else { + tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0x00000000) + } + return Simm32{tmp, 20} + + case arg_offset_20_0: + var tmp int32 + + if (x & 0x10) == 0x10 { + tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) | 0xff800000) + } else { + tmp = int32((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) + } + + return OffsetSimm{tmp, 21} + + case arg_offset_15_0: + var tmp int32 + if (x & 0x2000000) == 0x2000000 { + tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0xfffc0000) + } else { + tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0x00000000) + } + + return OffsetSimm{tmp, 16} + + case arg_offset_25_0: + var tmp int32 + + if (x & 0x200) == 0x200 { + tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) 
<< 2) | 0xf0000000) + } else { + tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) << 2) | 0x00000000) + } + + return OffsetSimm{tmp, 26} + default: + return nil + } +} diff --git a/loong64/loong64asm/decode_test.go b/loong64/loong64asm/decode_test.go new file mode 100644 index 00000000..74a32773 --- /dev/null +++ b/loong64/loong64asm/decode_test.go @@ -0,0 +1,76 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "encoding/hex" + "io/ioutil" + "path/filepath" + "strings" + "testing" +) + +func testDecode(t *testing.T, syntax string) { + input := filepath.Join("testdata", syntax+"cases.txt") + data, err := ioutil.ReadFile(input) + if err != nil { + t.Fatal(err) + } + all := string(data) + for strings.Contains(all, "\t\t") { + all = strings.Replace(all, "\t\t", "\t", -1) + } + for _, line := range strings.Split(all, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.SplitN(line, "\t", 2) + i := strings.Index(f[0], "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f[0]) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f[0]) + } + code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f[0], err) + continue + } + asm := f[1] + inst, decodeErr := Decode(code) + if decodeErr != nil && decodeErr != errUnknown { + // Some rarely used system instructions are not supported + // Following logicals will filter such unknown instructions + t.Errorf("parsing %x: %s", code, decodeErr) + continue + } + var out string + switch syntax { + case "gnu": + out = GNUSyntax(inst) + case "plan9": + out = GoSyntax(inst, 0, nil) + default: + t.Errorf("unknown syntax %q", syntax) + continue + } + + // var out string + if asm != out || len(asm) != len(out) { + 
t.Errorf("Decode(%s) [%s] = %s want %s", f[0], syntax, out, asm) + } + } +} + +func TestDecodeGNUSyntax(t *testing.T) { + testDecode(t, "gnu") +} + +func TestDecodeGoSyntax(t *testing.T) { + testDecode(t, "plan9") +} diff --git a/loong64/loong64asm/ext_test.go b/loong64/loong64asm/ext_test.go new file mode 100644 index 00000000..5c90586e --- /dev/null +++ b/loong64/loong64asm/ext_test.go @@ -0,0 +1,405 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Support for testing against external disassembler program. + +package loong64asm + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math/rand" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" +) + +var ( + dumpTest = flag.Bool("dump", false, "dump all encodings") + mismatch = flag.Bool("mismatch", false, "log allowed mismatches") + keep = flag.Bool("keep", false, "keep object files around") + debug = false +) + +// An ExtInst represents a single decoded instruction parsed +// from an external disassembler's output. +type ExtInst struct { + addr uint64 + enc [4]byte + nenc int + text string +} + +func (r ExtInst) String() string { + return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) +} + +// An ExtDis is a connection between an external disassembler and a test. +type ExtDis struct { + Dec chan ExtInst + File *os.File + Size int + Cmd *exec.Cmd +} + +// Run runs the given command - the external disassembler - and returns +// a buffered reader of its standard output. +func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { + if *keep { + log.Printf("%s\n", strings.Join(cmd, " ")) + } + ext.Cmd = exec.Command(cmd[0], cmd[1:]...) 
+ out, err := ext.Cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdoutpipe: %v", err) + } + if err := ext.Cmd.Start(); err != nil { + return nil, fmt.Errorf("exec: %v", err) + } + + b := bufio.NewReaderSize(out, 1<<20) + return b, nil +} + +// Wait waits for the command started with Run to exit. +func (ext *ExtDis) Wait() error { + return ext.Cmd.Wait() +} + +// testExtDis tests a set of byte sequences against an external disassembler. +// The disassembler is expected to produce the given syntax and run +// in the given architecture mode (16, 32, or 64-bit). +// The extdis function must start the external disassembler +// and then parse its output, sending the parsed instructions on ext.Dec. +// The generate function calls its argument f once for each byte sequence +// to be tested. The generate function itself will be called twice, and it must +// make the same sequence of calls to f each time. +// When a disassembly does not match the internal decoding, +// allowedMismatch determines whether this mismatch should be +// allowed, or else considered an error. +func testExtDis( + t *testing.T, + syntax string, + extdis func(ext *ExtDis) error, + generate func(f func([]byte)), + allowedMismatch func(text string, inst *Inst, dec ExtInst) bool, +) { + start := time.Now() + ext := &ExtDis{ + Dec: make(chan ExtInst), + } + errc := make(chan error) + + // First pass: write instructions to input file for external disassembler. + file, f, size, err := writeInst(generate) + if err != nil { + t.Fatal(err) + } + ext.Size = size + ext.File = f + defer func() { + f.Close() + if !*keep { + os.Remove(file) + } + }() + + // Second pass: compare disassembly against our decodings. 
+ var ( + totalTests = 0 + totalSkips = 0 + totalErrors = 0 + + errors = make([]string, 0, 100) // Sampled errors, at most cap + ) + go func() { + errc <- extdis(ext) + }() + + generate(func(enc []byte) { + dec, ok := <-ext.Dec + if !ok { + t.Errorf("decoding stream ended early") + return + } + inst, text := disasm(syntax, pad(enc)) + + totalTests++ + if *dumpTest { + fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) + } + + if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { + suffix := "" + if allowedMismatch(text, &inst, dec) { + totalSkips++ + if !*mismatch { + return + } + suffix += " (allowed mismatch)" + } + totalErrors++ + cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix) + + if len(errors) >= cap(errors) { + j := rand.Intn(totalErrors) + if j >= cap(errors) { + return + } + errors = append(errors[:j], errors[j+1:]...) + } + errors = append(errors, cmp) + } + }) + + if *mismatch { + totalErrors -= totalSkips + } + + fmt.Printf("totalTest: %d total skip: %d total error: %d\n", totalTests, totalSkips, totalErrors) + + // Here are some errors about mismatches(44) + for _, b := range errors { + t.Log(b) + } + + if totalErrors > 0 { + t.Fail() + } + t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) + t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage()) +} + +// Start address of text. +const start = 0x8000 + +// writeInst writes the generated byte sequences to a new file +// starting at offset start. That file is intended to be the input to +// the external disassembler. 
+func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { + f, err = ioutil.TempFile("", "loong64asm") + if err != nil { + return + } + + file = f.Name() + + f.Seek(start, io.SeekStart) + w := bufio.NewWriter(f) + defer w.Flush() + size = 0 + generate(func(x []byte) { + if debug { + fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) + } + w.Write(x) + w.Write(zeros[len(x):]) + size += len(zeros) + }) + return file, f, size, nil +} + +var zeros = []byte{0, 0, 0, 0} + +// pad pads the code sequence with pops. +func pad(enc []byte) []byte { + if len(enc) < 4 { + enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) + } + return enc +} + +// disasm returns the decoded instruction and text +// for the given source bytes, using the given syntax and mode. +func disasm(syntax string, src []byte) (inst Inst, text string) { + var err error + inst, err = Decode(src) + if err != nil { + text = "error: " + err.Error() + return + } + text = inst.String() + switch syntax { + case "gnu": + text = GNUSyntax(inst) + case "plan9": // [sic] + text = GoSyntax(inst, 0, nil) + default: + text = "error: unknown syntax " + syntax + } + return +} + +// decodecoverage returns a floating point number denoting the +// decoder coverage. +func decodeCoverage() float64 { + n := 0 + for _, t := range decoderCover { + if t { + n++ + } + } + return 100 * float64(1+n) / float64(1+len(decoderCover)) +} + +// Helpers for writing disassembler output parsers. + +// isHex reports whether b is a hexadecimal character (0-9a-fA-F). +func isHex(b byte) bool { + return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F') +} + +// parseHex parses the hexadecimal byte dump in hex, +// appending the parsed bytes to raw and returning the updated slice. +// The returned bool reports whether any invalid hex was found. +// Spaces and tabs between bytes are okay but any other non-hex is not. 
+func parseHex(hex []byte, raw []byte) ([]byte, bool) { + hex = bytes.TrimSpace(hex) + for j := 0; j < len(hex); { + for hex[j] == ' ' || hex[j] == '\t' { + j++ + } + if j >= len(hex) { + break + } + if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) { + return nil, false + } + raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1])) + j += 2 + } + return raw, true +} + +func unhex(b byte) byte { + if '0' <= b && b <= '9' { + return b - '0' + } else if 'A' <= b && b <= 'F' { + return b - 'A' + 10 + } else if 'a' <= b && b <= 'f' { + return b - 'a' + 10 + } + return 0 +} + +// index is like bytes.Index(s, []byte(t)) but avoids the allocation. +func index(s []byte, t string) int { + i := 0 + for { + j := bytes.IndexByte(s[i:], t[0]) + if j < 0 { + return -1 + } + i = i + j + if i+len(t) > len(s) { + return -1 + } + for k := 1; k < len(t); k++ { + if s[i+k] != t[k] { + goto nomatch + } + } + return i + nomatch: + i++ + } +} + +// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. +// If s must be rewritten, it is rewritten in place. +func fixSpace(s []byte) []byte { + s = bytes.TrimSpace(s) + for i := 0; i < len(s); i++ { + if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { + goto Fix + } + } + return s + +Fix: + b := s + w := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\t' || c == '\n' { + c = ' ' + } + if c == ' ' && w > 0 && b[w-1] == ' ' { + continue + } + b[w] = c + w++ + } + if w > 0 && b[w-1] == ' ' { + w-- + } + return b[:w] +} + +// Generators. +// +// The test cases are described as functions that invoke a callback repeatedly, +// with a new input sequence each time. These helpers make writing those +// a little easier. + +// hexCases generates the cases written in hexadecimal in the encoded string. +// Spaces in 'encoded' separate entire test cases, not individual bytes. 
+func hexCases(t *testing.T, encoded string) func(func([]byte)) { + return func(try func([]byte)) { + for _, x := range strings.Fields(encoded) { + src, err := hex.DecodeString(x) + if err != nil { + t.Errorf("parsing %q: %v", x, err) + } + try(src) + } + } +} + +// testdataCases generates the test cases recorded in testdata/cases.txt. +// It only uses the inputs; it ignores the answers recorded in that file. +func testdataCases(t *testing.T, syntax string) func(func([]byte)) { + var codes [][]byte + input := filepath.Join("testdata", syntax+"cases.txt") + data, err := ioutil.ReadFile(input) + if err != nil { + t.Fatal(err) + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.Fields(line)[0] + i := strings.Index(f, "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f) + } + code, err := hex.DecodeString(f[:i] + f[i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f, err) + continue + } + codes = append(codes, code) + } + + return func(try func([]byte)) { + for _, code := range codes { + try(code) + } + } +} diff --git a/loong64/loong64asm/gnu.go b/loong64/loong64asm/gnu.go new file mode 100644 index 00000000..4807abc5 --- /dev/null +++ b/loong64/loong64asm/gnu.go @@ -0,0 +1,16 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "strings" +) + +// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. +// This form typically matches the syntax defined in the Loong64 Reference Manual. 
See +// https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html +func GNUSyntax(inst Inst) string { + return strings.ToLower(inst.String()) +} diff --git a/loong64/loong64asm/inst.go b/loong64/loong64asm/inst.go new file mode 100644 index 00000000..1ac5c797 --- /dev/null +++ b/loong64/loong64asm/inst.go @@ -0,0 +1,298 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "fmt" + "strings" +) + +// An Inst is a single instruction. +type Inst struct { + Op Op // Opcode mnemonic + Enc uint32 // Raw encoding bits. + Args Args // Instruction arguments, in Loong64 manual order. +} + +func (i Inst) String() string { + var op string = i.Op.String() + var args []string + + for _, arg := range i.Args { + if arg == nil { + break + } + args = append(args, arg.String()) + } + + switch i.Op { + case OR: + if i.Args[2].(Reg) == R0 { + op = "move" + args = args[0:2] + } + + case ANDI: + if i.Args[0].(Reg) == R0 && i.Args[1].(Reg) == R0 { + return "nop" + } + + case JIRL: + if i.Args[0].(Reg) == R0 && i.Args[2].(OffsetSimm).Imm == 0 { + return "jr " + args[1] + } + + case BLT: + if i.Args[0].(Reg) == R0 { + op = "bgtz" + args = args[1:] + } else if i.Args[1].(Reg) == R0 { + op = "bltz" + args = append(args[:1], args[2:]...) + } + + case BGE: + if i.Args[0].(Reg) == R0 { + op = "blez" + args = args[1:] + } else if i.Args[1].(Reg) == R0 { + op = "bgez" + args = append(args[:1], args[2:]...) + } + } + + if len(args) == 0 { + return op + } else { + return op + " " + strings.Join(args, ", ") + } +} + +// An Op is an Loong64 opcode. +type Op uint16 + +// NOTE: The actual Op values are defined in tables.go. +// They are chosen to simplify instruction decoding and +// are not a dense packing from 0 to N, although the +// density is high, probably at least 90%. 
+func (op Op) String() string { + if (op >= Op(len(opstr))) || (opstr[op] == "") { + return fmt.Sprintf("Op(%d)", int(op)) + } + + return opstr[op] +} + +// An Args holds the instruction arguments. +// If an instruction has fewer than 5 arguments, +// the final elements in the array are nil. +type Args [5]Arg + +// An Arg is a single instruction argument +type Arg interface { + String() string +} + +// A Reg is a single register. +// The zero value denotes R0, not the absence of a register. +type Reg uint16 + +const ( + // General-purpose register + R0 Reg = iota + R1 + R2 + R3 + R4 + R5 + R6 + R7 + R8 + R9 + R10 + R11 + R12 + R13 + R14 + R15 + R16 + R17 + R18 + R19 + R20 + R21 + R22 + R23 + R24 + R25 + R26 + R27 + R28 + R29 + R30 + R31 + + // Float point register + F0 + F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + F12 + F13 + F14 + F15 + F16 + F17 + F18 + F19 + F20 + F21 + F22 + F23 + F24 + F25 + F26 + F27 + F28 + F29 + F30 + F31 +) + +func (r Reg) String() string { + switch { + case r == R0: + return "$zero" + + case r == R1: + return "$ra" + + case r == R2: + return "$tp" + + case r == R3: + return "$sp" + + case (r >= R4) && (r <= R11): + return fmt.Sprintf("$a%d", int(r-R4)) + + case (r >= R12) && (r <= R20): + return fmt.Sprintf("$t%d", int(r-R12)) + + case r == R21: + return "$r21" + + case r == R22: + return "$fp" + + case (r >= R23) && (r <= R31): + return fmt.Sprintf("$s%d", int(r-R23)) + + case (r >= F0) && (r <= F7): + return fmt.Sprintf("$fa%d", int(r-F0)) + + case (r >= F8) && (r <= F23): + return fmt.Sprintf("$ft%d", int(r-F8)) + + case (r >= F24) && (r <= F31): + return fmt.Sprintf("$fs%d", int(r-F24)) + + default: + return fmt.Sprintf("Unknown(%d)", int(r)) + } +} + +// float control status register +type Fcsr uint8 + +const ( + FCSR0 Fcsr = iota + FCSR1 + FCSR2 + FCSR3 +) + +func (f Fcsr) String() string { + return fmt.Sprintf("$fcsr%d", uint8(f)) +} + +// float condition flags register +type Fcc uint8 + +const ( + FCC0 Fcc = iota + 
FCC1 + FCC2 + FCC3 + FCC4 + FCC5 + FCC6 + FCC7 +) + +func (f Fcc) String() string { + return fmt.Sprintf("$fcc%d", uint8(f)) +} + +// An Imm is an integer constant. +type Uimm struct { + Imm uint32 + Decimal bool +} + +func (i Uimm) String() string { + if i.Decimal == true { + return fmt.Sprintf("%d", i.Imm) + } else { + return fmt.Sprintf("%#x", i.Imm) + } +} + +type Simm16 struct { + Imm int16 + Width uint8 +} + +func (si Simm16) String() string { + return fmt.Sprintf("%d", int32(si.Imm)) +} + +type Simm32 struct { + Imm int32 + Width uint8 +} + +func (si Simm32) String() string { + return fmt.Sprintf("%d", int32(si.Imm)) +} + +type OffsetSimm struct { + Imm int32 + Width uint8 +} + +func (o OffsetSimm) String() string { + return fmt.Sprintf("%d", int32(o.Imm)) +} + +type SaSimm int16 + +func (s SaSimm) String() string { + return fmt.Sprintf("%#x", int(s)) +} + +type CodeSimm int16 + +func (c CodeSimm) String() string { + return fmt.Sprintf("%#x", int(c)) +} diff --git a/loong64/loong64asm/objdump_test.go b/loong64/loong64asm/objdump_test.go new file mode 100644 index 00000000..04766557 --- /dev/null +++ b/loong64/loong64asm/objdump_test.go @@ -0,0 +1,145 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "strconv" + "strings" + "testing" +) + +func TestObjdumpLoong64TestDecodeGNUSyntaxdata(t *testing.T) { + testObjdumpLoong64(t, testdataCases(t, "gnu")) +} + +func TestObjdumpLoong64TestDecodeGoSyntaxdata(t *testing.T) { + testObjdumpLoong64(t, testdataCases(t, "plan9")) +} + +func TestObjdumpLoong64Manual(t *testing.T) { + testObjdumpLoong64(t, hexCases(t, objdumpManualTests)) +} + +// objdumpManualTests holds test cases that will be run by TestObjdumpLoong64Manual. 
+// If you are debugging a few cases that turned up in a longer run, it can be useful +// to list them here and then use -run=Manual, particularly with tracing enabled. +// Note that these are byte sequences, so they must be reversed from the usual +// word presentation. +var objdumpManualTests = ` +00007238 +00807238 +00004003 +00100050 +ac410028 +ac41002a +ac41c028 +ac414028 +ac41402a +ac418028 +ac41802a +ac397838 +acb97938 +acb97838 +ac397938 +ac397a38 +acb97b38 +acb97a38 +ac397b38 +ac110026 +ac110024 +ac390038 +ac392038 +ac390c38 +ac390438 +ac392438 +ac390838 +ac392838 +ac391600 +ac391400 +ac391500 +ac418003 +` + +// allowedMismatchObjdump reports whether the mismatch between text and dec +// should be allowed by the test. +func allowedMismatchObjdump(text string, inst *Inst, dec ExtInst) bool { + // GNU objdump use register, decode use alias of register, so corrected it in here + var dec_text = strings.Replace(dec.text, " ", ",", -1) + var decsp []string = strings.Split(dec_text, ",") + var num int = cap(decsp) + for i := 0; i < num; i++ { + dex := strings.Index(decsp[i], "$r") + fdex := strings.Index(decsp[i], "$f") + ddex := strings.Index(decsp[i], "(") + if ddex > 0 { + // ldptr.w $r12,$r13,16(0x10) + decsp[i] = decsp[i][0:ddex] + } + xdex := strings.Index(decsp[i], "0x") + // convert registers to registers aliases + if dex >= 0 { + reg, _ := strconv.Atoi(decsp[i][dex+2:]) + // r12~r20 $t0~t8 + if reg >= 12 && reg <= 20 { + decsp[i] = strings.Join([]string{"t", strconv.Itoa(reg - 12)}, "") + } + // r4~r11 $a0~a7 + if reg >= 4 && reg <= 11 { + decsp[i] = strings.Join([]string{"a", strconv.Itoa(reg - 4)}, "") + } + // r23~r31 $s0~s8 + if reg >= 23 && reg <= 31 { + decsp[i] = strings.Join([]string{"s", strconv.Itoa(reg - 23)}, "") + } + // r0 zero + if reg == 0 { + decsp[i] = strings.Join([]string{"zero"}, "") + } + // r1 ra + if reg == 1 { + decsp[i] = strings.Join([]string{"ra"}, "") + } + // r2 tp + if reg == 2 { + decsp[i] = strings.Join([]string{"tp"}, 
"") + } + // r3 sp + if reg == 3 { + decsp[i] = strings.Join([]string{"sp"}, "") + } + // r21 x + if reg == 21 { + decsp[i] = strings.Join([]string{"x"}, "") + } + // r22 fp + if reg == 22 { + decsp[i] = strings.Join([]string{"fp"}, "") + } + } + // convert hexadecimal to decimal + if xdex >= 0 { + parseint, _ := strconv.ParseInt(decsp[i][xdex+2:], 16, 32) + decsp[i] = strings.Join([]string{strconv.Itoa(int(parseint))}, "") + } + // convert floating-point registers to floating-point aliases + if fdex >= 0 && !strings.Contains(decsp[i], "$fcc") { + freg, _ := strconv.Atoi(decsp[i][fdex+2:]) + // f0~f7 fa0~fa7 + if freg >= 0 && freg <= 7 { + decsp[i] = strings.Join([]string{"fa", strconv.Itoa(freg - 0)}, "") + } + // f8~f23 ft0~ft15 + if freg >= 8 && freg <= 23 { + decsp[i] = strings.Join([]string{"ft", strconv.Itoa(freg - 8)}, "") + } + // f24~f31 fs0~fs7 + if freg >= 24 && freg <= 31 { + decsp[i] = strings.Join([]string{"fs", strconv.Itoa(freg - 24)}, "") + } + } + } + + return false +} diff --git a/loong64/loong64asm/objdumpext_test.go b/loong64/loong64asm/objdumpext_test.go new file mode 100644 index 00000000..80396d99 --- /dev/null +++ b/loong64/loong64asm/objdumpext_test.go @@ -0,0 +1,249 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package loong64asm + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "io" + "log" + "os" + "os/exec" + "strconv" + "strings" + "testing" +) + +const objdumpPath = "/usr/bin/objdump" + +func testObjdumpLoong64(t *testing.T, generate func(func([]byte))) { + testObjdumpArch(t, generate) +} + +func testObjdumpArch(t *testing.T, generate func(func([]byte))) { + checkObjdumpLoong64(t) + testExtDis(t, "gnu", objdump, generate, allowedMismatchObjdump) + testExtDis(t, "plan9", objdump, generate, allowedMismatchObjdump) +} + +func checkObjdumpLoong64(t *testing.T) { + out, err := exec.Command(objdumpPath, "-i").Output() + if err != nil { + t.Skipf("cannot run objdump: %v\n%s", err, out) + } + if !strings.Contains(string(out), "Loongarch64") { + t.Skip("objdump does not have loong64 support") + } +} + +func objdump(ext *ExtDis) error { + // File already written with instructions; add ELF header. + if err := writeELF64(ext.File, ext.Size); err != nil { + return err + } + + b, err := ext.Run(objdumpPath, "-d", "-z", ext.File.Name()) + if err != nil { + return err + } + + var ( + nmatch int + reading bool + next uint64 = start + addr uint64 + encbuf [4]byte + enc []byte + text string + ) + flush := func() { + if addr == next { + // PC-relative addresses are translated to absolute addresses based on PC by GNU objdump + // Following logical rewrites the absolute addresses back to PC-relative ones for comparing + // with our disassembler output which are PC-relative + if text == "undefined" && len(enc) == 4 { + text = "error: unknown instruction" + enc = nil + } + if len(enc) == 4 { + // prints as word but we want to record bytes + enc[0], enc[3] = enc[3], enc[0] + enc[1], enc[2] = enc[2], enc[1] + } + ext.Dec <- ExtInst{addr, encbuf, len(enc), text} + encbuf = [4]byte{} + enc = nil + next += 4 + } + } + var textangle = []byte("<.text>:") + for { + line, err := b.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("reading 
objdump output: %v", err) + } + if bytes.Contains(line, textangle) { + reading = true + continue + } + if !reading { + continue + } + if debug { + os.Stdout.Write(line) + } + if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { + enc = enc1 + continue + } + flush() + nmatch++ + addr, enc, text = parseLine(line, encbuf[:0]) + if addr > next { + return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line) + } + } + flush() + if next != start+uint64(ext.Size) { + return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size) + } + if err := ext.Wait(); err != nil { + return fmt.Errorf("exec: %v", err) + } + + return nil +} + +var ( + undefined = []byte("undefined") + unpredictable = []byte("unpredictable") + slashslash = []byte("//") +) + +func parseLine(line []byte, encstart []byte) (addr uint64, enc []byte, text string) { + ok := false + oline := line + i := index(line, ":\t") + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + x, err := strconv.ParseUint(string(bytes.TrimSpace(line[:i])), 16, 32) + if err != nil { + log.Fatalf("cannot parse disassembly: %q", oline) + } + addr = uint64(x) + line = line[i+2:] + i = bytes.IndexByte(line, '\t') + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + enc, ok = parseHex(line[:i], encstart) + if !ok { + log.Fatalf("cannot parse disassembly: %q", oline) + } + line = bytes.TrimSpace(line[i:]) + if bytes.Contains(line, undefined) { + text = "undefined" + return + } + if false && bytes.Contains(line, unpredictable) { + text = "unpredictable" + return + } + // Strip trailing comment starting with '#' + if i := bytes.IndexByte(line, '#'); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + // Strip trailing comment starting with "//" + if i := bytes.Index(line, slashslash); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + text = string(fixSpace(line)) + return +} + +func parseContinuation(line []byte, enc []byte) []byte { + i := 
index(line, ":\t") + if i < 0 { + return nil + } + line = line[i+1:] + enc, _ = parseHex(line, enc) + return enc +} + +// writeELF64 writes an ELF64 header to the file, describing a text +// segment that starts at start (0x8000) and extends for size bytes. +func writeELF64(f *os.File, size int) error { + f.Seek(0, io.SeekStart) + var hdr elf.Header64 + var prog elf.Prog64 + var sect elf.Section64 + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, &hdr) + off1 := buf.Len() + binary.Write(&buf, binary.LittleEndian, &prog) + off2 := buf.Len() + binary.Write(&buf, binary.LittleEndian, §) + off3 := buf.Len() + buf.Reset() + data := byte(elf.ELFDATA2LSB) + hdr = elf.Header64{ + Ident: [16]byte{0x7F, 'E', 'L', 'F', 2, data, 1}, + Type: 2, + Machine: uint16(elf.EM_LOONGARCH), + Version: 1, + Entry: start, + Phoff: uint64(off1), + Shoff: uint64(off2), + Flags: 0x3, + Ehsize: uint16(off1), + Phentsize: uint16(off2 - off1), + Phnum: 1, + Shentsize: uint16(off3 - off2), + Shnum: 3, + Shstrndx: 2, + } + binary.Write(&buf, binary.LittleEndian, &hdr) + prog = elf.Prog64{ + Type: 1, + Off: start, + Vaddr: start, + Paddr: start, + Filesz: uint64(size), + Memsz: uint64(size), + Flags: 5, + Align: start, + } + binary.Write(&buf, binary.LittleEndian, &prog) + binary.Write(&buf, binary.LittleEndian, §) // NULL section + sect = elf.Section64{ + Name: 1, + Type: uint32(elf.SHT_PROGBITS), + Addr: start, + Off: start, + Size: uint64(size), + Flags: uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR), + Addralign: 4, + } + binary.Write(&buf, binary.LittleEndian, §) // .text + sect = elf.Section64{ + Name: uint32(len("\x00.text\x00")), + Type: uint32(elf.SHT_STRTAB), + Addr: 0, + Off: uint64(off2 + (off3-off2)*3), + Size: uint64(len("\x00.text\x00.shstrtab\x00")), + Addralign: 1, + } + binary.Write(&buf, binary.LittleEndian, §) + buf.WriteString("\x00.text\x00.shstrtab\x00") + f.Write(buf.Bytes()) + return nil +} diff --git a/loong64/loong64asm/plan9x.go b/loong64/loong64asm/plan9x.go 
new file mode 100644 index 00000000..5db32903 --- /dev/null +++ b/loong64/loong64asm/plan9x.go @@ -0,0 +1,536 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loong64asm + +import ( + "fmt" + "strings" +) + +// GoSyntax returns the Go assembler syntax for the instruction. +// The syntax was originally defined by Plan 9. +// The pc is the program counter of the instruction, used for +// expanding PC-relative addresses into absolute ones. +// The symname function queries the symbol table for the program +// being disassembled. Given a target address it returns the name +// and base address of the symbol containing the target, if any; +// otherwise it returns "", 0. +func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string { + if symname == nil { + symname = func(uint64) (string, uint64) { return "", 0 } + } + if inst.Op == 0 && inst.Enc == 0 { + return "WORD $0" + } else if inst.Op == 0 { + return "?" + } + + var args []string + for _, a := range inst.Args { + if a == nil { + break + } + args = append(args, plan9Arg(&inst, pc, symname, a)) + } + + var op string = plan9OpMap[inst.Op] + if op == "" { + op = "Unknown " + inst.Op.String() + } + + switch inst.Op { + case BSTRPICK_W, BSTRPICK_D, BSTRINS_W, BSTRINS_D: + msbw, lsbw := inst.Args[2].(Uimm), inst.Args[3].(Uimm) + if inst.Op == BSTRPICK_D && msbw.Imm == 15 && lsbw.Imm == 0 { + op = "MOVHU" + args = append(args[1:2], args[0:1]...) + } else { + args[0], args[1], args[2], args[3] = args[2], args[1], args[3], args[0] + } + + case BCNEZ, BCEQZ: + args = args[1:2] + + case BEQ, BNE: + rj := inst.Args[0].(Reg) + rd := inst.Args[1].(Reg) + if rj == rd && inst.Op == BEQ { + op = "JMP" + args = args[2:] + } else if rj == R0 { + args = args[1:] + } else if rd == R0 { + args = append(args[:1], args[2:]...) 
+ } + + case BEQZ, BNEZ: + if inst.Args[0].(Reg) == R0 && inst.Op == BEQ { + op = "JMP" + args = args[1:] + } + + case BLT, BLTU, BGE, BGEU: + rj := inst.Args[0].(Reg) + rd := inst.Args[1].(Reg) + if rj == rd && (inst.Op == BGE || inst.Op == BGEU) { + op = "JMP" + args = args[2:] + } else if rj == R0 { + switch inst.Op { + case BGE: + op = "BLEZ" + case BLT: + op = "BGTZ" + } + args = args[1:] + } else if rd == R0 { + if !strings.HasSuffix(op, "U") { + op += "Z" + } + args = append(args[:1], args[2:]...) + } + + case JIRL: + rd := inst.Args[0].(Reg) + rj := inst.Args[1].(Reg) + regno := uint16(rj) & 31 + if rd == R0 { + return fmt.Sprintf("JMP (R%d)", regno) + } + return fmt.Sprintf("CALL (R%d)", regno) + + case LD_B, LD_H, LD_W, LD_D, LD_BU, LD_HU, LD_WU, LL_W, LL_D, + ST_B, ST_H, ST_W, ST_D, SC_W, SC_D, FLD_S, FLD_D, FST_S, FST_D: + var off int32 + switch a := inst.Args[2].(type) { + case Simm16: + off = signumConvInt32(int32(a.Imm), a.Width) + case Simm32: + off = signumConvInt32(int32(a.Imm), a.Width) >> 2 + } + Iop := strings.ToUpper(inst.Op.String()) + if strings.HasPrefix(Iop, "L") || strings.HasPrefix(Iop, "FL") { + return fmt.Sprintf("%s %d(%s), %s", op, off, args[1], args[0]) + } + return fmt.Sprintf("%s %s, %d(%s)", op, args[0], off, args[1]) + + case LDX_B, LDX_H, LDX_W, LDX_D, LDX_BU, LDX_HU, LDX_WU, FLDX_S, FLDX_D, + STX_B, STX_H, STX_W, STX_D, FSTX_S, FSTX_D: + Iop := strings.ToUpper(inst.Op.String()) + if strings.HasPrefix(Iop, "L") || strings.HasPrefix(Iop, "FL") { + return fmt.Sprintf("%s (%s)(%s), %s", op, args[1], args[2], args[0]) + } + return fmt.Sprintf("%s %s, (%s)(%s)", op, args[0], args[1], args[2]) + + case AMADD_B, AMADD_D, AMADD_DB_B, AMADD_DB_D, AMADD_DB_H, AMADD_DB_W, AMADD_H, + AMADD_W, AMAND_D, AMAND_DB_D, AMAND_DB_W, AMAND_W, AMCAS_B, AMCAS_D, AMCAS_DB_B, + AMCAS_DB_D, AMCAS_DB_H, AMCAS_DB_W, AMCAS_H, AMCAS_W, AMMAX_D, AMMAX_DB_D, + AMMAX_DB_DU, AMMAX_DB_W, AMMAX_DB_WU, AMMAX_DU, AMMAX_W, AMMAX_WU, AMMIN_D, + AMMIN_DB_D, 
AMMIN_DB_DU, AMMIN_DB_W, AMMIN_DB_WU, AMMIN_DU, AMMIN_W, AMMIN_WU, + AMOR_D, AMOR_DB_D, AMOR_DB_W, AMOR_W, AMSWAP_B, AMSWAP_D, AMSWAP_DB_B, AMSWAP_DB_D, + AMSWAP_DB_H, AMSWAP_DB_W, AMSWAP_H, AMSWAP_W, AMXOR_D, AMXOR_DB_D, AMXOR_DB_W, AMXOR_W: + return fmt.Sprintf("%s %s, (%s), %s", op, args[1], args[2], args[0]) + + default: + // Reverse args, placing dest last + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + switch len(args) { // Special use cases + case 0, 1: + if inst.Op != B && inst.Op != BL { + return op + } + + case 3: + switch a0 := inst.Args[0].(type) { + case Reg: + rj := inst.Args[1].(Reg) + if a0 == rj && a0 != R0 { + args = args[0:2] + } + } + switch inst.Op { + case SUB_W, SUB_D, ADDI_W, ADDI_D, ORI: + rj := inst.Args[1].(Reg) + if rj == R0 { + args = append(args[0:1], args[2:]...) + if inst.Op == SUB_W { + op = "NEGW" + } else if inst.Op == SUB_D { + op = "NEGV" + } else { + op = "MOVW" + } + } + + case ANDI: + ui12 := inst.Args[2].(Uimm) + if ui12.Imm == uint32(0xff) { + op = "MOVBU" + args = args[1:] + } else if ui12.Imm == 0 && inst.Args[0].(Reg) == R0 && inst.Args[1].(Reg) == R0 { + return "NOOP" + } + + case SLL_W, OR: + rk := inst.Args[2].(Reg) + if rk == R0 { + args = args[1:] + if inst.Op == SLL_W { + op = "MOVW" + } else { + op = "MOVV" + } + } + } + } + } + + if args != nil { + op += " " + strings.Join(args, ", ") + } + return op +} + +func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { + // Reg: gpr[0, 31] and fpr[0, 31] + // Fcsr: fcsr[0, 3] + // Fcc: fcc[0, 7] + // Uimm: unsigned integer constant + // Simm16: si16 + // Simm32: si32 + // OffsetSimm: si32 + switch a := arg.(type) { + case Reg: + regenum := uint16(a) + regno := uint16(a) & 0x1f + // General-purpose register + if regenum >= uint16(R0) && regenum <= uint16(R31) { + return fmt.Sprintf("R%d", regno) + } else { // Float point register + return fmt.Sprintf("F%d", regno) + } + + case Fcsr: + 
regno := uint8(a) & 0x1f + return fmt.Sprintf("FCSR%d", regno) + + case Fcc: + regno := uint8(a) & 0x1f + return fmt.Sprintf("FCC%d", regno) + + case Uimm: + return fmt.Sprintf("$%d", a.Imm) + + case Simm16: + si16 := signumConvInt32(int32(a.Imm), a.Width) + return fmt.Sprintf("$%d", si16) + + case Simm32: + si32 := signumConvInt32(a.Imm, a.Width) + return fmt.Sprintf("$%d", si32) + + case OffsetSimm: + offs := offsConvInt32(a.Imm, a.Width) + if inst.Op == B || inst.Op == BL { + addr := int64(pc) + int64(a.Imm) + if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base { + return fmt.Sprintf("%s(SB)", s) + } + } + return fmt.Sprintf("%d(PC)", offs>>2) + + case SaSimm: + return fmt.Sprintf("$%d", a) + + case CodeSimm: + return fmt.Sprintf("$%d", a) + + } + return strings.ToUpper(arg.String()) +} + +func signumConvInt32(imm int32, width uint8) int32 { + active := uint32(1<> (width - 1)) & 0x1) == 1 { + signum |= ^active + } + return int32(signum) +} + +func offsConvInt32(imm int32, width uint8) int32 { + relWidth := width + 2 + return signumConvInt32(imm, relWidth) +} + +var plan9OpMap = map[Op]string{ + ADD_W: "ADD", + ADD_D: "ADDV", + SUB_W: "SUB", + SUB_D: "SUBV", + ADDI_W: "ADD", + ADDI_D: "ADDV", + LU12I_W: "LU12IW", + LU32I_D: "LU32ID", + LU52I_D: "LU52ID", + SLT: "SGT", + SLTU: "SGTU", + SLTI: "SGT", + SLTUI: "SGTU", + PCADDU12I: "PCADDU12I", + PCALAU12I: "PCALAU12I", + AND: "AND", + OR: "OR", + NOR: "NOR", + XOR: "XOR", + ANDI: "AND", + ORI: "OR", + XORI: "XOR", + MUL_W: "MUL", + MULH_W: "MULH", + MULH_WU: "MULHU", + MUL_D: "MULV", + MULH_D: "MULHV", + MULH_DU: "MULHVU", + DIV_W: "DIV", + DIV_WU: "DIVU", + DIV_D: "DIVV", + DIV_DU: "DIVVU", + MOD_W: "REM", + MOD_WU: "REMU", + MOD_D: "REMV", + MOD_DU: "REMVU", + SLL_W: "SLL", + SRL_W: "SRL", + SRA_W: "SRA", + ROTR_W: "ROTR", + SLL_D: "SLLV", + SRL_D: "SRLV", + SRA_D: "SRAV", + ROTR_D: "ROTRV", + SLLI_W: "SLL", + SRLI_W: "SRL", + SRAI_W: "SRA", + ROTRI_W: "ROTR", + SLLI_D: "SLLV", + SRLI_D: "SRLV", + 
SRAI_D: "SRAV", + ROTRI_D: "ROTRV", + EXT_W_B: "?", + EXT_W_H: "?", + BITREV_W: "BITREVW", + BITREV_D: "BITREVV", + CLO_W: "CLOW", + CLO_D: "CLOV", + CLZ_W: "CLZW", + CLZ_D: "CLZV", + CTO_W: "CTOW", + CTO_D: "CTOV", + CTZ_W: "CTZW", + CTZ_D: "CTZV", + REVB_2H: "REVB2H", + REVB_2W: "REVB2W", + REVB_4H: "REVB4H", + REVB_D: "REVBV", + BSTRPICK_W: "BSTRPICKW", + BSTRPICK_D: "BSTRPICKV", + BSTRINS_W: "BSTRINSW", + BSTRINS_D: "BSTRINSV", + MASKEQZ: "MASKEQZ", + MASKNEZ: "MASKNEZ", + BCNEZ: "BFPT", + BCEQZ: "BFPF", + BEQ: "BEQ", + BNE: "BNE", + BEQZ: "BEQ", + BNEZ: "BNE", + BLT: "BLT", + BLTU: "BLTU", + BGE: "BGE", + BGEU: "BGEU", + B: "JMP", + BL: "CALL", + LD_B: "MOVB", + LD_H: "MOVH", + LD_W: "MOVW", + LD_D: "MOVV", + LD_BU: "MOVBU", + LD_HU: "MOVHU", + LD_WU: "MOVWU", + ST_B: "MOVB", + ST_H: "MOVH", + ST_W: "MOVW", + ST_D: "MOVV", + LDX_B: "MOVB", + LDX_BU: "MOVBU", + LDX_D: "MOVV", + LDX_H: "MOVH", + LDX_HU: "MOVHU", + LDX_W: "MOVW", + LDX_WU: "MOVWU", + STX_B: "MOVB", + STX_D: "MOVV", + STX_H: "MOVH", + STX_W: "MOVW", + AMADD_B: "AMADDB", + AMADD_D: "AMADDV", + AMADD_DB_B: "AMADDDBB", + AMADD_DB_D: "AMADDDBV", + AMADD_DB_H: "AMADDDBH", + AMADD_DB_W: "AMADDDBW", + AMADD_H: "AMADDH", + AMADD_W: "AMADDW", + AMAND_D: "AMANDV", + AMAND_DB_D: "AMANDDBV", + AMAND_DB_W: "AMANDDBW", + AMAND_W: "AMANDW", + AMCAS_B: "AMCASB", + AMCAS_D: "AMCASV", + AMCAS_DB_B: "AMCASDBB", + AMCAS_DB_D: "AMCASDBV", + AMCAS_DB_H: "AMCASDBH", + AMCAS_DB_W: "AMCASDBW", + AMCAS_H: "AMCASH", + AMCAS_W: "AMCASW", + AMMAX_D: "AMMAXV", + AMMAX_DB_D: "AMMAXDBV", + AMMAX_DB_DU: "AMMAXDBVU", + AMMAX_DB_W: "AMMAXDBW", + AMMAX_DB_WU: "AMMAXDBWU", + AMMAX_DU: "AMMAXVU", + AMMAX_W: "AMMAXW", + AMMAX_WU: "AMMAXWU", + AMMIN_D: "AMMINV", + AMMIN_DB_D: "AMMINDBV", + AMMIN_DB_DU: "AMMINDBVU", + AMMIN_DB_W: "AMMINDBW", + AMMIN_DB_WU: "AMMINDBWU", + AMMIN_DU: "AMMINVU", + AMMIN_W: "AMMINW", + AMMIN_WU: "AMMINWU", + AMOR_D: "AMORV", + AMOR_DB_D: "AMORDBV", + AMOR_DB_W: "AMORDBW", + AMOR_W: "AMORW", + AMSWAP_B: 
"AMSWAPB", + AMSWAP_D: "AMSWAPV", + AMSWAP_DB_B: "AMSWAPDBB", + AMSWAP_DB_D: "AMSWAPDBV", + AMSWAP_DB_H: "AMSWAPDBH", + AMSWAP_DB_W: "AMSWAPDBW", + AMSWAP_H: "AMSWAPH", + AMSWAP_W: "AMSWAPW", + AMXOR_D: "AMXORV", + AMXOR_DB_D: "AMXORDBV", + AMXOR_DB_W: "AMXORDBW", + AMXOR_W: "AMXORW", + LL_W: "LL", + LL_D: "LLV", + SC_W: "SC", + SC_D: "SCV", + CRCC_W_B_W: "CRCCWBW", + CRCC_W_D_W: "CRCCWVW", + CRCC_W_H_W: "CRCCWHW", + CRCC_W_W_W: "CRCCWWW", + CRC_W_B_W: "CRCWBW", + CRC_W_D_W: "CRCWVW", + CRC_W_H_W: "CRCWHW", + CRC_W_W_W: "CRCWWW", + DBAR: "DBAR", + SYSCALL: "SYSCALL", + BREAK: "BREAK", + RDTIMEL_W: "RDTIMELW", + RDTIMEH_W: "RDTIMEHW", + RDTIME_D: "RDTIMED", + CPUCFG: "CPUCFG", + + // Floating-point instructions + FADD_S: "ADDF", + FADD_D: "ADDD", + FSUB_S: "SUBF", + FSUB_D: "SUBD", + FMUL_S: "MULF", + FMUL_D: "MULD", + FDIV_S: "DIVF", + FDIV_D: "DIVD", + FMSUB_S: "FMSUBF", + FMSUB_D: "FMSUBD", + FMADD_S: "FMADDF", + FMADD_D: "FMADDD", + FNMADD_S: "FNMADDF", + FNMADD_D: "FNMADDD", + FNMSUB_S: "FNMSUBF", + FNMSUB_D: "FNMSUBD", + FABS_S: "ABSF", + FABS_D: "ABSD", + FNEG_S: "NEGF", + FNEG_D: "NEGD", + FSQRT_S: "SQRTF", + FSQRT_D: "SQRTD", + FCOPYSIGN_S: "FCOPYSGF", + FCOPYSIGN_D: "FCOPYSGD", + FMAX_S: "FMAXF", + FMAX_D: "FMAXD", + FMIN_S: "FMINF", + FMIN_D: "FMIND", + FCLASS_S: "FCLASSF", + FCLASS_D: "FCLASSD", + FCMP_CEQ_S: "CMPEQF", + FCMP_CEQ_D: "CMPEQD", + FCMP_SLE_S: "CMPGEF", + FCMP_SLE_D: "CMPGED", + FCMP_SLT_S: "CMPGTF", + FCMP_SLT_D: "CMPGTD", + FCVT_D_S: "MOVFD", + FCVT_S_D: "MOVDF", + FFINT_S_W: "FFINTFW", + FFINT_S_L: "FFINTFV", + FFINT_D_W: "FFINTDW", + FFINT_D_L: "FFINTDV", + FTINTRM_L_D: "FTINTRMVD", + FTINTRM_L_S: "FTINTRMVF", + FTINTRM_W_D: "FTINTRMWD", + FTINTRM_W_S: "FTINTRMWF", + FTINTRNE_L_D: "FTINTRNEVD", + FTINTRNE_L_S: "FTINTRNEVF", + FTINTRNE_W_D: "FTINTRNEWD", + FTINTRNE_W_S: "FTINTRNEWF", + FTINTRP_L_D: "FTINTRPVD", + FTINTRP_L_S: "FTINTRPVF", + FTINTRP_W_D: "FTINTRPWD", + FTINTRP_W_S: "FTINTRPWF", + FTINTRZ_L_D: "FTINTRZVD", + FTINTRZ_L_S: 
"FTINTRZVF", + FTINTRZ_W_D: "FTINTRZWD", + FTINTRZ_W_S: "FTINTRZWF", + FTINT_L_D: "FTINTVD", + FTINT_L_S: "FTINTVF", + FTINT_W_D: "FTINTWD", + FTINT_W_S: "FTINTWF", + FRINT_S: "FRINTS", + FRINT_D: "FRINTD", + FMOV_S: "MOVF", + FMOV_D: "MOVD", + MOVGR2FR_W: "MOVW", + MOVGR2FR_D: "MOVV", + MOVFR2GR_S: "MOVW", + MOVFR2GR_D: "MOVV", + MOVGR2CF: "MOVV", + MOVCF2GR: "MOVV", + MOVFCSR2GR: "MOVV", + MOVGR2FCSR: "MOVV", + MOVFR2CF: "MOVV", + MOVCF2FR: "MOVV", + FLD_S: "MOVF", + FLD_D: "MOVD", + FST_S: "MOVF", + FST_D: "MOVD", + FLDX_S: "MOVF", + FLDX_D: "MOVD", + FSTX_S: "MOVF", + FSTX_D: "MOVD", +} diff --git a/loong64/loong64asm/tables.go b/loong64/loong64asm/tables.go new file mode 100644 index 00000000..c85d47c2 --- /dev/null +++ b/loong64/loong64asm/tables.go @@ -0,0 +1,1613 @@ +// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT. + +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package loong64asm + +const ( + _ Op = iota + ADDI_D + ADDI_W + ADDU16I_D + ADD_D + ADD_W + ALSL_D + ALSL_W + ALSL_WU + AMADD_B + AMADD_D + AMADD_DB_B + AMADD_DB_D + AMADD_DB_H + AMADD_DB_W + AMADD_H + AMADD_W + AMAND_D + AMAND_DB_D + AMAND_DB_W + AMAND_W + AMCAS_B + AMCAS_D + AMCAS_DB_B + AMCAS_DB_D + AMCAS_DB_H + AMCAS_DB_W + AMCAS_H + AMCAS_W + AMMAX_D + AMMAX_DB_D + AMMAX_DB_DU + AMMAX_DB_W + AMMAX_DB_WU + AMMAX_DU + AMMAX_W + AMMAX_WU + AMMIN_D + AMMIN_DB_D + AMMIN_DB_DU + AMMIN_DB_W + AMMIN_DB_WU + AMMIN_DU + AMMIN_W + AMMIN_WU + AMOR_D + AMOR_DB_D + AMOR_DB_W + AMOR_W + AMSWAP_B + AMSWAP_D + AMSWAP_DB_B + AMSWAP_DB_D + AMSWAP_DB_H + AMSWAP_DB_W + AMSWAP_H + AMSWAP_W + AMXOR_D + AMXOR_DB_D + AMXOR_DB_W + AMXOR_W + AND + ANDI + ANDN + ASRTGT_D + ASRTLE_D + B + BCEQZ + BCNEZ + BEQ + BEQZ + BGE + BGEU + BITREV_4B + BITREV_8B + BITREV_D + BITREV_W + BL + BLT + BLTU + BNE + BNEZ + BREAK + BSTRINS_D + BSTRINS_W + BSTRPICK_D + BSTRPICK_W + BYTEPICK_D + BYTEPICK_W + CACOP + CLO_D + CLO_W + CLZ_D + CLZ_W + CPUCFG + CRCC_W_B_W + CRCC_W_D_W + CRCC_W_H_W + CRCC_W_W_W + CRC_W_B_W + CRC_W_D_W + CRC_W_H_W + CRC_W_W_W + CSRRD + CSRWR + CSRXCHG + CTO_D + CTO_W + CTZ_D + CTZ_W + DBAR + DBCL + DIV_D + DIV_DU + DIV_W + DIV_WU + ERTN + EXT_W_B + EXT_W_H + FABS_D + FABS_S + FADD_D + FADD_S + FCLASS_D + FCLASS_S + FCMP_CAF_D + FCMP_CAF_S + FCMP_CEQ_D + FCMP_CEQ_S + FCMP_CLE_D + FCMP_CLE_S + FCMP_CLT_D + FCMP_CLT_S + FCMP_CNE_D + FCMP_CNE_S + FCMP_COR_D + FCMP_COR_S + FCMP_CUEQ_D + FCMP_CUEQ_S + FCMP_CULE_D + FCMP_CULE_S + FCMP_CULT_D + FCMP_CULT_S + FCMP_CUNE_D + FCMP_CUNE_S + FCMP_CUN_D + FCMP_CUN_S + FCMP_SAF_D + FCMP_SAF_S + FCMP_SEQ_D + FCMP_SEQ_S + FCMP_SLE_D + FCMP_SLE_S + FCMP_SLT_D + FCMP_SLT_S + FCMP_SNE_D + FCMP_SNE_S + FCMP_SOR_D + FCMP_SOR_S + FCMP_SUEQ_D + FCMP_SUEQ_S + FCMP_SULE_D + FCMP_SULE_S + FCMP_SULT_D + FCMP_SULT_S + FCMP_SUNE_D + FCMP_SUNE_S + FCMP_SUN_D + FCMP_SUN_S + FCOPYSIGN_D + FCOPYSIGN_S + FCVT_D_S + FCVT_S_D + FDIV_D + FDIV_S + FFINT_D_L + FFINT_D_W 
+ FFINT_S_L + FFINT_S_W + FLDGT_D + FLDGT_S + FLDLE_D + FLDLE_S + FLDX_D + FLDX_S + FLD_D + FLD_S + FLOGB_D + FLOGB_S + FMADD_D + FMADD_S + FMAXA_D + FMAXA_S + FMAX_D + FMAX_S + FMINA_D + FMINA_S + FMIN_D + FMIN_S + FMOV_D + FMOV_S + FMSUB_D + FMSUB_S + FMUL_D + FMUL_S + FNEG_D + FNEG_S + FNMADD_D + FNMADD_S + FNMSUB_D + FNMSUB_S + FRECIPE_D + FRECIPE_S + FRECIP_D + FRECIP_S + FRINT_D + FRINT_S + FRSQRTE_D + FRSQRTE_S + FRSQRT_D + FRSQRT_S + FSCALEB_D + FSCALEB_S + FSEL + FSQRT_D + FSQRT_S + FSTGT_D + FSTGT_S + FSTLE_D + FSTLE_S + FSTX_D + FSTX_S + FST_D + FST_S + FSUB_D + FSUB_S + FTINTRM_L_D + FTINTRM_L_S + FTINTRM_W_D + FTINTRM_W_S + FTINTRNE_L_D + FTINTRNE_L_S + FTINTRNE_W_D + FTINTRNE_W_S + FTINTRP_L_D + FTINTRP_L_S + FTINTRP_W_D + FTINTRP_W_S + FTINTRZ_L_D + FTINTRZ_L_S + FTINTRZ_W_D + FTINTRZ_W_S + FTINT_L_D + FTINT_L_S + FTINT_W_D + FTINT_W_S + IBAR + IDLE + INVTLB + IOCSRRD_B + IOCSRRD_D + IOCSRRD_H + IOCSRRD_W + IOCSRWR_B + IOCSRWR_D + IOCSRWR_H + IOCSRWR_W + JIRL + LDDIR + LDGT_B + LDGT_D + LDGT_H + LDGT_W + LDLE_B + LDLE_D + LDLE_H + LDLE_W + LDPTE + LDPTR_D + LDPTR_W + LDX_B + LDX_BU + LDX_D + LDX_H + LDX_HU + LDX_W + LDX_WU + LD_B + LD_BU + LD_D + LD_H + LD_HU + LD_W + LD_WU + LLACQ_D + LLACQ_W + LL_D + LL_W + LU12I_W + LU32I_D + LU52I_D + MASKEQZ + MASKNEZ + MOD_D + MOD_DU + MOD_W + MOD_WU + MOVCF2FR + MOVCF2GR + MOVFCSR2GR + MOVFR2CF + MOVFR2GR_D + MOVFR2GR_S + MOVFRH2GR_S + MOVGR2CF + MOVGR2FCSR + MOVGR2FRH_W + MOVGR2FR_D + MOVGR2FR_W + MULH_D + MULH_DU + MULH_W + MULH_WU + MULW_D_W + MULW_D_WU + MUL_D + MUL_W + NOR + OR + ORI + ORN + PCADDI + PCADDU12I + PCADDU18I + PCALAU12I + PRELD + PRELDX + RDTIMEH_W + RDTIMEL_W + RDTIME_D + REVB_2H + REVB_2W + REVB_4H + REVB_D + REVH_2W + REVH_D + ROTRI_D + ROTRI_W + ROTR_D + ROTR_W + SCREL_D + SCREL_W + SC_D + SC_Q + SC_W + SLLI_D + SLLI_W + SLL_D + SLL_W + SLT + SLTI + SLTU + SLTUI + SRAI_D + SRAI_W + SRA_D + SRA_W + SRLI_D + SRLI_W + SRL_D + SRL_W + STGT_B + STGT_D + STGT_H + STGT_W + STLE_B + STLE_D + 
STLE_H + STLE_W + STPTR_D + STPTR_W + STX_B + STX_D + STX_H + STX_W + ST_B + ST_D + ST_H + ST_W + SUB_D + SUB_W + SYSCALL + TLBCLR + TLBFILL + TLBFLUSH + TLBRD + TLBSRCH + TLBWR + XOR + XORI +) + +var opstr = [...]string{ + ADDI_D: "ADDI.D", + ADDI_W: "ADDI.W", + ADDU16I_D: "ADDU16I.D", + ADD_D: "ADD.D", + ADD_W: "ADD.W", + ALSL_D: "ALSL.D", + ALSL_W: "ALSL.W", + ALSL_WU: "ALSL.WU", + AMADD_B: "AMADD.B", + AMADD_D: "AMADD.D", + AMADD_DB_B: "AMADD_DB.B", + AMADD_DB_D: "AMADD_DB.D", + AMADD_DB_H: "AMADD_DB.H", + AMADD_DB_W: "AMADD_DB.W", + AMADD_H: "AMADD.H", + AMADD_W: "AMADD.W", + AMAND_D: "AMAND.D", + AMAND_DB_D: "AMAND_DB.D", + AMAND_DB_W: "AMAND_DB.W", + AMAND_W: "AMAND.W", + AMCAS_B: "AMCAS.B", + AMCAS_D: "AMCAS.D", + AMCAS_DB_B: "AMCAS_DB.B", + AMCAS_DB_D: "AMCAS_DB.D", + AMCAS_DB_H: "AMCAS_DB.H", + AMCAS_DB_W: "AMCAS_DB.W", + AMCAS_H: "AMCAS.H", + AMCAS_W: "AMCAS.W", + AMMAX_D: "AMMAX.D", + AMMAX_DB_D: "AMMAX_DB.D", + AMMAX_DB_DU: "AMMAX_DB.DU", + AMMAX_DB_W: "AMMAX_DB.W", + AMMAX_DB_WU: "AMMAX_DB.WU", + AMMAX_DU: "AMMAX.DU", + AMMAX_W: "AMMAX.W", + AMMAX_WU: "AMMAX.WU", + AMMIN_D: "AMMIN.D", + AMMIN_DB_D: "AMMIN_DB.D", + AMMIN_DB_DU: "AMMIN_DB.DU", + AMMIN_DB_W: "AMMIN_DB.W", + AMMIN_DB_WU: "AMMIN_DB.WU", + AMMIN_DU: "AMMIN.DU", + AMMIN_W: "AMMIN.W", + AMMIN_WU: "AMMIN.WU", + AMOR_D: "AMOR.D", + AMOR_DB_D: "AMOR_DB.D", + AMOR_DB_W: "AMOR_DB.W", + AMOR_W: "AMOR.W", + AMSWAP_B: "AMSWAP.B", + AMSWAP_D: "AMSWAP.D", + AMSWAP_DB_B: "AMSWAP_DB.B", + AMSWAP_DB_D: "AMSWAP_DB.D", + AMSWAP_DB_H: "AMSWAP_DB.H", + AMSWAP_DB_W: "AMSWAP_DB.W", + AMSWAP_H: "AMSWAP.H", + AMSWAP_W: "AMSWAP.W", + AMXOR_D: "AMXOR.D", + AMXOR_DB_D: "AMXOR_DB.D", + AMXOR_DB_W: "AMXOR_DB.W", + AMXOR_W: "AMXOR.W", + AND: "AND", + ANDI: "ANDI", + ANDN: "ANDN", + ASRTGT_D: "ASRTGT.D", + ASRTLE_D: "ASRTLE.D", + B: "B", + BCEQZ: "BCEQZ", + BCNEZ: "BCNEZ", + BEQ: "BEQ", + BEQZ: "BEQZ", + BGE: "BGE", + BGEU: "BGEU", + BITREV_4B: "BITREV.4B", + BITREV_8B: "BITREV.8B", + BITREV_D: "BITREV.D", + BITREV_W: 
"BITREV.W", + BL: "BL", + BLT: "BLT", + BLTU: "BLTU", + BNE: "BNE", + BNEZ: "BNEZ", + BREAK: "BREAK", + BSTRINS_D: "BSTRINS.D", + BSTRINS_W: "BSTRINS.W", + BSTRPICK_D: "BSTRPICK.D", + BSTRPICK_W: "BSTRPICK.W", + BYTEPICK_D: "BYTEPICK.D", + BYTEPICK_W: "BYTEPICK.W", + CACOP: "CACOP", + CLO_D: "CLO.D", + CLO_W: "CLO.W", + CLZ_D: "CLZ.D", + CLZ_W: "CLZ.W", + CPUCFG: "CPUCFG", + CRCC_W_B_W: "CRCC.W.B.W", + CRCC_W_D_W: "CRCC.W.D.W", + CRCC_W_H_W: "CRCC.W.H.W", + CRCC_W_W_W: "CRCC.W.W.W", + CRC_W_B_W: "CRC.W.B.W", + CRC_W_D_W: "CRC.W.D.W", + CRC_W_H_W: "CRC.W.H.W", + CRC_W_W_W: "CRC.W.W.W", + CSRRD: "CSRRD", + CSRWR: "CSRWR", + CSRXCHG: "CSRXCHG", + CTO_D: "CTO.D", + CTO_W: "CTO.W", + CTZ_D: "CTZ.D", + CTZ_W: "CTZ.W", + DBAR: "DBAR", + DBCL: "DBCL", + DIV_D: "DIV.D", + DIV_DU: "DIV.DU", + DIV_W: "DIV.W", + DIV_WU: "DIV.WU", + ERTN: "ERTN", + EXT_W_B: "EXT.W.B", + EXT_W_H: "EXT.W.H", + FABS_D: "FABS.D", + FABS_S: "FABS.S", + FADD_D: "FADD.D", + FADD_S: "FADD.S", + FCLASS_D: "FCLASS.D", + FCLASS_S: "FCLASS.S", + FCMP_CAF_D: "FCMP.CAF.D", + FCMP_CAF_S: "FCMP.CAF.S", + FCMP_CEQ_D: "FCMP.CEQ.D", + FCMP_CEQ_S: "FCMP.CEQ.S", + FCMP_CLE_D: "FCMP.CLE.D", + FCMP_CLE_S: "FCMP.CLE.S", + FCMP_CLT_D: "FCMP.CLT.D", + FCMP_CLT_S: "FCMP.CLT.S", + FCMP_CNE_D: "FCMP.CNE.D", + FCMP_CNE_S: "FCMP.CNE.S", + FCMP_COR_D: "FCMP.COR.D", + FCMP_COR_S: "FCMP.COR.S", + FCMP_CUEQ_D: "FCMP.CUEQ.D", + FCMP_CUEQ_S: "FCMP.CUEQ.S", + FCMP_CULE_D: "FCMP.CULE.D", + FCMP_CULE_S: "FCMP.CULE.S", + FCMP_CULT_D: "FCMP.CULT.D", + FCMP_CULT_S: "FCMP.CULT.S", + FCMP_CUNE_D: "FCMP.CUNE.D", + FCMP_CUNE_S: "FCMP.CUNE.S", + FCMP_CUN_D: "FCMP.CUN.D", + FCMP_CUN_S: "FCMP.CUN.S", + FCMP_SAF_D: "FCMP.SAF.D", + FCMP_SAF_S: "FCMP.SAF.S", + FCMP_SEQ_D: "FCMP.SEQ.D", + FCMP_SEQ_S: "FCMP.SEQ.S", + FCMP_SLE_D: "FCMP.SLE.D", + FCMP_SLE_S: "FCMP.SLE.S", + FCMP_SLT_D: "FCMP.SLT.D", + FCMP_SLT_S: "FCMP.SLT.S", + FCMP_SNE_D: "FCMP.SNE.D", + FCMP_SNE_S: "FCMP.SNE.S", + FCMP_SOR_D: "FCMP.SOR.D", + FCMP_SOR_S: "FCMP.SOR.S", + 
FCMP_SUEQ_D: "FCMP.SUEQ.D", + FCMP_SUEQ_S: "FCMP.SUEQ.S", + FCMP_SULE_D: "FCMP.SULE.D", + FCMP_SULE_S: "FCMP.SULE.S", + FCMP_SULT_D: "FCMP.SULT.D", + FCMP_SULT_S: "FCMP.SULT.S", + FCMP_SUNE_D: "FCMP.SUNE.D", + FCMP_SUNE_S: "FCMP.SUNE.S", + FCMP_SUN_D: "FCMP.SUN.D", + FCMP_SUN_S: "FCMP.SUN.S", + FCOPYSIGN_D: "FCOPYSIGN.D", + FCOPYSIGN_S: "FCOPYSIGN.S", + FCVT_D_S: "FCVT.D.S", + FCVT_S_D: "FCVT.S.D", + FDIV_D: "FDIV.D", + FDIV_S: "FDIV.S", + FFINT_D_L: "FFINT.D.L", + FFINT_D_W: "FFINT.D.W", + FFINT_S_L: "FFINT.S.L", + FFINT_S_W: "FFINT.S.W", + FLDGT_D: "FLDGT.D", + FLDGT_S: "FLDGT.S", + FLDLE_D: "FLDLE.D", + FLDLE_S: "FLDLE.S", + FLDX_D: "FLDX.D", + FLDX_S: "FLDX.S", + FLD_D: "FLD.D", + FLD_S: "FLD.S", + FLOGB_D: "FLOGB.D", + FLOGB_S: "FLOGB.S", + FMADD_D: "FMADD.D", + FMADD_S: "FMADD.S", + FMAXA_D: "FMAXA.D", + FMAXA_S: "FMAXA.S", + FMAX_D: "FMAX.D", + FMAX_S: "FMAX.S", + FMINA_D: "FMINA.D", + FMINA_S: "FMINA.S", + FMIN_D: "FMIN.D", + FMIN_S: "FMIN.S", + FMOV_D: "FMOV.D", + FMOV_S: "FMOV.S", + FMSUB_D: "FMSUB.D", + FMSUB_S: "FMSUB.S", + FMUL_D: "FMUL.D", + FMUL_S: "FMUL.S", + FNEG_D: "FNEG.D", + FNEG_S: "FNEG.S", + FNMADD_D: "FNMADD.D", + FNMADD_S: "FNMADD.S", + FNMSUB_D: "FNMSUB.D", + FNMSUB_S: "FNMSUB.S", + FRECIPE_D: "FRECIPE.D", + FRECIPE_S: "FRECIPE.S", + FRECIP_D: "FRECIP.D", + FRECIP_S: "FRECIP.S", + FRINT_D: "FRINT.D", + FRINT_S: "FRINT.S", + FRSQRTE_D: "FRSQRTE.D", + FRSQRTE_S: "FRSQRTE.S", + FRSQRT_D: "FRSQRT.D", + FRSQRT_S: "FRSQRT.S", + FSCALEB_D: "FSCALEB.D", + FSCALEB_S: "FSCALEB.S", + FSEL: "FSEL", + FSQRT_D: "FSQRT.D", + FSQRT_S: "FSQRT.S", + FSTGT_D: "FSTGT.D", + FSTGT_S: "FSTGT.S", + FSTLE_D: "FSTLE.D", + FSTLE_S: "FSTLE.S", + FSTX_D: "FSTX.D", + FSTX_S: "FSTX.S", + FST_D: "FST.D", + FST_S: "FST.S", + FSUB_D: "FSUB.D", + FSUB_S: "FSUB.S", + FTINTRM_L_D: "FTINTRM.L.D", + FTINTRM_L_S: "FTINTRM.L.S", + FTINTRM_W_D: "FTINTRM.W.D", + FTINTRM_W_S: "FTINTRM.W.S", + FTINTRNE_L_D: "FTINTRNE.L.D", + FTINTRNE_L_S: "FTINTRNE.L.S", + FTINTRNE_W_D: 
"FTINTRNE.W.D", + FTINTRNE_W_S: "FTINTRNE.W.S", + FTINTRP_L_D: "FTINTRP.L.D", + FTINTRP_L_S: "FTINTRP.L.S", + FTINTRP_W_D: "FTINTRP.W.D", + FTINTRP_W_S: "FTINTRP.W.S", + FTINTRZ_L_D: "FTINTRZ.L.D", + FTINTRZ_L_S: "FTINTRZ.L.S", + FTINTRZ_W_D: "FTINTRZ.W.D", + FTINTRZ_W_S: "FTINTRZ.W.S", + FTINT_L_D: "FTINT.L.D", + FTINT_L_S: "FTINT.L.S", + FTINT_W_D: "FTINT.W.D", + FTINT_W_S: "FTINT.W.S", + IBAR: "IBAR", + IDLE: "IDLE", + INVTLB: "INVTLB", + IOCSRRD_B: "IOCSRRD.B", + IOCSRRD_D: "IOCSRRD.D", + IOCSRRD_H: "IOCSRRD.H", + IOCSRRD_W: "IOCSRRD.W", + IOCSRWR_B: "IOCSRWR.B", + IOCSRWR_D: "IOCSRWR.D", + IOCSRWR_H: "IOCSRWR.H", + IOCSRWR_W: "IOCSRWR.W", + JIRL: "JIRL", + LDDIR: "LDDIR", + LDGT_B: "LDGT.B", + LDGT_D: "LDGT.D", + LDGT_H: "LDGT.H", + LDGT_W: "LDGT.W", + LDLE_B: "LDLE.B", + LDLE_D: "LDLE.D", + LDLE_H: "LDLE.H", + LDLE_W: "LDLE.W", + LDPTE: "LDPTE", + LDPTR_D: "LDPTR.D", + LDPTR_W: "LDPTR.W", + LDX_B: "LDX.B", + LDX_BU: "LDX.BU", + LDX_D: "LDX.D", + LDX_H: "LDX.H", + LDX_HU: "LDX.HU", + LDX_W: "LDX.W", + LDX_WU: "LDX.WU", + LD_B: "LD.B", + LD_BU: "LD.BU", + LD_D: "LD.D", + LD_H: "LD.H", + LD_HU: "LD.HU", + LD_W: "LD.W", + LD_WU: "LD.WU", + LLACQ_D: "LLACQ.D", + LLACQ_W: "LLACQ.W", + LL_D: "LL.D", + LL_W: "LL.W", + LU12I_W: "LU12I.W", + LU32I_D: "LU32I.D", + LU52I_D: "LU52I.D", + MASKEQZ: "MASKEQZ", + MASKNEZ: "MASKNEZ", + MOD_D: "MOD.D", + MOD_DU: "MOD.DU", + MOD_W: "MOD.W", + MOD_WU: "MOD.WU", + MOVCF2FR: "MOVCF2FR", + MOVCF2GR: "MOVCF2GR", + MOVFCSR2GR: "MOVFCSR2GR", + MOVFR2CF: "MOVFR2CF", + MOVFR2GR_D: "MOVFR2GR.D", + MOVFR2GR_S: "MOVFR2GR.S", + MOVFRH2GR_S: "MOVFRH2GR.S", + MOVGR2CF: "MOVGR2CF", + MOVGR2FCSR: "MOVGR2FCSR", + MOVGR2FRH_W: "MOVGR2FRH.W", + MOVGR2FR_D: "MOVGR2FR.D", + MOVGR2FR_W: "MOVGR2FR.W", + MULH_D: "MULH.D", + MULH_DU: "MULH.DU", + MULH_W: "MULH.W", + MULH_WU: "MULH.WU", + MULW_D_W: "MULW.D.W", + MULW_D_WU: "MULW.D.WU", + MUL_D: "MUL.D", + MUL_W: "MUL.W", + NOR: "NOR", + OR: "OR", + ORI: "ORI", + ORN: "ORN", + PCADDI: "PCADDI", + 
PCADDU12I: "PCADDU12I", + PCADDU18I: "PCADDU18I", + PCALAU12I: "PCALAU12I", + PRELD: "PRELD", + PRELDX: "PRELDX", + RDTIMEH_W: "RDTIMEH.W", + RDTIMEL_W: "RDTIMEL.W", + RDTIME_D: "RDTIME.D", + REVB_2H: "REVB.2H", + REVB_2W: "REVB.2W", + REVB_4H: "REVB.4H", + REVB_D: "REVB.D", + REVH_2W: "REVH.2W", + REVH_D: "REVH.D", + ROTRI_D: "ROTRI.D", + ROTRI_W: "ROTRI.W", + ROTR_D: "ROTR.D", + ROTR_W: "ROTR.W", + SCREL_D: "SCREL.D", + SCREL_W: "SCREL.W", + SC_D: "SC.D", + SC_Q: "SC.Q", + SC_W: "SC.W", + SLLI_D: "SLLI.D", + SLLI_W: "SLLI.W", + SLL_D: "SLL.D", + SLL_W: "SLL.W", + SLT: "SLT", + SLTI: "SLTI", + SLTU: "SLTU", + SLTUI: "SLTUI", + SRAI_D: "SRAI.D", + SRAI_W: "SRAI.W", + SRA_D: "SRA.D", + SRA_W: "SRA.W", + SRLI_D: "SRLI.D", + SRLI_W: "SRLI.W", + SRL_D: "SRL.D", + SRL_W: "SRL.W", + STGT_B: "STGT.B", + STGT_D: "STGT.D", + STGT_H: "STGT.H", + STGT_W: "STGT.W", + STLE_B: "STLE.B", + STLE_D: "STLE.D", + STLE_H: "STLE.H", + STLE_W: "STLE.W", + STPTR_D: "STPTR.D", + STPTR_W: "STPTR.W", + STX_B: "STX.B", + STX_D: "STX.D", + STX_H: "STX.H", + STX_W: "STX.W", + ST_B: "ST.B", + ST_D: "ST.D", + ST_H: "ST.H", + ST_W: "ST.W", + SUB_D: "SUB.D", + SUB_W: "SUB.W", + SYSCALL: "SYSCALL", + TLBCLR: "TLBCLR", + TLBFILL: "TLBFILL", + TLBFLUSH: "TLBFLUSH", + TLBRD: "TLBRD", + TLBSRCH: "TLBSRCH", + TLBWR: "TLBWR", + XOR: "XOR", + XORI: "XORI", +} + +var instFormats = [...]instFormat{ + // ADDI.D rd, rj, si12 + {mask: 0xffc00000, value: 0x02c00000, op: ADDI_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ADDI.W rd, rj, si12 + {mask: 0xffc00000, value: 0x02800000, op: ADDI_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ADDU16I.D rd, rj, si16 + {mask: 0xfc000000, value: 0x10000000, op: ADDU16I_D, args: instArgs{arg_rd, arg_rj, arg_si16_25_10}}, + // ADD.D rd, rj, rk + {mask: 0xffff8000, value: 0x00108000, op: ADD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ADD.W rd, rj, rk + {mask: 0xffff8000, value: 0x00100000, op: ADD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // 
ALSL.D rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x002c0000, op: ALSL_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // ALSL.W rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x00040000, op: ALSL_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // ALSL.WU rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x00060000, op: ALSL_WU, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // AMADD.B rd, rk, rj + {mask: 0xffff8000, value: 0x385d0000, op: AMADD_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD.D rd, rk, rj + {mask: 0xffff8000, value: 0x38618000, op: AMADD_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.B rd, rk, rj + {mask: 0xffff8000, value: 0x385f0000, op: AMADD_DB_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386a8000, op: AMADD_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.H rd, rk, rj + {mask: 0xffff8000, value: 0x385f8000, op: AMADD_DB_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386a0000, op: AMADD_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD.H rd, rk, rj + {mask: 0xffff8000, value: 0x385d8000, op: AMADD_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMADD.W rd, rk, rj + {mask: 0xffff8000, value: 0x38610000, op: AMADD_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND.D rd, rk, rj + {mask: 0xffff8000, value: 0x38628000, op: AMAND_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386b8000, op: AMAND_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386b0000, op: AMAND_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMAND.W rd, rk, rj + {mask: 0xffff8000, value: 0x38620000, op: AMAND_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.B rd, rk, rj + {mask: 0xffff8000, value: 0x38580000, op: AMCAS_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.D rd, rk, rj 
+ {mask: 0xffff8000, value: 0x38598000, op: AMCAS_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.B rd, rk, rj + {mask: 0xffff8000, value: 0x385a0000, op: AMCAS_DB_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x385b8000, op: AMCAS_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.H rd, rk, rj + {mask: 0xffff8000, value: 0x385a8000, op: AMCAS_DB_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x385b0000, op: AMCAS_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.H rd, rk, rj + {mask: 0xffff8000, value: 0x38588000, op: AMCAS_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMCAS.W rd, rk, rj + {mask: 0xffff8000, value: 0x38590000, op: AMCAS_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.D rd, rk, rj + {mask: 0xffff8000, value: 0x38658000, op: AMMAX_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386e8000, op: AMMAX_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38708000, op: AMMAX_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386e0000, op: AMMAX_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX_DB.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38700000, op: AMMAX_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38678000, op: AMMAX_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.W rd, rk, rj + {mask: 0xffff8000, value: 0x38650000, op: AMMAX_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMAX.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38670000, op: AMMAX_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.D rd, rk, rj + {mask: 0xffff8000, value: 0x38668000, op: AMMIN_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386f8000, op: 
AMMIN_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38718000, op: AMMIN_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386f0000, op: AMMIN_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN_DB.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38710000, op: AMMIN_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.DU rd, rk, rj + {mask: 0xffff8000, value: 0x38688000, op: AMMIN_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.W rd, rk, rj + {mask: 0xffff8000, value: 0x38660000, op: AMMIN_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMMIN.WU rd, rk, rj + {mask: 0xffff8000, value: 0x38680000, op: AMMIN_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR.D rd, rk, rj + {mask: 0xffff8000, value: 0x38638000, op: AMOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386c8000, op: AMOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386c0000, op: AMOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMOR.W rd, rk, rj + {mask: 0xffff8000, value: 0x38630000, op: AMOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP.B rd, rk, rj + {mask: 0xffff8000, value: 0x385c0000, op: AMSWAP_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP.D rd, rk, rj + {mask: 0xffff8000, value: 0x38608000, op: AMSWAP_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.B rd, rk, rj + {mask: 0xffff8000, value: 0x385e0000, op: AMSWAP_DB_B, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x38698000, op: AMSWAP_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.H rd, rk, rj + {mask: 0xffff8000, value: 0x385e8000, op: AMSWAP_DB_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x38690000, op: AMSWAP_DB_W, args: instArgs{arg_rd, arg_rk, 
arg_rj}}, + // AMSWAP.H rd, rk, rj + {mask: 0xffff8000, value: 0x385c8000, op: AMSWAP_H, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMSWAP.W rd, rk, rj + {mask: 0xffff8000, value: 0x38600000, op: AMSWAP_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR.D rd, rk, rj + {mask: 0xffff8000, value: 0x38648000, op: AMXOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR_DB.D rd, rk, rj + {mask: 0xffff8000, value: 0x386d8000, op: AMXOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR_DB.W rd, rk, rj + {mask: 0xffff8000, value: 0x386d0000, op: AMXOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AMXOR.W rd, rk, rj + {mask: 0xffff8000, value: 0x38640000, op: AMXOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // AND rd, rj, rk + {mask: 0xffff8000, value: 0x00148000, op: AND, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ANDI rd, rj, ui12 + {mask: 0xffc00000, value: 0x03400000, op: ANDI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, + // ANDN rd, rj, rk + {mask: 0xffff8000, value: 0x00168000, op: ANDN, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ASRTGT.D rj, rk + {mask: 0xffff801f, value: 0x00018000, op: ASRTGT_D, args: instArgs{arg_rj, arg_rk}}, + // ASRTLE.D rj, rk + {mask: 0xffff801f, value: 0x00010000, op: ASRTLE_D, args: instArgs{arg_rj, arg_rk}}, + // B offs + {mask: 0xfc000000, value: 0x50000000, op: B, args: instArgs{arg_offset_25_0}}, + // BCEQZ cj, offs + {mask: 0xfc000300, value: 0x48000000, op: BCEQZ, args: instArgs{arg_cj, arg_offset_20_0}}, + // BCNEZ cj, offs + {mask: 0xfc000300, value: 0x48000100, op: BCNEZ, args: instArgs{arg_cj, arg_offset_20_0}}, + // BEQ rj, rd, offs + {mask: 0xfc000000, value: 0x58000000, op: BEQ, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BEQZ rj, offs + {mask: 0xfc000000, value: 0x40000000, op: BEQZ, args: instArgs{arg_rj, arg_offset_20_0}}, + // BGE rj, rd, offs + {mask: 0xfc000000, value: 0x64000000, op: BGE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BGEU rj, rd, offs + {mask: 
0xfc000000, value: 0x6c000000, op: BGEU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BITREV.4B rd, rj + {mask: 0xfffffc00, value: 0x00004800, op: BITREV_4B, args: instArgs{arg_rd, arg_rj}}, + // BITREV.8B rd, rj + {mask: 0xfffffc00, value: 0x00004c00, op: BITREV_8B, args: instArgs{arg_rd, arg_rj}}, + // BITREV.D rd, rj + {mask: 0xfffffc00, value: 0x00005400, op: BITREV_D, args: instArgs{arg_rd, arg_rj}}, + // BITREV.W rd, rj + {mask: 0xfffffc00, value: 0x00005000, op: BITREV_W, args: instArgs{arg_rd, arg_rj}}, + // BL offs + {mask: 0xfc000000, value: 0x54000000, op: BL, args: instArgs{arg_offset_25_0}}, + // BLT rj, rd, offs + {mask: 0xfc000000, value: 0x60000000, op: BLT, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BLTU rj, rd, offs + {mask: 0xfc000000, value: 0x68000000, op: BLTU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BNE rj, rd, offs + {mask: 0xfc000000, value: 0x5c000000, op: BNE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, + // BNEZ rj, offs + {mask: 0xfc000000, value: 0x44000000, op: BNEZ, args: instArgs{arg_rj, arg_offset_20_0}}, + // BREAK code + {mask: 0xffff8000, value: 0x002a0000, op: BREAK, args: instArgs{arg_code_14_0}}, + // BSTRINS.D rd, rj, msbd, lsbd + {mask: 0xffc00000, value: 0x00800000, op: BSTRINS_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, + // BSTRINS.W rd, rj, msbw, lsbw + {mask: 0xffe08000, value: 0x00600000, op: BSTRINS_W, args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, + // BSTRPICK.D rd, rj, msbd, lsbd + {mask: 0xffc00000, value: 0x00c00000, op: BSTRPICK_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, + // BSTRPICK.W rd, rj, msbw, lsbw + {mask: 0xffe08000, value: 0x00608000, op: BSTRPICK_W, args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, + // BYTEPICK.D rd, rj, rk, sa3 + {mask: 0xfffc0000, value: 0x000c0000, op: BYTEPICK_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa3_17_15}}, + // BYTEPICK.W rd, rj, rk, sa2 + {mask: 0xfffe0000, value: 0x00080000, op: 
BYTEPICK_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, + // CACOP code, rj, si12 + {mask: 0xffc00000, value: 0x06000000, op: CACOP, args: instArgs{arg_code_4_0, arg_rj, arg_si12_21_10}}, + // CLO.D rd, rj + {mask: 0xfffffc00, value: 0x00002000, op: CLO_D, args: instArgs{arg_rd, arg_rj}}, + // CLO.W rd, rj + {mask: 0xfffffc00, value: 0x00001000, op: CLO_W, args: instArgs{arg_rd, arg_rj}}, + // CLZ.D rd, rj + {mask: 0xfffffc00, value: 0x00002400, op: CLZ_D, args: instArgs{arg_rd, arg_rj}}, + // CLZ.W rd, rj + {mask: 0xfffffc00, value: 0x00001400, op: CLZ_W, args: instArgs{arg_rd, arg_rj}}, + // CPUCFG rd, rj + {mask: 0xfffffc00, value: 0x00006c00, op: CPUCFG, args: instArgs{arg_rd, arg_rj}}, + // CRCC.W.B.W rd, rj, rk + {mask: 0xffff8000, value: 0x00260000, op: CRCC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRCC.W.D.W rd, rj, rk + {mask: 0xffff8000, value: 0x00278000, op: CRCC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRCC.W.H.W rd, rj, rk + {mask: 0xffff8000, value: 0x00268000, op: CRCC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRCC.W.W.W rd, rj, rk + {mask: 0xffff8000, value: 0x00270000, op: CRCC_W_W_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.B.W rd, rj, rk + {mask: 0xffff8000, value: 0x00240000, op: CRC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.D.W rd, rj, rk + {mask: 0xffff8000, value: 0x00258000, op: CRC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.H.W rd, rj, rk + {mask: 0xffff8000, value: 0x00248000, op: CRC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CRC.W.W.W rd, rj, rk + {mask: 0xffff8000, value: 0x00250000, op: CRC_W_W_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // CSRRD rd, csr + {mask: 0xff0003e0, value: 0x04000000, op: CSRRD, args: instArgs{arg_rd, arg_csr_23_10}}, + // CSRWR rd, csr + {mask: 0xff0003e0, value: 0x04000020, op: CSRWR, args: instArgs{arg_rd, arg_csr_23_10}}, + // CSRXCHG rd, rj, csr + {mask: 0xff000000, value: 0x04000000, op: CSRXCHG, args: 
instArgs{arg_rd, arg_rj, arg_csr_23_10}}, + // CTO.D rd, rj + {mask: 0xfffffc00, value: 0x00002800, op: CTO_D, args: instArgs{arg_rd, arg_rj}}, + // CTO.W rd, rj + {mask: 0xfffffc00, value: 0x00001800, op: CTO_W, args: instArgs{arg_rd, arg_rj}}, + // CTZ.D rd, rj + {mask: 0xfffffc00, value: 0x00002c00, op: CTZ_D, args: instArgs{arg_rd, arg_rj}}, + // CTZ.W rd, rj + {mask: 0xfffffc00, value: 0x00001c00, op: CTZ_W, args: instArgs{arg_rd, arg_rj}}, + // DBAR hint + {mask: 0xffff8000, value: 0x38720000, op: DBAR, args: instArgs{arg_hint_14_0}}, + // DBCL code + {mask: 0xffff8000, value: 0x002a8000, op: DBCL, args: instArgs{arg_code_14_0}}, + // DIV.D rd, rj, rk + {mask: 0xffff8000, value: 0x00220000, op: DIV_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // DIV.DU rd, rj, rk + {mask: 0xffff8000, value: 0x00230000, op: DIV_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // DIV.W rd, rj, rk + {mask: 0xffff8000, value: 0x00200000, op: DIV_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // DIV.WU rd, rj, rk + {mask: 0xffff8000, value: 0x00210000, op: DIV_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ERTN + {mask: 0xffffffff, value: 0x06483800, op: ERTN, args: instArgs{}}, + // EXT.W.B rd, rj + {mask: 0xfffffc00, value: 0x00005c00, op: EXT_W_B, args: instArgs{arg_rd, arg_rj}}, + // EXT.W.H rd, rj + {mask: 0xfffffc00, value: 0x00005800, op: EXT_W_H, args: instArgs{arg_rd, arg_rj}}, + // FABS.D fd, fj + {mask: 0xfffffc00, value: 0x01140800, op: FABS_D, args: instArgs{arg_fd, arg_fj}}, + // FABS.S fd, fj + {mask: 0xfffffc00, value: 0x01140400, op: FABS_S, args: instArgs{arg_fd, arg_fj}}, + // FADD.D fd, fj, fk + {mask: 0xffff8000, value: 0x01010000, op: FADD_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FADD.S fd, fj, fk + {mask: 0xffff8000, value: 0x01008000, op: FADD_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FCLASS.D fd, fj + {mask: 0xfffffc00, value: 0x01143800, op: FCLASS_D, args: instArgs{arg_fd, arg_fj}}, + // FCLASS.S fd, fj + {mask: 0xfffffc00, value: 
0x01143400, op: FCLASS_S, args: instArgs{arg_fd, arg_fj}}, + // FCMP.CAF.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c200000, op: FCMP_CAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CAF.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c100000, op: FCMP_CAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c220000, op: FCMP_CEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c120000, op: FCMP_CEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c230000, op: FCMP_CLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c130000, op: FCMP_CLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c210000, op: FCMP_CLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CLT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c110000, op: FCMP_CLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c280000, op: FCMP_CNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c180000, op: FCMP_CNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.COR.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2a0000, op: FCMP_COR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.COR.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1a0000, op: FCMP_COR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c260000, op: FCMP_CUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c160000, op: FCMP_CUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c270000, op: FCMP_CULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c170000, op: 
FCMP_CULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c250000, op: FCMP_CULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CULT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c150000, op: FCMP_CULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2c0000, op: FCMP_CUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1c0000, op: FCMP_CUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUN.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c240000, op: FCMP_CUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.CUN.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c140000, op: FCMP_CUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SAF.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c208000, op: FCMP_SAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SAF.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c108000, op: FCMP_SAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c228000, op: FCMP_SEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c128000, op: FCMP_SEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c238000, op: FCMP_SLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c138000, op: FCMP_SLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c218000, op: FCMP_SLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SLT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c118000, op: FCMP_SLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c288000, op: FCMP_SNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c188000, op: 
FCMP_SNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SOR.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2a8000, op: FCMP_SOR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SOR.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1a8000, op: FCMP_SOR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUEQ.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c268000, op: FCMP_SUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUEQ.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c168000, op: FCMP_SUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c278000, op: FCMP_SULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c178000, op: FCMP_SULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULT.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c258000, op: FCMP_SULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SULT.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c158000, op: FCMP_SULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUNE.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c2c8000, op: FCMP_SUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUNE.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c1c8000, op: FCMP_SUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUN.D cd, fj, fk + {mask: 0xffff8018, value: 0x0c248000, op: FCMP_SUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCMP.SUN.S cd, fj, fk + {mask: 0xffff8018, value: 0x0c148000, op: FCMP_SUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, + // FCOPYSIGN.D fd, fj, fk + {mask: 0xffff8000, value: 0x01130000, op: FCOPYSIGN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FCOPYSIGN.S fd, fj, fk + {mask: 0xffff8000, value: 0x01128000, op: FCOPYSIGN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FCVT.D.S fd, fj + {mask: 0xfffffc00, value: 0x01192400, op: FCVT_D_S, args: instArgs{arg_fd, arg_fj}}, + // FCVT.S.D fd, fj + {mask: 0xfffffc00, value: 0x01191800, op: FCVT_S_D, args: 
instArgs{arg_fd, arg_fj}}, + // FDIV.D fd, fj, fk + {mask: 0xffff8000, value: 0x01070000, op: FDIV_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FDIV.S fd, fj, fk + {mask: 0xffff8000, value: 0x01068000, op: FDIV_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FFINT.D.L fd, fj + {mask: 0xfffffc00, value: 0x011d2800, op: FFINT_D_L, args: instArgs{arg_fd, arg_fj}}, + // FFINT.D.W fd, fj + {mask: 0xfffffc00, value: 0x011d2000, op: FFINT_D_W, args: instArgs{arg_fd, arg_fj}}, + // FFINT.S.L fd, fj + {mask: 0xfffffc00, value: 0x011d1800, op: FFINT_S_L, args: instArgs{arg_fd, arg_fj}}, + // FFINT.S.W fd, fj + {mask: 0xfffffc00, value: 0x011d1000, op: FFINT_S_W, args: instArgs{arg_fd, arg_fj}}, + // FLDGT.D fd, rj, rk + {mask: 0xffff8000, value: 0x38748000, op: FLDGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDGT.S fd, rj, rk + {mask: 0xffff8000, value: 0x38740000, op: FLDGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDLE.D fd, rj, rk + {mask: 0xffff8000, value: 0x38758000, op: FLDLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDLE.S fd, rj, rk + {mask: 0xffff8000, value: 0x38750000, op: FLDLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDX.D fd, rj, rk + {mask: 0xffff8000, value: 0x38340000, op: FLDX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLDX.S fd, rj, rk + {mask: 0xffff8000, value: 0x38300000, op: FLDX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FLD.D fd, rj, si12 + {mask: 0xffc00000, value: 0x2b800000, op: FLD_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FLD.S fd, rj, si12 + {mask: 0xffc00000, value: 0x2b000000, op: FLD_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FLOGB.D fd, fj + {mask: 0xfffffc00, value: 0x01142800, op: FLOGB_D, args: instArgs{arg_fd, arg_fj}}, + // FLOGB.S fd, fj + {mask: 0xfffffc00, value: 0x01142400, op: FLOGB_S, args: instArgs{arg_fd, arg_fj}}, + // FMADD.D fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08200000, op: FMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // 
FMADD.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08100000, op: FMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FMAXA.D fd, fj, fk + {mask: 0xffff8000, value: 0x010d0000, op: FMAXA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMAXA.S fd, fj, fk + {mask: 0xffff8000, value: 0x010c8000, op: FMAXA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMAX.D fd, fj, fk + {mask: 0xffff8000, value: 0x01090000, op: FMAX_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMAX.S fd, fj, fk + {mask: 0xffff8000, value: 0x01088000, op: FMAX_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMINA.D fd, fj, fk + {mask: 0xffff8000, value: 0x010f0000, op: FMINA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMINA.S fd, fj, fk + {mask: 0xffff8000, value: 0x010e8000, op: FMINA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMIN.D fd, fj, fk + {mask: 0xffff8000, value: 0x010b0000, op: FMIN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMIN.S fd, fj, fk + {mask: 0xffff8000, value: 0x010a8000, op: FMIN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMOV.D fd, fj + {mask: 0xfffffc00, value: 0x01149800, op: FMOV_D, args: instArgs{arg_fd, arg_fj}}, + // FMOV.S fd, fj + {mask: 0xfffffc00, value: 0x01149400, op: FMOV_S, args: instArgs{arg_fd, arg_fj}}, + // FMSUB.D fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08600000, op: FMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FMSUB.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08500000, op: FMSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FMUL.D fd, fj, fk + {mask: 0xffff8000, value: 0x01050000, op: FMUL_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FMUL.S fd, fj, fk + {mask: 0xffff8000, value: 0x01048000, op: FMUL_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FNEG.D fd, fj + {mask: 0xfffffc00, value: 0x01141800, op: FNEG_D, args: instArgs{arg_fd, arg_fj}}, + // FNEG.S fd, fj + {mask: 0xfffffc00, value: 0x01141400, op: FNEG_S, args: instArgs{arg_fd, arg_fj}}, + // FNMADD.D fd, fj, fk, fa + 
{mask: 0xfff00000, value: 0x08a00000, op: FNMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FNMADD.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08900000, op: FNMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FNMSUB.D fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08e00000, op: FNMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FNMSUB.S fd, fj, fk, fa + {mask: 0xfff00000, value: 0x08d00000, op: FNMSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, + // FRECIPE.D fd, fj + {mask: 0xfffffc00, value: 0x01147800, op: FRECIPE_D, args: instArgs{arg_fd, arg_fj}}, + // FRECIPE.S fd, fj + {mask: 0xfffffc00, value: 0x01147400, op: FRECIPE_S, args: instArgs{arg_fd, arg_fj}}, + // FRECIP.D fd, fj + {mask: 0xfffffc00, value: 0x01145800, op: FRECIP_D, args: instArgs{arg_fd, arg_fj}}, + // FRECIP.S fd, fj + {mask: 0xfffffc00, value: 0x01145400, op: FRECIP_S, args: instArgs{arg_fd, arg_fj}}, + // FRINT.D fd, fj + {mask: 0xfffffc00, value: 0x011e4800, op: FRINT_D, args: instArgs{arg_fd, arg_fj}}, + // FRINT.S fd, fj + {mask: 0xfffffc00, value: 0x011e4400, op: FRINT_S, args: instArgs{arg_fd, arg_fj}}, + // FRSQRTE.D fd, fj + {mask: 0xfffffc00, value: 0x01148800, op: FRSQRTE_D, args: instArgs{arg_fd, arg_fj}}, + // FRSQRTE.S fd, fj + {mask: 0xfffffc00, value: 0x01148400, op: FRSQRTE_S, args: instArgs{arg_fd, arg_fj}}, + // FRSQRT.D fd, fj + {mask: 0xfffffc00, value: 0x01146800, op: FRSQRT_D, args: instArgs{arg_fd, arg_fj}}, + // FRSQRT.S fd, fj + {mask: 0xfffffc00, value: 0x01146400, op: FRSQRT_S, args: instArgs{arg_fd, arg_fj}}, + // FSCALEB.D fd, fj, fk + {mask: 0xffff8000, value: 0x01110000, op: FSCALEB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FSCALEB.S fd, fj, fk + {mask: 0xffff8000, value: 0x01108000, op: FSCALEB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FSEL fd, fj, fk, ca + {mask: 0xfffc0000, value: 0x0d000000, op: FSEL, args: instArgs{arg_fd, arg_fj, arg_fk, arg_ca}}, + // FSQRT.D fd, fj + {mask: 0xfffffc00, value: 
0x01144800, op: FSQRT_D, args: instArgs{arg_fd, arg_fj}}, + // FSQRT.S fd, fj + {mask: 0xfffffc00, value: 0x01144400, op: FSQRT_S, args: instArgs{arg_fd, arg_fj}}, + // FSTGT.D fd, rj, rk + {mask: 0xffff8000, value: 0x38768000, op: FSTGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTGT.S fd, rj, rk + {mask: 0xffff8000, value: 0x38760000, op: FSTGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTLE.D fd, rj, rk + {mask: 0xffff8000, value: 0x38778000, op: FSTLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTLE.S fd, rj, rk + {mask: 0xffff8000, value: 0x38770000, op: FSTLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTX.D fd, rj, rk + {mask: 0xffff8000, value: 0x383c0000, op: FSTX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FSTX.S fd, rj, rk + {mask: 0xffff8000, value: 0x38380000, op: FSTX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, + // FST.D fd, rj, si12 + {mask: 0xffc00000, value: 0x2bc00000, op: FST_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FST.S fd, rj, si12 + {mask: 0xffc00000, value: 0x2b400000, op: FST_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, + // FSUB.D fd, fj, fk + {mask: 0xffff8000, value: 0x01030000, op: FSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FSUB.S fd, fj, fk + {mask: 0xffff8000, value: 0x01028000, op: FSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, + // FTINTRM.L.D fd, fj + {mask: 0xfffffc00, value: 0x011a2800, op: FTINTRM_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRM.L.S fd, fj + {mask: 0xfffffc00, value: 0x011a2400, op: FTINTRM_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRM.W.D fd, fj + {mask: 0xfffffc00, value: 0x011a0800, op: FTINTRM_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRM.W.S fd, fj + {mask: 0xfffffc00, value: 0x011a0400, op: FTINTRM_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.L.D fd, fj + {mask: 0xfffffc00, value: 0x011ae800, op: FTINTRNE_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.L.S fd, fj + {mask: 0xfffffc00, value: 0x011ae400, op: 
FTINTRNE_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.W.D fd, fj + {mask: 0xfffffc00, value: 0x011ac800, op: FTINTRNE_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRNE.W.S fd, fj + {mask: 0xfffffc00, value: 0x011ac400, op: FTINTRNE_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.L.D fd, fj + {mask: 0xfffffc00, value: 0x011a6800, op: FTINTRP_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.L.S fd, fj + {mask: 0xfffffc00, value: 0x011a6400, op: FTINTRP_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.W.D fd, fj + {mask: 0xfffffc00, value: 0x011a4800, op: FTINTRP_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRP.W.S fd, fj + {mask: 0xfffffc00, value: 0x011a4400, op: FTINTRP_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.L.D fd, fj + {mask: 0xfffffc00, value: 0x011aa800, op: FTINTRZ_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.L.S fd, fj + {mask: 0xfffffc00, value: 0x011aa400, op: FTINTRZ_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.W.D fd, fj + {mask: 0xfffffc00, value: 0x011a8800, op: FTINTRZ_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINTRZ.W.S fd, fj + {mask: 0xfffffc00, value: 0x011a8400, op: FTINTRZ_W_S, args: instArgs{arg_fd, arg_fj}}, + // FTINT.L.D fd, fj + {mask: 0xfffffc00, value: 0x011b2800, op: FTINT_L_D, args: instArgs{arg_fd, arg_fj}}, + // FTINT.L.S fd, fj + {mask: 0xfffffc00, value: 0x011b2400, op: FTINT_L_S, args: instArgs{arg_fd, arg_fj}}, + // FTINT.W.D fd, fj + {mask: 0xfffffc00, value: 0x011b0800, op: FTINT_W_D, args: instArgs{arg_fd, arg_fj}}, + // FTINT.W.S fd, fj + {mask: 0xfffffc00, value: 0x011b0400, op: FTINT_W_S, args: instArgs{arg_fd, arg_fj}}, + // IBAR hint + {mask: 0xffff8000, value: 0x38728000, op: IBAR, args: instArgs{arg_hint_14_0}}, + // IDLE level + {mask: 0xffff8000, value: 0x06488000, op: IDLE, args: instArgs{arg_level_14_0}}, + // INVTLB op, rj, rk + {mask: 0xffff8000, value: 0x06498000, op: INVTLB, args: instArgs{arg_op_4_0, arg_rj, arg_rk}}, + // IOCSRRD.B rd, rj + {mask: 0xfffffc00, value: 
0x06480000, op: IOCSRRD_B, args: instArgs{arg_rd, arg_rj}}, + // IOCSRRD.D rd, rj + {mask: 0xfffffc00, value: 0x06480c00, op: IOCSRRD_D, args: instArgs{arg_rd, arg_rj}}, + // IOCSRRD.H rd, rj + {mask: 0xfffffc00, value: 0x06480400, op: IOCSRRD_H, args: instArgs{arg_rd, arg_rj}}, + // IOCSRRD.W rd, rj + {mask: 0xfffffc00, value: 0x06480800, op: IOCSRRD_W, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.B rd, rj + {mask: 0xfffffc00, value: 0x06481000, op: IOCSRWR_B, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.D rd, rj + {mask: 0xfffffc00, value: 0x06481c00, op: IOCSRWR_D, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.H rd, rj + {mask: 0xfffffc00, value: 0x06481400, op: IOCSRWR_H, args: instArgs{arg_rd, arg_rj}}, + // IOCSRWR.W rd, rj + {mask: 0xfffffc00, value: 0x06481800, op: IOCSRWR_W, args: instArgs{arg_rd, arg_rj}}, + // JIRL rd, rj, offs + {mask: 0xfc000000, value: 0x4c000000, op: JIRL, args: instArgs{arg_rd, arg_rj, arg_offset_15_0}}, + // LDDIR rd, rj, level + {mask: 0xfffc0000, value: 0x06400000, op: LDDIR, args: instArgs{arg_rd, arg_rj, arg_level_17_10}}, + // LDGT.B rd, rj, rk + {mask: 0xffff8000, value: 0x38780000, op: LDGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDGT.D rd, rj, rk + {mask: 0xffff8000, value: 0x38798000, op: LDGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDGT.H rd, rj, rk + {mask: 0xffff8000, value: 0x38788000, op: LDGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDGT.W rd, rj, rk + {mask: 0xffff8000, value: 0x38790000, op: LDGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.B rd, rj, rk + {mask: 0xffff8000, value: 0x387a0000, op: LDLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.D rd, rj, rk + {mask: 0xffff8000, value: 0x387b8000, op: LDLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.H rd, rj, rk + {mask: 0xffff8000, value: 0x387a8000, op: LDLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDLE.W rd, rj, rk + {mask: 0xffff8000, value: 0x387b0000, op: LDLE_W, args: instArgs{arg_rd, arg_rj, 
arg_rk}}, + // LDPTE rj, seq + {mask: 0xfffc001f, value: 0x06440000, op: LDPTE, args: instArgs{arg_rj, arg_seq_17_10}}, + // LDPTR.D rd, rj, si14 + {mask: 0xff000000, value: 0x26000000, op: LDPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LDPTR.W rd, rj, si14 + {mask: 0xff000000, value: 0x24000000, op: LDPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LDX.B rd, rj, rk + {mask: 0xffff8000, value: 0x38000000, op: LDX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.BU rd, rj, rk + {mask: 0xffff8000, value: 0x38200000, op: LDX_BU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.D rd, rj, rk + {mask: 0xffff8000, value: 0x380c0000, op: LDX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.H rd, rj, rk + {mask: 0xffff8000, value: 0x38040000, op: LDX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.HU rd, rj, rk + {mask: 0xffff8000, value: 0x38240000, op: LDX_HU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.W rd, rj, rk + {mask: 0xffff8000, value: 0x38080000, op: LDX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LDX.WU rd, rj, rk + {mask: 0xffff8000, value: 0x38280000, op: LDX_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // LD.B rd, rj, si12 + {mask: 0xffc00000, value: 0x28000000, op: LD_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.BU rd, rj, si12 + {mask: 0xffc00000, value: 0x2a000000, op: LD_BU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.D rd, rj, si12 + {mask: 0xffc00000, value: 0x28c00000, op: LD_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.H rd, rj, si12 + {mask: 0xffc00000, value: 0x28400000, op: LD_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.HU rd, rj, si12 + {mask: 0xffc00000, value: 0x2a400000, op: LD_HU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.W rd, rj, si12 + {mask: 0xffc00000, value: 0x28800000, op: LD_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LD.WU rd, rj, si12 + {mask: 0xffc00000, value: 0x2a800000, op: LD_WU, args: 
instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // LLACQ.D rd, rj + {mask: 0xfffffc00, value: 0x38578800, op: LLACQ_D, args: instArgs{arg_rd, arg_rj}}, + // LLACQ.W rd, rj + {mask: 0xfffffc00, value: 0x38578000, op: LLACQ_W, args: instArgs{arg_rd, arg_rj}}, + // LL.D rd, rj, si14 + {mask: 0xff000000, value: 0x22000000, op: LL_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LL.W rd, rj, si14 + {mask: 0xff000000, value: 0x20000000, op: LL_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // LU12I.W rd, si20 + {mask: 0xfe000000, value: 0x14000000, op: LU12I_W, args: instArgs{arg_rd, arg_si20_24_5}}, + // LU32I.D rd, si20 + {mask: 0xfe000000, value: 0x16000000, op: LU32I_D, args: instArgs{arg_rd, arg_si20_24_5}}, + // LU52I.D rd, rj, si12 + {mask: 0xffc00000, value: 0x03000000, op: LU52I_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // MASKEQZ rd, rj, rk + {mask: 0xffff8000, value: 0x00130000, op: MASKEQZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MASKNEZ rd, rj, rk + {mask: 0xffff8000, value: 0x00138000, op: MASKNEZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.D rd, rj, rk + {mask: 0xffff8000, value: 0x00228000, op: MOD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.DU rd, rj, rk + {mask: 0xffff8000, value: 0x00238000, op: MOD_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.W rd, rj, rk + {mask: 0xffff8000, value: 0x00208000, op: MOD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOD.WU rd, rj, rk + {mask: 0xffff8000, value: 0x00218000, op: MOD_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MOVCF2FR fd, cj + {mask: 0xffffff00, value: 0x0114d400, op: MOVCF2FR, args: instArgs{arg_fd, arg_cj}}, + // MOVCF2GR rd, cj + {mask: 0xffffff00, value: 0x0114dc00, op: MOVCF2GR, args: instArgs{arg_rd, arg_cj}}, + // MOVFCSR2GR rd, fcsr + {mask: 0xfffffc00, value: 0x0114c800, op: MOVFCSR2GR, args: instArgs{arg_rd, arg_fcsr_9_5}}, + // MOVFR2CF cd, fj + {mask: 0xfffffc18, value: 0x0114d000, op: MOVFR2CF, args: instArgs{arg_cd, arg_fj}}, + 
// MOVFR2GR.D rd, fj + {mask: 0xfffffc00, value: 0x0114b800, op: MOVFR2GR_D, args: instArgs{arg_rd, arg_fj}}, + // MOVFR2GR.S rd, fj + {mask: 0xfffffc00, value: 0x0114b400, op: MOVFR2GR_S, args: instArgs{arg_rd, arg_fj}}, + // MOVFRH2GR.S rd, fj + {mask: 0xfffffc00, value: 0x0114bc00, op: MOVFRH2GR_S, args: instArgs{arg_rd, arg_fj}}, + // MOVGR2CF cd, rj + {mask: 0xfffffc18, value: 0x0114d800, op: MOVGR2CF, args: instArgs{arg_cd, arg_rj}}, + // MOVGR2FCSR fcsr, rj + {mask: 0xfffffc00, value: 0x0114c000, op: MOVGR2FCSR, args: instArgs{arg_fcsr_4_0, arg_rj}}, + // MOVGR2FRH.W fd, rj + {mask: 0xfffffc00, value: 0x0114ac00, op: MOVGR2FRH_W, args: instArgs{arg_fd, arg_rj}}, + // MOVGR2FR.D fd, rj + {mask: 0xfffffc00, value: 0x0114a800, op: MOVGR2FR_D, args: instArgs{arg_fd, arg_rj}}, + // MOVGR2FR.W fd, rj + {mask: 0xfffffc00, value: 0x0114a400, op: MOVGR2FR_W, args: instArgs{arg_fd, arg_rj}}, + // MULH.D rd, rj, rk + {mask: 0xffff8000, value: 0x001e0000, op: MULH_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULH.DU rd, rj, rk + {mask: 0xffff8000, value: 0x001e8000, op: MULH_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULH.W rd, rj, rk + {mask: 0xffff8000, value: 0x001c8000, op: MULH_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULH.WU rd, rj, rk + {mask: 0xffff8000, value: 0x001d0000, op: MULH_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULW.D.W rd, rj, rk + {mask: 0xffff8000, value: 0x001f0000, op: MULW_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MULW.D.WU rd, rj, rk + {mask: 0xffff8000, value: 0x001f8000, op: MULW_D_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MUL.D rd, rj, rk + {mask: 0xffff8000, value: 0x001d8000, op: MUL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // MUL.W rd, rj, rk + {mask: 0xffff8000, value: 0x001c0000, op: MUL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // NOR rd, rj, rk + {mask: 0xffff8000, value: 0x00140000, op: NOR, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // OR rd, rj, rk + {mask: 0xffff8000, value: 
0x00150000, op: OR, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ORI rd, rj, ui12 + {mask: 0xffc00000, value: 0x03800000, op: ORI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, + // ORN rd, rj, rk + {mask: 0xffff8000, value: 0x00160000, op: ORN, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // PCADDI rd, si20 + {mask: 0xfe000000, value: 0x18000000, op: PCADDI, args: instArgs{arg_rd, arg_si20_24_5}}, + // PCADDU12I rd, si20 + {mask: 0xfe000000, value: 0x1c000000, op: PCADDU12I, args: instArgs{arg_rd, arg_si20_24_5}}, + // PCADDU18I rd, si20 + {mask: 0xfe000000, value: 0x1e000000, op: PCADDU18I, args: instArgs{arg_rd, arg_si20_24_5}}, + // PCALAU12I rd, si20 + {mask: 0xfe000000, value: 0x1a000000, op: PCALAU12I, args: instArgs{arg_rd, arg_si20_24_5}}, + // PRELD hint, rj, si12 + {mask: 0xffc00000, value: 0x2ac00000, op: PRELD, args: instArgs{arg_hint_4_0, arg_rj, arg_si12_21_10}}, + // PRELDX hint, rj, rk + {mask: 0xffff8000, value: 0x382c0000, op: PRELDX, args: instArgs{arg_hint_4_0, arg_rj, arg_rk}}, + // RDTIMEH.W rd, rj + {mask: 0xfffffc00, value: 0x00006400, op: RDTIMEH_W, args: instArgs{arg_rd, arg_rj}}, + // RDTIMEL.W rd, rj + {mask: 0xfffffc00, value: 0x00006000, op: RDTIMEL_W, args: instArgs{arg_rd, arg_rj}}, + // RDTIME.D rd, rj + {mask: 0xfffffc00, value: 0x00006800, op: RDTIME_D, args: instArgs{arg_rd, arg_rj}}, + // REVB.2H rd, rj + {mask: 0xfffffc00, value: 0x00003000, op: REVB_2H, args: instArgs{arg_rd, arg_rj}}, + // REVB.2W rd, rj + {mask: 0xfffffc00, value: 0x00003800, op: REVB_2W, args: instArgs{arg_rd, arg_rj}}, + // REVB.4H rd, rj + {mask: 0xfffffc00, value: 0x00003400, op: REVB_4H, args: instArgs{arg_rd, arg_rj}}, + // REVB.D rd, rj + {mask: 0xfffffc00, value: 0x00003c00, op: REVB_D, args: instArgs{arg_rd, arg_rj}}, + // REVH.2W rd, rj + {mask: 0xfffffc00, value: 0x00004000, op: REVH_2W, args: instArgs{arg_rd, arg_rj}}, + // REVH.D rd, rj + {mask: 0xfffffc00, value: 0x00004400, op: REVH_D, args: instArgs{arg_rd, arg_rj}}, + // ROTRI.D rd, rj, 
ui6 + {mask: 0xffff0000, value: 0x004d0000, op: ROTRI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // ROTRI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x004c8000, op: ROTRI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // ROTR.D rd, rj, rk + {mask: 0xffff8000, value: 0x001b8000, op: ROTR_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ROTR.W rd, rj, rk + {mask: 0xffff8000, value: 0x001b0000, op: ROTR_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SCREL.D rd, rj + {mask: 0xfffffc00, value: 0x38578c00, op: SCREL_D, args: instArgs{arg_rd, arg_rj}}, + // SCREL.W rd, rj + {mask: 0xfffffc00, value: 0x38578400, op: SCREL_W, args: instArgs{arg_rd, arg_rj}}, + // SC.D rd, rj, si14 + {mask: 0xff000000, value: 0x23000000, op: SC_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // SC.Q rd, rk, rj + {mask: 0xffff8000, value: 0x38570000, op: SC_Q, args: instArgs{arg_rd, arg_rk, arg_rj}}, + // SC.W rd, rj, si14 + {mask: 0xff000000, value: 0x21000000, op: SC_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // SLLI.D rd, rj, ui6 + {mask: 0xffff0000, value: 0x00410000, op: SLLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // SLLI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x00408000, op: SLLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // SLL.D rd, rj, rk + {mask: 0xffff8000, value: 0x00188000, op: SLL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLL.W rd, rj, rk + {mask: 0xffff8000, value: 0x00170000, op: SLL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLT rd, rj, rk + {mask: 0xffff8000, value: 0x00120000, op: SLT, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLTI rd, rj, si12 + {mask: 0xffc00000, value: 0x02000000, op: SLTI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // SLTU rd, rj, rk + {mask: 0xffff8000, value: 0x00128000, op: SLTU, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SLTUI rd, rj, si12 + {mask: 0xffc00000, value: 0x02400000, op: SLTUI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // SRAI.D rd, rj, ui6 
+ {mask: 0xffff0000, value: 0x00490000, op: SRAI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // SRAI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x00488000, op: SRAI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // SRA.D rd, rj, rk + {mask: 0xffff8000, value: 0x00198000, op: SRA_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SRA.W rd, rj, rk + {mask: 0xffff8000, value: 0x00180000, op: SRA_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SRLI.D rd, rj, ui6 + {mask: 0xffff0000, value: 0x00450000, op: SRLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, + // SRLI.W rd, rj, ui5 + {mask: 0xffff8000, value: 0x00448000, op: SRLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, + // SRL.D rd, rj, rk + {mask: 0xffff8000, value: 0x00190000, op: SRL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SRL.W rd, rj, rk + {mask: 0xffff8000, value: 0x00178000, op: SRL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.B rd, rj, rk + {mask: 0xffff8000, value: 0x387c0000, op: STGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.D rd, rj, rk + {mask: 0xffff8000, value: 0x387d8000, op: STGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.H rd, rj, rk + {mask: 0xffff8000, value: 0x387c8000, op: STGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STGT.W rd, rj, rk + {mask: 0xffff8000, value: 0x387d0000, op: STGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.B rd, rj, rk + {mask: 0xffff8000, value: 0x387e0000, op: STLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.D rd, rj, rk + {mask: 0xffff8000, value: 0x387f8000, op: STLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.H rd, rj, rk + {mask: 0xffff8000, value: 0x387e8000, op: STLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STLE.W rd, rj, rk + {mask: 0xffff8000, value: 0x387f0000, op: STLE_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STPTR.D rd, rj, si14 + {mask: 0xff000000, value: 0x27000000, op: STPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // STPTR.W 
rd, rj, si14 + {mask: 0xff000000, value: 0x25000000, op: STPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, + // STX.B rd, rj, rk + {mask: 0xffff8000, value: 0x38100000, op: STX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STX.D rd, rj, rk + {mask: 0xffff8000, value: 0x381c0000, op: STX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STX.H rd, rj, rk + {mask: 0xffff8000, value: 0x38140000, op: STX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // STX.W rd, rj, rk + {mask: 0xffff8000, value: 0x38180000, op: STX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // ST.B rd, rj, si12 + {mask: 0xffc00000, value: 0x29000000, op: ST_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ST.D rd, rj, si12 + {mask: 0xffc00000, value: 0x29c00000, op: ST_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ST.H rd, rj, si12 + {mask: 0xffc00000, value: 0x29400000, op: ST_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // ST.W rd, rj, si12 + {mask: 0xffc00000, value: 0x29800000, op: ST_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, + // SUB.D rd, rj, rk + {mask: 0xffff8000, value: 0x00118000, op: SUB_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SUB.W rd, rj, rk + {mask: 0xffff8000, value: 0x00110000, op: SUB_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, + // SYSCALL code + {mask: 0xffff8000, value: 0x002b0000, op: SYSCALL, args: instArgs{arg_code_14_0}}, + // TLBCLR + {mask: 0xffffffff, value: 0x06482000, op: TLBCLR, args: instArgs{}}, + // TLBFILL + {mask: 0xffffffff, value: 0x06483400, op: TLBFILL, args: instArgs{}}, + // TLBFLUSH + {mask: 0xffffffff, value: 0x06482400, op: TLBFLUSH, args: instArgs{}}, + // TLBRD + {mask: 0xffffffff, value: 0x06482c00, op: TLBRD, args: instArgs{}}, + // TLBSRCH + {mask: 0xffffffff, value: 0x06482800, op: TLBSRCH, args: instArgs{}}, + // TLBWR + {mask: 0xffffffff, value: 0x06483000, op: TLBWR, args: instArgs{}}, + // XOR rd, rj, rk + {mask: 0xffff8000, value: 0x00158000, op: XOR, args: instArgs{arg_rd, arg_rj, 
arg_rk}}, + // XORI rd, rj, ui12 + {mask: 0xffc00000, value: 0x03c00000, op: XORI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, +} diff --git a/loong64/loong64asm/testdata/gnucases.txt b/loong64/loong64asm/testdata/gnucases.txt new file mode 100644 index 00000000..2cfd5ea1 --- /dev/null +++ b/loong64/loong64asm/testdata/gnucases.txt @@ -0,0 +1,415 @@ +ac391000| add.w $t0, $t1, $t2 +acb91000| add.d $t0, $t1, $t2 +ac41c002| addi.d $t0, $t1, 16 +ac01e002| addi.d $t0, $t1, -2048 +acfdff02| addi.d $t0, $t1, -1 +ac05e002| addi.d $t0, $t1, -2047 +acf9ff02| addi.d $t0, $t1, -2 +acfdff02| addi.d $t0, $t1, -1 +ac418002| addi.w $t0, $t1, 16 +ac410010| addu16i.d $t0, $t1, 16 +a4fcff13| addu16i.d $a0, $a1, -1 +acb92c00| alsl.d $t0, $t1, $t2, 0x2 +acb90400| alsl.w $t0, $t1, $t2, 0x2 +acb90600| alsl.wu $t0, $t1, $t2, 0x2 +ccb56138| amadd.d $t0, $t1, $t2 +cc356138| amadd.w $t0, $t1, $t2 +ccb56a38| amadd_db.d $t0, $t1, $t2 +cc356a38| amadd_db.w $t0, $t1, $t2 +ccb56238| amand.d $t0, $t1, $t2 +cc356238| amand.w $t0, $t1, $t2 +ccb56b38| amand_db.d $t0, $t1, $t2 +cc356b38| amand_db.w $t0, $t1, $t2 +ccb56538| ammax.d $t0, $t1, $t2 +ccb56738| ammax.du $t0, $t1, $t2 +cc356538| ammax.w $t0, $t1, $t2 +cc356738| ammax.wu $t0, $t1, $t2 +ccb56e38| ammax_db.d $t0, $t1, $t2 +ccb57038| ammax_db.du $t0, $t1, $t2 +cc356e38| ammax_db.w $t0, $t1, $t2 +cc357038| ammax_db.wu $t0, $t1, $t2 +ccb56638| ammin.d $t0, $t1, $t2 +ccb56838| ammin.du $t0, $t1, $t2 +cc356638| ammin.w $t0, $t1, $t2 +cc356838| ammin.wu $t0, $t1, $t2 +ccb56f38| ammin_db.d $t0, $t1, $t2 +ccb57138| ammin_db.du $t0, $t1, $t2 +cc356f38| ammin_db.w $t0, $t1, $t2 +cc357138| ammin_db.wu $t0, $t1, $t2 +ccb56338| amor.d $t0, $t1, $t2 +cc356338| amor.w $t0, $t1, $t2 +ccb56c38| amor_db.d $t0, $t1, $t2 +cc356c38| amor_db.w $t0, $t1, $t2 +ccb56038| amswap.d $t0, $t1, $t2 +cc356038| amswap.w $t0, $t1, $t2 +ccb56938| amswap_db.d $t0, $t1, $t2 +cc356938| amswap_db.w $t0, $t1, $t2 +ccb56438| amxor.d $t0, $t1, $t2 +cc356438| amxor.w $t0, $t1, $t2 
+ccb56d38| amxor_db.d $t0, $t1, $t2 +cc356d38| amxor_db.w $t0, $t1, $t2 +acb91400| and $t0, $t1, $t2 +ac414003| andi $t0, $t1, 0x10 +acb91600| andn $t0, $t1, $t2 +00004003| nop +a0b90100| asrtgt.d $t1, $t2 +a0390100| asrtle.d $t1, $t2 +00100050| b 16 +20100048| bceqz $fcc1, 16 +20110048| bcnez $fcc1, 16 +1ff1ff4b| bcnez $fcc0, -16 +8d110058| beq $t0, $t1, 16 +a0110040| beqz $t1, 16 +9ff1ff43| beqz $t0, -16 +8d110064| bge $t0, $t1, 16 +8d11006c| bgeu $t0, $t1, 16 +ac490000| bitrev.4b $t0, $t1 +ac4d0000| bitrev.8b $t0, $t1 +ac550000| bitrev.d $t0, $t1 +ac510000| bitrev.w $t0, $t1 +00100054| bl 16 +8d110060| blt $t0, $t1, 16 +8d110068| bltu $t0, $t1, 16 +8d11005c| bne $t0, $t1, 16 +a0110044| bnez $t1, 16 +00002a00| break 0x0 +ac158a00| bstrins.d $t0, $t1, 0xa, 0x5 +ac156a00| bstrins.w $t0, $t1, 0xa, 0x5 +ac15ca00| bstrpick.d $t0, $t1, 0xa, 0x5 +ac956a00| bstrpick.w $t0, $t1, 0xa, 0x5 +ac390d00| bytepick.d $t0, $t1, $t2, 0x2 +ac390900| bytepick.w $t0, $t1, $t2, 0x2 +84010406| cacop 0x4, $t0, 256 +ac210000| clo.d $t0, $t1 +ac110000| clo.w $t0, $t1 +ac250000| clz.d $t0, $t1 +ac150000| clz.w $t0, $t1 +ac6d0000| cpucfg $t0, $t1 +ac392400| crc.w.b.w $t0, $t1, $t2 +acb92500| crc.w.d.w $t0, $t1, $t2 +acb92400| crc.w.h.w $t0, $t1, $t2 +ac392500| crc.w.w.w $t0, $t1, $t2 +ac392600| crcc.w.b.w $t0, $t1, $t2 +acb92700| crcc.w.d.w $t0, $t1, $t2 +acb92600| crcc.w.h.w $t0, $t1, $t2 +ac392700| crcc.w.w.w $t0, $t1, $t2 +0c040004| csrrd $t0, 0x1 +2c040004| csrwr $t0, 0x1 +ac050004| csrxchg $t0, $t1, 0x1 +ac290000| cto.d $t0, $t1 +ac190000| cto.w $t0, $t1 +ac2d0000| ctz.d $t0, $t1 +ac1d0000| ctz.w $t0, $t1 +00007238| dbar 0x0 +10802a00| dbcl 0x10 +ac392200| div.d $t0, $t1, $t2 +ac392300| div.du $t0, $t1, $t2 +ac392000| div.w $t0, $t1, $t2 +ac392100| div.wu $t0, $t1, $t2 +00384806| ertn +ac5d0000| ext.w.b $t0, $t1 +ac590000| ext.w.h $t0, $t1 +28091401| fabs.d $ft0, $ft1 +28051401| fabs.s $ft0, $ft1 +28250101| fadd.d $ft0, $ft1, $ft1 +28a50001| fadd.s $ft0, $ft1, $ft1 +28391401| fclass.d 
$ft0, $ft1 +28351401| fclass.s $ft0, $ft1 +2029200c| fcmp.caf.d $fcc0, $ft1, $ft2 +2029100c| fcmp.caf.s $fcc0, $ft1, $ft2 +2029220c| fcmp.ceq.d $fcc0, $ft1, $ft2 +2029120c| fcmp.ceq.s $fcc0, $ft1, $ft2 +2029230c| fcmp.cle.d $fcc0, $ft1, $ft2 +2029130c| fcmp.cle.s $fcc0, $ft1, $ft2 +2029210c| fcmp.clt.d $fcc0, $ft1, $ft2 +2029110c| fcmp.clt.s $fcc0, $ft1, $ft2 +2029280c| fcmp.cne.d $fcc0, $ft1, $ft2 +2029180c| fcmp.cne.s $fcc0, $ft1, $ft2 +20292a0c| fcmp.cor.d $fcc0, $ft1, $ft2 +20291a0c| fcmp.cor.s $fcc0, $ft1, $ft2 +2029260c| fcmp.cueq.d $fcc0, $ft1, $ft2 +2029160c| fcmp.cueq.s $fcc0, $ft1, $ft2 +2029270c| fcmp.cule.d $fcc0, $ft1, $ft2 +2029170c| fcmp.cule.s $fcc0, $ft1, $ft2 +2029250c| fcmp.cult.d $fcc0, $ft1, $ft2 +2029150c| fcmp.cult.s $fcc0, $ft1, $ft2 +20292c0c| fcmp.cune.d $fcc0, $ft1, $ft2 +20291c0c| fcmp.cune.s $fcc0, $ft1, $ft2 +2029240c| fcmp.cun.d $fcc0, $ft1, $ft2 +2029140c| fcmp.cun.s $fcc0, $ft1, $ft2 +20a9200c| fcmp.saf.d $fcc0, $ft1, $ft2 +20a9100c| fcmp.saf.s $fcc0, $ft1, $ft2 +20a9220c| fcmp.seq.d $fcc0, $ft1, $ft2 +20a9120c| fcmp.seq.s $fcc0, $ft1, $ft2 +20a9230c| fcmp.sle.d $fcc0, $ft1, $ft2 +20a9130c| fcmp.sle.s $fcc0, $ft1, $ft2 +20a9210c| fcmp.slt.d $fcc0, $ft1, $ft2 +20a9110c| fcmp.slt.s $fcc0, $ft1, $ft2 +20a9280c| fcmp.sne.d $fcc0, $ft1, $ft2 +20a9180c| fcmp.sne.s $fcc0, $ft1, $ft2 +20a92a0c| fcmp.sor.d $fcc0, $ft1, $ft2 +20a91a0c| fcmp.sor.s $fcc0, $ft1, $ft2 +20a9260c| fcmp.sueq.d $fcc0, $ft1, $ft2 +20a9160c| fcmp.sueq.s $fcc0, $ft1, $ft2 +20a9270c| fcmp.sule.d $fcc0, $ft1, $ft2 +20a9170c| fcmp.sule.s $fcc0, $ft1, $ft2 +20a9250c| fcmp.sult.d $fcc0, $ft1, $ft2 +20a9150c| fcmp.sult.s $fcc0, $ft1, $ft2 +20a92c0c| fcmp.sune.d $fcc0, $ft1, $ft2 +20a91c0c| fcmp.sune.s $fcc0, $ft1, $ft2 +20a9240c| fcmp.sun.d $fcc0, $ft1, $ft2 +20a9140c| fcmp.sun.s $fcc0, $ft1, $ft2 +28291301| fcopysign.d $ft0, $ft1, $ft2 +28a91201| fcopysign.s $ft0, $ft1, $ft2 +28251901| fcvt.d.s $ft0, $ft1 +28191901| fcvt.s.d $ft0, $ft1 +28290701| fdiv.d $ft0, $ft1, $ft2 
+28a90601| fdiv.s $ft0, $ft1, $ft2 +28291d01| ffint.d.l $ft0, $ft1 +28211d01| ffint.d.w $ft0, $ft1 +28191d01| ffint.s.l $ft0, $ft1 +28111d01| ffint.s.w $ft0, $ft1 +a841802b| fld.d $ft0, $t1, 16 +a841002b| fld.s $ft0, $t1, 16 +a8b97438| fldgt.d $ft0, $t1, $t2 +a8397438| fldgt.s $ft0, $t1, $t2 +a8b97538| fldle.d $ft0, $t1, $t2 +a8397538| fldle.s $ft0, $t1, $t2 +a8393438| fldx.d $ft0, $t1, $t2 +a8393038| fldx.s $ft0, $t1, $t2 +28291401| flogb.d $ft0, $ft1 +28251401| flogb.s $ft0, $ft1 +28a92508| fmadd.d $ft0, $ft1, $ft2, $ft3 +28a91508| fmadd.s $ft0, $ft1, $ft2, $ft3 +28290901| fmax.d $ft0, $ft1, $ft2 +28a90801| fmax.s $ft0, $ft1, $ft2 +28290d01| fmaxa.d $ft0, $ft1, $ft2 +28a90c01| fmaxa.s $ft0, $ft1, $ft2 +28290b01| fmin.d $ft0, $ft1, $ft2 +28a90a01| fmin.s $ft0, $ft1, $ft2 +28290f01| fmina.d $ft0, $ft1, $ft2 +28a90e01| fmina.s $ft0, $ft1, $ft2 +48991401| fmov.d $ft0, $ft2 +48951401| fmov.s $ft0, $ft2 +28a96508| fmsub.d $ft0, $ft1, $ft2, $ft3 +28a95508| fmsub.s $ft0, $ft1, $ft2, $ft3 +28290501| fmul.d $ft0, $ft1, $ft2 +28a90401| fmul.s $ft0, $ft1, $ft2 +28191401| fneg.d $ft0, $ft1 +28151401| fneg.s $ft0, $ft1 +28a9a508| fnmadd.d $ft0, $ft1, $ft2, $ft3 +28a99508| fnmadd.s $ft0, $ft1, $ft2, $ft3 +28a9e508| fnmsub.d $ft0, $ft1, $ft2, $ft3 +28a9d508| fnmsub.s $ft0, $ft1, $ft2, $ft3 +28591401| frecip.d $ft0, $ft1 +28551401| frecip.s $ft0, $ft1 +28491e01| frint.d $ft0, $ft1 +28451e01| frint.s $ft0, $ft1 +28691401| frsqrt.d $ft0, $ft1 +28651401| frsqrt.s $ft0, $ft1 +28291101| fscaleb.d $ft0, $ft1, $ft2 +28a91001| fscaleb.s $ft0, $ft1, $ft2 +28a9000d| fsel $ft0, $ft1, $ft2, $fcc1 +28491401| fsqrt.d $ft0, $ft1 +28451401| fsqrt.s $ft0, $ft1 +a841c02b| fst.d $ft0, $t1, 16 +a841402b| fst.s $ft0, $t1, 16 +a8b97638| fstgt.d $ft0, $t1, $t2 +a8397638| fstgt.s $ft0, $t1, $t2 +a8b97738| fstle.d $ft0, $t1, $t2 +a8397738| fstle.s $ft0, $t1, $t2 +a8393c38| fstx.d $ft0, $t1, $t2 +a8393838| fstx.s $ft0, $t1, $t2 +28290301| fsub.d $ft0, $ft1, $ft2 +28a90201| fsub.s $ft0, $ft1, $ft2 
+28291b01| ftint.l.d $ft0, $ft1 +28251b01| ftint.l.s $ft0, $ft1 +28091b01| ftint.w.d $ft0, $ft1 +28051b01| ftint.w.s $ft0, $ft1 +28291a01| ftintrm.l.d $ft0, $ft1 +28251a01| ftintrm.l.s $ft0, $ft1 +28091a01| ftintrm.w.d $ft0, $ft1 +28051a01| ftintrm.w.s $ft0, $ft1 +28e91a01| ftintrne.l.d $ft0, $ft1 +28e51a01| ftintrne.l.s $ft0, $ft1 +28c91a01| ftintrne.w.d $ft0, $ft1 +28c51a01| ftintrne.w.s $ft0, $ft1 +28691a01| ftintrp.l.d $ft0, $ft1 +28651a01| ftintrp.l.s $ft0, $ft1 +28491a01| ftintrp.w.d $ft0, $ft1 +28451a01| ftintrp.w.s $ft0, $ft1 +28a91a01| ftintrz.l.d $ft0, $ft1 +28a51a01| ftintrz.l.s $ft0, $ft1 +28891a01| ftintrz.w.d $ft0, $ft1 +28851a01| ftintrz.w.s $ft0, $ft1 +00807238| ibar 0x0 +10804806| idle 0x10 +ac014806| iocsrrd.b $t0, $t1 +ac054806| iocsrrd.h $t0, $t1 +ac094806| iocsrrd.w $t0, $t1 +ac0d4806| iocsrrd.d $t0, $t1 +ac114806| iocsrwr.b $t0, $t1 +ac154806| iocsrwr.h $t0, $t1 +ac194806| iocsrwr.w $t0, $t1 +ac1d4806| iocsrwr.d $t0, $t1 +82b54906| invtlb 0x2, $t0, $t1 +ac11004c| jirl $t0, $t1, 16 +ac410028| ld.b $t0, $t1, 16 +ac41002a| ld.bu $t0, $t1, 16 +ac41c028| ld.d $t0, $t1, 16 +ac414028| ld.h $t0, $t1, 16 +ac41402a| ld.hu $t0, $t1, 16 +ac418028| ld.w $t0, $t1, 16 +ac41802a| ld.wu $t0, $t1, 16 +ac414006| lddir $t0, $t1, 0x10 +ac397838| ldgt.b $t0, $t1, $t2 +acb97938| ldgt.d $t0, $t1, $t2 +acb97838| ldgt.h $t0, $t1, $t2 +ac397938| ldgt.w $t0, $t1, $t2 +ac397a38| ldle.b $t0, $t1, $t2 +acb97b38| ldle.d $t0, $t1, $t2 +acb97a38| ldle.h $t0, $t1, $t2 +ac397b38| ldle.w $t0, $t1, $t2 +ac110026| ldptr.d $t0, $t1, 16 +ac01e024| ldptr.w $t0, $t1, -8192 +ac05f024| ldptr.w $t0, $t1, -4092 +acfd1f24| ldptr.w $t0, $t1, 8188 +acfdff24| ldptr.w $t0, $t1, -4 +ac050024| ldptr.w $t0, $t1, 4 +ac110024| ldptr.w $t0, $t1, 16 +80094406| ldpte $t0, 0x2 +ac390038| ldx.b $t0, $t1, $t2 +ac392038| ldx.bu $t0, $t1, $t2 +ac390c38| ldx.d $t0, $t1, $t2 +ac390438| ldx.h $t0, $t1, $t2 +ac392438| ldx.hu $t0, $t1, $t2 +ac390838| ldx.w $t0, $t1, $t2 +ac392838| ldx.wu $t0, $t1, $t2 +ac110022| 
ll.d $t0, $t1, 16 +ac110020| ll.w $t0, $t1, 16 +0c020014| lu12i.w $t0, 16 +0c000015| lu12i.w $t0, -524288 +ecffff15| lu12i.w $t0, -1 +ecffff14| lu12i.w $t0, 524287 +0c020016| lu32i.d $t0, 16 +ac410003| lu52i.d $t0, $t1, 16 +ac391300| maskeqz $t0, $t1, $t2 +acb91300| masknez $t0, $t1, $t2 +acb92200| mod.d $t0, $t1, $t2 +acb92300| mod.du $t0, $t1, $t2 +acb92000| mod.w $t0, $t1, $t2 +acb92100| mod.wu $t0, $t1, $t2 +28d41401| movcf2fr $ft0, $fcc1 +2cdc1401| movcf2gr $t0, $fcc1 +0cc81401| movfcsr2gr $t0, $fcsr0 +20d11401| movfr2cf $fcc0, $ft1 +2cb91401| movfr2gr.d $t0, $ft1 +2cb51401| movfr2gr.s $t0, $ft1 +2cbd1401| movfrh2gr.s $t0, $ft1 +a0d91401| movgr2cf $fcc0, $t1 +80c11401| movgr2fcsr $fcsr0, $t0 +a8a91401| movgr2fr.d $ft0, $t1 +a8a51401| movgr2fr.w $ft0, $t1 +a8ad1401| movgr2frh.w $ft0, $t1 +acb91d00| mul.d $t0, $t1, $t2 +ac391c00| mul.w $t0, $t1, $t2 +ac391e00| mulh.d $t0, $t1, $t2 +acb91e00| mulh.du $t0, $t1, $t2 +acb91c00| mulh.w $t0, $t1, $t2 +ac391d00| mulh.wu $t0, $t1, $t2 +ac391f00| mulw.d.w $t0, $t1, $t2 +acb91f00| mulw.d.wu $t0, $t1, $t2 +ac391400| nor $t0, $t1, $t2 +ac391500| or $t0, $t1, $t2 +ac418003| ori $t0, $t1, 0x10 +ac391600| orn $t0, $t1, $t2 +0c020018| pcaddi $t0, 16 +0c02001c| pcaddu12i $t0, 16 +0c02001e| pcaddu18i $t0, 16 +0c02001a| pcalau12i $t0, 16 +a041c02a| preld 0x0, $t1, 16 +a0392c38| preldx 0x0, $t1, $t2 +ac690000| rdtime.d $t0, $t1 +ac650000| rdtimeh.w $t0, $t1 +ac610000| rdtimel.w $t0, $t1 +ac310000| revb.2h $t0, $t1 +ac390000| revb.2w $t0, $t1 +ac350000| revb.4h $t0, $t1 +ac3d0000| revb.d $t0, $t1 +ac410000| revh.2w $t0, $t1 +ac450000| revh.d $t0, $t1 +acb91b00| rotr.d $t0, $t1, $t2 +ac391b00| rotr.w $t0, $t1, $t2 +ac414d00| rotri.d $t0, $t1, 0x10 +acc14c00| rotri.w $t0, $t1, 0x10 +ac110023| sc.d $t0, $t1, 16 +ac110021| sc.w $t0, $t1, 16 +acb91800| sll.d $t0, $t1, $t2 +ac391700| sll.w $t0, $t1, $t2 +ac414100| slli.d $t0, $t1, 0x10 +acc14000| slli.w $t0, $t1, 0x10 +ac391200| slt $t0, $t1, $t2 +ac410002| slti $t0, $t1, 16 +acb91200| 
sltu $t0, $t1, $t2 +ac414002| sltui $t0, $t1, 16 +acb91900| sra.d $t0, $t1, $t2 +ac391800| sra.w $t0, $t1, $t2 +ac414900| srai.d $t0, $t1, 0x10 +acc14800| srai.w $t0, $t1, 0x10 +ac391900| srl.d $t0, $t1, $t2 +acb91700| srl.w $t0, $t1, $t2 +ac414500| srli.d $t0, $t1, 0x10 +acc14400| srli.w $t0, $t1, 0x10 +ac410029| st.b $t0, $t1, 16 +ac41c029| st.d $t0, $t1, 16 +ac414029| st.h $t0, $t1, 16 +ac418029| st.w $t0, $t1, 16 +ac397c38| stgt.b $t0, $t1, $t2 +acb97d38| stgt.d $t0, $t1, $t2 +acb97c38| stgt.h $t0, $t1, $t2 +ac397d38| stgt.w $t0, $t1, $t2 +ac397e38| stle.b $t0, $t1, $t2 +acb97f38| stle.d $t0, $t1, $t2 +acb97e38| stle.h $t0, $t1, $t2 +ac397f38| stle.w $t0, $t1, $t2 +ac110027| stptr.d $t0, $t1, 16 +ac110025| stptr.w $t0, $t1, 16 +ac391038| stx.b $t0, $t1, $t2 +ac391c38| stx.d $t0, $t1, $t2 +ac391438| stx.h $t0, $t1, $t2 +ac391838| stx.w $t0, $t1, $t2 +acb91100| sub.d $t0, $t1, $t2 +ac391100| sub.w $t0, $t1, $t2 +00002b00| syscall 0x0 +00204806| tlbclr +00344806| tlbfill +00244806| tlbflush +002c4806| tlbrd +00284806| tlbsrch +00304806| tlbwr +acb91500| xor $t0, $t1, $t2 +ac41c003| xori $t0, $t1, 0x10 +cc355d38| amadd.b $t0, $t1, $t2 +cc355f38| amadd_db.b $t0, $t1, $t2 +ccb55f38| amadd_db.h $t0, $t1, $t2 +ccb55d38| amadd.h $t0, $t1, $t2 +cc355838| amcas.b $t0, $t1, $t2 +ccb55938| amcas.d $t0, $t1, $t2 +cc355a38| amcas_db.b $t0, $t1, $t2 +ccb55b38| amcas_db.d $t0, $t1, $t2 +ccb55a38| amcas_db.h $t0, $t1, $t2 +cc355b38| amcas_db.w $t0, $t1, $t2 +ccb55838| amcas.h $t0, $t1, $t2 +cc355938| amcas.w $t0, $t1, $t2 +cc355c38| amswap.b $t0, $t1, $t2 +cc355e38| amswap_db.b $t0, $t1, $t2 +ccb55e38| amswap_db.h $t0, $t1, $t2 +ccb55c38| amswap.h $t0, $t1, $t2 +28791401| frecipe.d $ft0, $ft1 +28751401| frecipe.s $ft0, $ft1 +28891401| frsqrte.d $ft0, $ft1 +28851401| frsqrte.s $ft0, $ft1 +ac895738| llacq.d $t0, $t1 +ac815738| llacq.w $t0, $t1 +ac8d5738| screl.d $t0, $t1 +ac855738| screl.w $t0, $t1 diff --git a/loong64/loong64asm/testdata/plan9cases.txt 
b/loong64/loong64asm/testdata/plan9cases.txt new file mode 100644 index 00000000..53f5d450 --- /dev/null +++ b/loong64/loong64asm/testdata/plan9cases.txt @@ -0,0 +1,365 @@ +a6101000| ADD R4, R5, R6 +a6901000| ADDV R4, R5, R6 +a5101000| ADD R4, R5 +a5901000| ADDV R4, R5 +85fcbf02| ADD $-1, R4, R5 +84fcbf02| ADD $-1, R4 +85fcff02| ADDV $-1, R4, R5 +84fcff02| ADDV $-1, R4 +ac391000| ADD R14, R13, R12 +acb91000| ADDV R14, R13, R12 +ac41c002| ADDV $16, R13, R12 +ac01e002| ADDV $-2048, R13, R12 +acfdff02| ADDV $-1, R13, R12 +ac05e002| ADDV $-2047, R13, R12 +acf9ff02| ADDV $-2, R13, R12 +ac418002| ADD $16, R13, R12 +a6101100| SUB R4, R5, R6 +a6901100| SUBV R4, R5, R6 +a5101100| SUB R4, R5 +a5901100| SUBV R4, R5 +05101100| NEGW R4, R5 +05901100| NEGV R4, R5 +84781200| SGT R30, R4 +85781200| SGT R30, R4, R5 +84f81200| SGTU R30, R4 +85f81200| SGTU R30, R4, R5 +a6901400| AND R4, R5, R6 +a5901400| AND R4, R5 +85044003| AND $1, R4, R5 +84044003| AND $1, R4 +a5101c00| MUL R4, R5 +a6101c00| MUL R4, R5, R6 +a5901d00| MULV R4, R5 +a6901d00| MULV R4, R5, R6 +a5101e00| MULHV R4, R5 +a6101e00| MULHV R4, R5, R6 +a5901e00| MULHVU R4, R5 +a6901e00| MULHVU R4, R5, R6 +28290501| MULD F10, F9, F8 +28a90401| MULF F10, F9, F8 +a5102000| DIV R4, R5 +a6102000| DIV R4, R5, R6 +a5102100| DIVU R4, R5 +a6102100| DIVU R4, R5, R6 +a5102200| DIVV R4, R5 +a6102200| DIVV R4, R5, R6 +a5102300| DIVVU R4, R5 +a6102300| DIVVU R4, R5, R6 +28290701| DIVD F10, F9, F8 +28a90601| DIVF F10, F9, F8 +a5902000| REM R4, R5 +a6902000| REM R4, R5, R6 +a5902100| REMU R4, R5 +a6902100| REMU R4, R5, R6 +a5902200| REMV R4, R5 +a6902200| REMV R4, R5, R6 +a5902300| REMVU R4, R5 +a6902300| REMVU R4, R5, R6 +04020014| LU12IW $16, R4 +24000014| LU12IW $1, R4 +85001700| MOVW R4, R5 +85001500| MOVV R4, R5 +85fc4303| MOVBU R4, R5 +1e020014| LU12IW $16, R30 +85781000| ADD R30, R4, R5 +de038003| OR $0, R30 +ac391400| NOR R14, R13, R12 +acb91500| XOR R14, R13, R12 +ac41c003| XOR $16, R13, R12 +85f81000| ADDV R30, R4, R5 +8500cf00| 
MOVHU R4, R5 +a5101700| SLL R4, R5 +a6101700| SLL R4, R5, R6 +a5901700| SRL R4, R5 +a6901700| SRL R4, R5, R6 +a5101800| SRA R4, R5 +a6101800| SRA R4, R5, R6 +a5101b00| ROTR R4, R5 +a6101b00| ROTR R4, R5, R6 +a5901800| SLLV R4, R5 +a6901800| SLLV R4, R5, R6 +a5901b00| ROTRV R4, R5 +a6901b00| ROTRV R4, R5, R6 +85904000| SLL $4, R4, R5 +84904000| SLL $4, R4 +85904400| SRL $4, R4, R5 +84904400| SRL $4, R4 +85904800| SRA $4, R4, R5 +84904800| SRA $4, R4 +85904c00| ROTR $4, R4, R5 +84904c00| ROTR $4, R4 +85104100| SLLV $4, R4, R5 +84104100| SLLV $4, R4 +85104d00| ROTRV $4, R4, R5 +84104d00| ROTRV $4, R4 +a6101300| MASKEQZ R4, R5, R6 +a6901300| MASKNEZ R4, R5, R6 +00050048| BFPT 1(PC) +00040048| BFPF 1(PC) +80040058| BEQ R4, 1(PC) +8d110058| BEQ R12, R13, 4(PC) +a0110040| BEQ R13, 4(PC) +9ff1ff43| BEQ R12, -4(PC) +8504005c| BNE R4, R5, 1(PC) +8004005c| BNE R4, 1(PC) +8004005c| BNE R4, 1(PC) +a0140058| BEQ R5, 5(PC) +a0110058| BEQ R13, 4(PC) +00ebff5f| BNE R24, -6(PC) +801d005c| BNE R12, 7(PC) +85040060| BLT R4, R5, 1(PC) +80fcff63| BLTZ R4, -1(PC) +05040060| BGTZ R5, 1(PC) +80040060| BLTZ R4, 1(PC) +47d5ff6b| BLTU R10, R7, -11(PC) +802c0068| BLTU R4, 11(PC) +85040064| BGE R4, R5, 1(PC) +80fcff67| BGEZ R4, -1(PC) +47d5ff6f| BGEU R10, R7, -11(PC) +802c006c| BGEU R4, 11(PC) +04d8ff67| BLEZ R4, -10(PC) +00040058| JMP 1(PC) +8000004c| JMP (R4) +00340050| JMP 13(PC) +00100050| JMP 4(PC) +00100054| CALL 4(PC) +8100004c| CALL (R4) +00140054| CALL 5(PC) +a4048029| MOVW R4, 1(R5) +a404c029| MOVV R4, 1(R5) +a4040029| MOVB R4, 1(R5) +ac410029| MOVB R12, 16(R13) +a4040021| SC R4, 1(R5) +a4040023| SCV R4, 1(R5) +a4040028| MOVB 1(R5), R4 +a404002a| MOVBU 1(R5), R4 +a4044028| MOVH 1(R5), R4 +a404402a| MOVHU 1(R5), R4 +a4048028| MOVW 1(R5), R4 +a404802a| MOVWU 1(R5), R4 +a404c028| MOVV 1(R5), R4 +a4040020| LL 1(R5), R4 +a4040022| LLV 1(R5), R4 +a4fc3f20| LL -1(R5), R4 +a4fc3f22| LLV -1(R5), R4 +00002a00| BREAK +00007238| DBAR +00002b00| SYSCALL +00004003| NOOP +a5900001| ADDF F4, F5 
+a6900001| ADDF F4, F5, F6 +28a50001| ADDF F9, F9, F8 +28250101| ADDD F9, F9, F8 +85041401| ABSF F4, F5 +85081401| ABSD F4, F5 +85141401| NEGF F4, F5 +85181401| NEGD F4, F5 +85441401| SQRTF F4, F5 +85481401| SQRTD F4, F5 +a090110c| CMPGTF F4, F5, FCC0 +a090210c| CMPGTD F4, F5, FCC0 +a090130c| CMPGEF F4, F5, FCC0 +a090230c| CMPGED F4, F5, FCC0 +a010220c| CMPEQD F4, F5, FCC0 +2029120c| CMPEQF F10, F9, FCC0 +85241901| MOVFD F4, F5 +85181901| MOVDF F4, F5 +85101d01| FFINTFW F4, F5 +85201d01| FFINTDW F4, F5 +85181d01| FFINTFV F4, F5 +85281d01| FFINTDV F4, F5 +85041b01| FTINTWF F4, F5 +85241b01| FTINTVF F4, F5 +85081b01| FTINTWD F4, F5 +85281b01| FTINTVD F4, F5 +85881a01| FTINTRZWD F4, F5 +85841a01| FTINTRZWF F4, F5 +85a81a01| FTINTRZVD F4, F5 +85a41a01| FTINTRZVF F4, F5 +85941401| MOVF F4, F5 +85981401| MOVD F4, F5 +85a41401| MOVW R4, F5 +85b41401| MOVW F4, R5 +85a81401| MOVV R4, F5 +85b81401| MOVV F4, R5 +04dc1401| MOVV FCC0, R4 +80d81401| MOVV R4, FCC0 +6440002b| MOVF 16(R3), F4 +6440802b| MOVD 16(R3), F4 +a404002b| MOVF 1(R5), F4 +a404802b| MOVD 1(R5), F4 +6460402b| MOVF F4, 24(R3) +6460c02b| MOVD F4, 24(R3) +a404402b| MOVF F4, 1(R5) +a404c02b| MOVD F4, 1(R5) +5e020014| LU12IW $18, R30 +64a95214| LU12IW $169291, R4 +84d08703| OR $500, R4 +84428614| LU12IW $274964, R4 +84848c03| OR $801, R4 +24640816| LU32ID $17185, R4 +84000003| LU52ID $0, R4 +24a93615| LU12IW $-412343, R4 +84849003| OR $1057, R4 +8464c817| LU32ID $-113884, R4 +848c0203| LU52ID $163, R4 +80600000| RDTIMELW R4, R0 +80640000| RDTIMEHW R4, R0 +85680000| RDTIMED R4, R5 +1e00001a| PCALAU12I $0, R30 +0c02001c| PCADDU12I $16, R12 +c4038029| MOVW R4, 0(R30) +ac395c38| AMSWAPB R14, (R13), R12 +acb95c38| AMSWAPH R14, (R13), R12 +ac396038| AMSWAPW R14, (R13), R12 +acb96038| AMSWAPV R14, (R13), R12 +ac395838| AMCASB R14, (R13), R12 +acb95838| AMCASH R14, (R13), R12 +ac395938| AMCASW R14, (R13), R12 +acb95938| AMCASV R14, (R13), R12 +ac396138| AMADDW R14, (R13), R12 +acb96138| AMADDV R14, (R13), R12 +ac396238| 
AMANDW R14, (R13), R12 +acb96238| AMANDV R14, (R13), R12 +ac396338| AMORW R14, (R13), R12 +acb96338| AMORV R14, (R13), R12 +ac396438| AMXORW R14, (R13), R12 +acb96438| AMXORV R14, (R13), R12 +ac396538| AMMAXW R14, (R13), R12 +acb96538| AMMAXV R14, (R13), R12 +ac396638| AMMINW R14, (R13), R12 +acb96638| AMMINV R14, (R13), R12 +ac396738| AMMAXWU R14, (R13), R12 +acb96738| AMMAXVU R14, (R13), R12 +ac396838| AMMINWU R14, (R13), R12 +acb96838| AMMINVU R14, (R13), R12 +ac395e38| AMSWAPDBB R14, (R13), R12 +acb95e38| AMSWAPDBH R14, (R13), R12 +ac396938| AMSWAPDBW R14, (R13), R12 +acb96938| AMSWAPDBV R14, (R13), R12 +ac395a38| AMCASDBB R14, (R13), R12 +acb95a38| AMCASDBH R14, (R13), R12 +ac395b38| AMCASDBW R14, (R13), R12 +acb95b38| AMCASDBV R14, (R13), R12 +ac396a38| AMADDDBW R14, (R13), R12 +acb96a38| AMADDDBV R14, (R13), R12 +ac396b38| AMANDDBW R14, (R13), R12 +acb96b38| AMANDDBV R14, (R13), R12 +ac396c38| AMORDBW R14, (R13), R12 +acb96c38| AMORDBV R14, (R13), R12 +ac396d38| AMXORDBW R14, (R13), R12 +acb96d38| AMXORDBV R14, (R13), R12 +ac396e38| AMMAXDBW R14, (R13), R12 +acb96e38| AMMAXDBV R14, (R13), R12 +ac396f38| AMMINDBW R14, (R13), R12 +acb96f38| AMMINDBV R14, (R13), R12 +ac397038| AMMAXDBWU R14, (R13), R12 +acb97038| AMMAXDBVU R14, (R13), R12 +ac397138| AMMINDBWU R14, (R13), R12 +acb97138| AMMINDBVU R14, (R13), R12 +856c0000| CPUCFG R4, R5 +85481e01| FRINTD F4, F5 +83c01401| MOVV R4, FCSR3 +64c81401| MOVV FCSR3, R4 +80d01401| MOVV F4, FCC0 +04d41401| MOVV FCC0, F4 +a6901201| FCOPYSGF F4, F5, F6 +a6101301| FCOPYSGD F4, F5, F6 +85500000| BITREVW R4, R5 +85540000| BITREVV R4, R5 +a6102400| CRCWBW R4, R5, R6 +a6902400| CRCWHW R4, R5, R6 +a6102500| CRCWWW R4, R5, R6 +a6902500| CRCWVW R4, R5, R6 +a6102600| CRCCWBW R4, R5, R6 +a6902600| CRCCWHW R4, R5, R6 +a6102700| CRCCWWW R4, R5, R6 +a6902700| CRCCWVW R4, R5, R6 +cc350038| MOVB (R14)(R13), R12 +cc352038| MOVBU (R14)(R13), R12 +cc350438| MOVH (R14)(R13), R12 +cc352438| MOVHU (R14)(R13), R12 +cc350838| MOVW (R14)(R13), 
R12 +cc352838| MOVWU (R14)(R13), R12 +cc350c38| MOVV (R14)(R13), R12 +cc351038| MOVB R12, (R14)(R13) +cc351438| MOVH R12, (R14)(R13) +cc351838| MOVW R12, (R14)(R13) +cc351c38| MOVV R12, (R14)(R13) +c2353038| MOVF (R14)(R13), F2 +c2353438| MOVD (R14)(R13), F2 +c2353838| MOVF F2, (R14)(R13) +c2353c38| MOVD F2, (R14)(R13) +a010120c| CMPEQF F4, F5, FCC0 +a190110c| CMPGTF F4, F5, FCC1 +a290210c| CMPGTD F4, F5, FCC2 +a390130c| CMPGEF F4, F5, FCC3 +a490230c| CMPGED F4, F5, FCC4 +a510220c| CMPEQD F4, F5, FCC5 +85100000| CLOW R4, R5 +85200000| CLOV R4, R5 +85140000| CLZW R4, R5 +85240000| CLZV R4, R5 +85180000| CTOW R4, R5 +85280000| CTOV R4, R5 +851c0000| CTZW R4, R5 +852c0000| CTZV R4, R5 +853c0000| REVBV R4, R5 +85380000| REVB2W R4, R5 +85340000| REVB4H R4, R5 +85300000| REVB2H R4, R5 +a6900a01| FMINF F4, F5, F6 +a5900a01| FMINF F4, F5 +a6100b01| FMIND F4, F5, F6 +a5100b01| FMIND F4, F5 +a6900801| FMAXF F4, F5, F6 +a5900801| FMAXF F4, F5 +a6100901| FMAXD F4, F5, F6 +a5100901| FMAXD F4, F5 +85341401| FCLASSF F4, F5 +85381401| FCLASSD F4, F5 +02041a01| FTINTRMWF F0, F2 +02081a01| FTINTRMWD F0, F2 +02241a01| FTINTRMVF F0, F2 +02281a01| FTINTRMVD F0, F2 +02441a01| FTINTRPWF F0, F2 +02481a01| FTINTRPWD F0, F2 +02641a01| FTINTRPVF F0, F2 +02681a01| FTINTRPVD F0, F2 +02841a01| FTINTRZWF F0, F2 +02881a01| FTINTRZWD F0, F2 +02a41a01| FTINTRZVF F0, F2 +02a81a01| FTINTRZVD F0, F2 +02c41a01| FTINTRNEWF F0, F2 +02c81a01| FTINTRNEWD F0, F2 +02e41a01| FTINTRNEVF F0, F2 +02e81a01| FTINTRNEVD F0, F2 +01101d01| FFINTFW F0, F1 +01181d01| FFINTFV F0, F1 +01201d01| FFINTDW F0, F1 +01281d01| FFINTDV F0, F1 +01041b01| FTINTWF F0, F1 +01081b01| FTINTWD F0, F1 +01241b01| FTINTVF F0, F1 +01281b01| FTINTVD F0, F1 +c7901208| FMADDF F5, F4, F6, F7 +c7902208| FMADDD F5, F4, F6, F7 +c7905208| FMSUBF F5, F4, F6, F7 +c7906208| FMSUBD F5, F4, F6, F7 +c7909208| FNMADDF F5, F4, F6, F7 +c790a208| FNMADDD F5, F4, F6, F7 +c790d208| FNMSUBF F5, F4, F6, F7 +c790e208| FNMSUBD F5, F4, F6, F7 +85806000| BSTRPICKW 
$0, R4, $0, R5 +85807f00| BSTRPICKW $31, R4, $0, R5 +85986f00| BSTRPICKW $15, R4, $6, R5 +8500c000| BSTRPICKV $0, R4, $0, R5 +8500ff00| BSTRPICKV $63, R4, $0, R5 +8518cf00| BSTRPICKV $15, R4, $6, R5 +85006000| BSTRINSW $0, R4, $0, R5 +85007f00| BSTRINSW $31, R4, $0, R5 +85186f00| BSTRINSW $15, R4, $6, R5 +85008000| BSTRINSV $0, R4, $0, R5 +8500bf00| BSTRINSV $63, R4, $0, R5 +85188f00| BSTRINSV $15, R4, $6, R5 diff --git a/loong64/loong64spec/spec.go b/loong64/loong64spec/spec.go new file mode 100644 index 00000000..4c32961c --- /dev/null +++ b/loong64/loong64spec/spec.go @@ -0,0 +1,528 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// loong64spec reads the "LoongArch-Vol1-EN.pdf" [1] to collect instruction +// encoding details and output to tables.go. +// +// usage: go run spec.go LoongArch-Vol1-EN.pdf +// +// [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf + +package main + +import ( + "bytes" + "fmt" + "log" + "math" + "os" + "regexp" + "sort" + "strconv" + "strings" + + "rsc.io/pdf" +) + +func mergeMap(m1 map[string]string, m2 map[string]string) { + for k := range m2 { + m1[k] = m2[k] + } +} + +func main() { + log.SetFlags(0) + log.SetPrefix("loong64spec: ") + + if len(os.Args) != 2 { + fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n") + os.Exit(2) + } + f, err := pdf.Open(os.Args[1]) + if err != nil { + log.Fatal(err) + } + var prologue bytes.Buffer + prologue.Write([]byte("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. 
All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) + + var op_f bytes.Buffer + op_f.Write([]byte("const (\n\t_ Op = iota\n")) + + var opstr_f bytes.Buffer + opstr_f.Write([]byte("var opstr = [...]string{\n")) + + var instFormats_f bytes.Buffer + instFormats_f.Write([]byte("var instFormats = [...]instFormat{\n")) + + // Scan document looking for instructions. + n := f.NumPage() + var ops []string + opstrs := map[string]string{} + instFormatComments := map[string]string{} + instFormats := map[string]string{} + var fp int + for pageNum := 1; pageNum <= n; pageNum++ { + p := f.Page(pageNum) + if fp == 0 { + if !isFirstPage(p) { + continue + } + fp = pageNum + } + cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum) + ops = append(ops, cPageOps...) + mergeMap(opstrs, cPageOpstrs) + mergeMap(instFormatComments, cPageInstFormatComments) + mergeMap(instFormats, cPageInstFormats) + } + + sort.Strings(ops) + + for _, op := range ops { + // 1. write op + op_f.Write([]byte(fmt.Sprintf("\t%s\n", op))) + // 2. write opstr + opstr_f.Write([]byte(fmt.Sprintf("\t%s\n", opstrs[op]))) + // 3. 
write instFormat + instFormats_f.Write([]byte(fmt.Sprintf("\t%s\n\t%s\n", instFormatComments[op], instFormats[op]))) + } + + op_f.Write([]byte(")\n\n")) + opstr_f.Write([]byte("}\n\n")) + instFormats_f.Write([]byte("}\n")) + + fileTables, err := os.Create("tables.go") + defer fileTables.Close() + + fileTables.Write(prologue.Bytes()) + fileTables.Write(op_f.Bytes()) + fileTables.Write(opstr_f.Bytes()) + fileTables.Write(instFormats_f.Bytes()) + + fileTables.Close() +} + +func isFirstPage(page pdf.Page) bool { + content := page.Content() + appendixb := "AppendixB" + ct := "" + for _, t := range content.Text { + ct += t.S + if ct == "AppendixB" { + return true + } + if strings.HasPrefix(appendixb, ct) { + continue + } else { + return false + } + } + return false +} + +func getArg(name string) (length int, argName string) { + switch { + case strings.Contains("arg_fd", name): + return 5, "arg_fd" + case strings.Contains("arg_fj", name): + return 5, "arg_fj" + case strings.Contains("arg_fk", name): + return 5, "arg_fk" + case strings.Contains("arg_fa", name): + return 5, "arg_fa" + case strings.Contains("arg_rd", name): + return 5, "arg_rd" + case strings.Contains("arg_rj", name) || name == "rj!=0,1": + return 5, "arg_rj" + case strings.Contains("arg_rk", name): + return 5, "arg_rk" + case name == "csr": + return 14, "arg_csr_23_10" + case strings.Contains("arg_cd", name): + return 5, "arg_cd" + case strings.Contains("arg_cj", name): + return 5, "arg_cj" + case strings.Contains("arg_ca", name): + return 5, "arg_ca" + case strings.Contains(name, "sa"): + length, _ := strconv.Atoi(strings.Split(name, "sa")[1]) + if length == 2 { + argName = "arg_sa2_16_15" + } else { + argName = "arg_sa3_17_15" + } + return length, argName + case strings.Contains("arg_seq_17_10", name): + return 8, "arg_seq_17_10" + case strings.Contains("arg_op_4_0", name): + return 5, "arg_op_4_0" + case strings.Contains(name, "ui"): + length, _ := strconv.Atoi(strings.Split(name, "ui")[1]) + if length 
== 5 { + argName = "arg_ui5_14_10" + } else if length == 6 { + argName = "arg_ui6_15_10" + } else { + argName = "arg_ui12_21_10" + } + return length, argName + case strings.Contains("arg_lsbw", name): + return 5, "arg_lsbw" + case strings.Contains("arg_msbw", name): + return 5, "arg_msbw" + case strings.Contains("arg_lsbd", name): + return 6, "arg_lsbd" + case strings.Contains("arg_msbd", name): + return 6, "arg_msbd" + case strings.Contains(name, "si"): + length, _ := strconv.Atoi(strings.Split(name, "si")[1]) + if length == 12 { + argName = "arg_si12_21_10" + } else if length == 14 { + argName = "arg_si14_23_10" + } else if length == 16 { + argName = "arg_si16_25_10" + } else { + argName = "arg_si20_24_5" + } + return length, argName + case strings.Contains(name, "offs"): + splitName := strings.Split(name, ":") + left, _ := strconv.Atoi(strings.Split(splitName[0], "[")[1]) + right, _ := strconv.Atoi(strings.Split(splitName[1], "]")[0]) + return left - right + 1, "offs" + default: + return 0, "" + } +} + +func binstrToHex(str string) string { + rst := 0 + hex := "0x" + charArray := []byte(str) + for i := 0; i < 32; { + rst = 1*(int(charArray[i+3])-48) + 2*(int(charArray[i+2])-48) + 4*(int(charArray[i+1])-48) + 8*(int(charArray[i])-48) + switch rst { + case 10: + hex = hex + "a" + case 11: + hex = hex + "b" + case 12: + hex = hex + "c" + case 13: + hex = hex + "d" + case 14: + hex = hex + "e" + case 15: + hex = hex + "f" + default: + hex += strconv.Itoa(rst) + } + + i = i + 4 + } + return hex +} + +/* +Here we deal with the instruction FCMP.cond.S/D, which has the following format: + + | 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 | + |---------|---------|---------|-------|---|---|-------| + | op | cond | fk | fj | 0 | 0 | cd | + +The `cond` field has these possible values: + + "CAF": "00", + "CUN": "08", + "CEQ": "04", + "CUEQ": "0c", + "CLT": "02", + "CULT": "0a", + "CLE": "06", + "CULE": "0e", + "CNE": "10", + "COR": "14", + "CUNE": "18", + "SAF": "01", 
+ "SUN": "09", + "SEQ": "05", + "SUEQ": "0d", + "SLT": "03", + "SULT": "0b", + "SLE": "07", + "SULE": "0f", + "SNE": "11", + "SOR": "15", + "SUNE": "19", + +These values are the hexadecimal numbers of bits 19 to 15, the same as +described in the instruction set manual. + +The following code defines a map, the values in it represent the hexadecimal +encoding of the cond field in the entire instruction. In this case, the upper +4 bits and the lowest 1 bit are encoded separately, so the encoding is +different from the encoding described above. +*/ +func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) { + conds := map[string]string{ + "CAF": "00", + "CUN": "40", + "CEQ": "20", + "CUEQ": "60", + "CLT": "10", + "CULT": "50", + "CLE": "30", + "CULE": "70", + "CNE": "80", + "COR": "a0", + "CUNE": "c0", + "SAF": "08", + "SUN": "48", + "SEQ": "28", + "SUEQ": "68", + "SLT": "18", + "SULT": "58", + "SLE": "38", + "SULE": "78", + "SNE": "88", + "SOR": "a8", + "SUNE": "c8", + } + fcmpConditions = make(map[string]map[string]string) + for k, v := range conds { + op := fmt.Sprintf("FCMP_%s_%s", k, ds) + opstr := fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", k, ds, k, ds) + instFormatComment := fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", k, ds) + var instFormat string + if ds == "D" { + instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) + } else { + instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) + } + + fcmpConditions[op] = make(map[string]string) + fcmpConditions[op]["op"] = op + fcmpConditions[op]["opstr"] = opstr + fcmpConditions[op]["instFormatComment"] = instFormatComment + fcmpConditions[op]["instFormat"] = instFormat + } + return +} + +func findWords(chars []pdf.Text) (words []pdf.Text) { + for i := 0; i < len(chars); { + xRange := []float64{chars[i].X, chars[i].X} + j := i + 1 + + // Find all 
chars on one line. + for j < len(chars) && chars[j].Y == chars[i].Y { + xRange[1] = chars[j].X + j++ + } + + // we need to note that the word may change line(Y) but belong to one cell. So, after loop over all continued + // chars whose Y are same, check if the next char's X belong to the range of xRange, if true, means it should + // be contact to current word, because the next word's X should bigger than current one. + for j < len(chars) && chars[j].X >= xRange[0] && chars[j].X <= xRange[1] { + j++ + } + + var end float64 + // Split line into words (really, phrases). + for k := i; k < j; { + ck := &chars[k] + s := ck.S + end = ck.X + ck.W + charSpace := ck.FontSize / 6 + wordSpace := ck.FontSize * 2 / 3 + l := k + 1 + for l < j { + // Grow word. + cl := &chars[l] + + if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { + s += cl.S + end = cl.X + cl.W + l++ + continue + } + // Add space to phrase before next word. + if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace { + s += " " + cl.S + end = cl.X + cl.W + l++ + continue + } + break + } + f := ck.Font + words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + k = l + } + i = j + } + + return words +} + +func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) { + opstrs = make(map[string]string) + instFormatComments = make(map[string]string) + instFormats = make(map[string]string) + + content := p.Content() + + var text []pdf.Text + for _, t := range content.Text { + text = append(text, t) + } + + // table name(70), table header(64), page num(3) + if isFP { + text = text[134 : len(text)-3] + } else { + text = text[64 : len(text)-3] + } + + text = findWords(text) + + for i := 0; i < len(text); { + var fcmpConditions map[string]map[string]string + if strings.HasPrefix(text[i].S, "FCMP") { + fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2]) + + for fc, 
inst := range fcmpConditions { + ops = append(ops, inst["op"]) + opstrs[fc] = inst["opstr"] + instFormatComments[fc] = inst["instFormatComment"] + instFormats[fc] = inst["instFormat"] + } + t := i + 1 + for ; text[t].Y == text[i].Y; t++ { + continue + } + i = t + continue + } + + op := strings.Replace(text[i].S, ".", "_", -1) + opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S) + instFormatComment := "" + binValue := "" + binMask := "" + instArgs := "" + offs := false + var offArgs []string + + j := i + 1 + for ; j < len(text) && text[j].Y == text[i].Y; j++ { + + // Some instruction has no arguments, so the next word(text[j].S) is not the arguments string but 0/1 bit, it shouldn't be skipped. + if res, _ := regexp.MatchString("^\\d+$", text[j].S); j == i+1 && res == false { + instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1)) + continue + } + if text[j].S == "0" || text[j].S == "1" { + binValue += text[j].S + binMask += "1" + } else { + argLen, argName := getArg(text[j].S) + + // Get argument's length failed, compute it by other arguments. 
+ if argLen == 0 { + left := 31 - len(binValue) + right := 0 + l := j + 1 + if l < len(text) && text[l].Y == text[j].Y { + for ; text[l].Y == text[j].Y; l++ { + if text[l].S == "0" || text[l].S == "1" { + right += 1 + } else { + tArgLen, _ := getArg(text[l].S) + if tArgLen == 0 { + fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n") + } + right += tArgLen + } + } + } + argLen = left - right + 1 + argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10) + } + + for k := 0; k < argLen; k++ { + binValue += "0" + binMask += "0" + } + + if argName != "offs" { + if instArgs != "" { + instArgs = ", " + instArgs + } + instArgs = argName + instArgs + } else { + offs = true + offArgs = append(offArgs, text[j].S) + } + } + } + + // The real offset is a combination of two offsets in the binary code of the instruction, for example: BEQZ + if offs && offArgs != nil { + var left int + var right int + if len(offArgs) == 1 { + left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1]) + right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) + } else if len(offArgs) == 2 { + left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1]) + right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) + } + + if instArgs == "" { + instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right) + } else { + instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right) + } + } + + ops = append(ops, op) + opstrs[op] = opstr + if instFormatComment == "" { + instFormatComment = "// " + text[i].S + } else if strings.HasPrefix(op, "AM") { + instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S) + } + instFormatComments[op] = instFormatComment + // The parameter order of some instructions is inconsistent in encoding and syntax, such as BSTRINS.* + if instArgs != "" { + args := strings.Split(instFormatComment, " ")[2:] + 
tInstArgs := strings.Split(instArgs, ", ") + newOrderedInstArgs := []string{} + for _, a := range args { + a = strings.Split(a, ",")[0] + for _, aa := range tInstArgs { + if strings.Contains(aa, a) { + newOrderedInstArgs = append(newOrderedInstArgs, aa) + break + } else if a == "rd" && aa == "arg_fd" { + newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd") + break + } + } + } + instArgs = strings.Join(newOrderedInstArgs, ", ") + } + if strings.HasPrefix(op, "AM") { + instArgs = "arg_rd, arg_rk, arg_rj" + } + instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs) + instFormats[op] = instFormat + + i = j // next instruction + } + + return +} From b3635f5142e10850f935740529702e766bb68fd6 Mon Sep 17 00:00:00 2001 From: limeidan Date: Thu, 29 Aug 2024 10:33:42 +0800 Subject: [PATCH 028/200] loong64: fix the expression of code generate line Change-Id: Id4615a28320c2acdc41fc1fc21a19943fec3b23f Reviewed-on: https://go-review.googlesource.com/c/arch/+/609475 Auto-Submit: Dmitri Shuralyov Reviewed-by: Dmitri Shuralyov Reviewed-by: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI Reviewed-by: abner chenc Reviewed-by: sophie zhao Reviewed-by: Cherry Mui --- loong64/loong64asm/tables.go | 2 +- loong64/loong64spec/spec.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loong64/loong64asm/tables.go b/loong64/loong64asm/tables.go index c85d47c2..ad34195b 100644 --- a/loong64/loong64asm/tables.go +++ b/loong64/loong64asm/tables.go @@ -1,4 +1,4 @@ -// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT. +// Code generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT. // Copyright 2024 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style diff --git a/loong64/loong64spec/spec.go b/loong64/loong64spec/spec.go index 4c32961c..3e69a24e 100644 --- a/loong64/loong64spec/spec.go +++ b/loong64/loong64spec/spec.go @@ -44,7 +44,7 @@ func main() { log.Fatal(err) } var prologue bytes.Buffer - prologue.Write([]byte("// Generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) + prologue.Write([]byte("// Code generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) var op_f bytes.Buffer op_f.Write([]byte("const (\n\t_ Op = iota\n")) From 8644b45fb7514c947c3fdacee68c67801accee22 Mon Sep 17 00:00:00 2001 From: limeidan Date: Thu, 29 Aug 2024 10:48:03 +0800 Subject: [PATCH 029/200] loong64: add the mapping of platform instruction JIRL to plan9 instruction RET Change-Id: Ifec777b10bc9a5a8e5e9b4fd6bd2077205ad4151 Reviewed-on: https://go-review.googlesource.com/c/arch/+/609495 Reviewed-by: sophie zhao Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI Reviewed-by: Qiqi Huang Reviewed-by: Michael Pratt Reviewed-by: Dmitri Shuralyov --- loong64/loong64asm/inst.go | 4 +++- loong64/loong64asm/plan9x.go | 7 ++++++- loong64/loong64asm/testdata/gnucases.txt | 2 ++ loong64/loong64asm/testdata/plan9cases.txt | 2 ++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/loong64/loong64asm/inst.go b/loong64/loong64asm/inst.go index 1ac5c797..362d73ba 100644 --- a/loong64/loong64asm/inst.go +++ b/loong64/loong64asm/inst.go @@ -40,7 +40,9 @@ func (i Inst) String() string { } case JIRL: - if i.Args[0].(Reg) == R0 && i.Args[2].(OffsetSimm).Imm == 0 { + if i.Args[0].(Reg) == R0 && 
i.Args[1].(Reg) == R1 && i.Args[2].(OffsetSimm).Imm == 0 { + return "ret" + } else if i.Args[0].(Reg) == R0 && i.Args[2].(OffsetSimm).Imm == 0 { return "jr " + args[1] } diff --git a/loong64/loong64asm/plan9x.go b/loong64/loong64asm/plan9x.go index 5db32903..c18e217c 100644 --- a/loong64/loong64asm/plan9x.go +++ b/loong64/loong64asm/plan9x.go @@ -96,8 +96,13 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin rd := inst.Args[0].(Reg) rj := inst.Args[1].(Reg) regno := uint16(rj) & 31 - if rd == R0 { + off := inst.Args[2].(OffsetSimm).Imm + if rd == R0 && rj == R1 && off == 0 { + return fmt.Sprintf("RET") + } else if rd == R0 && off == 0 { return fmt.Sprintf("JMP (R%d)", regno) + } else if rd == R0 { + return fmt.Sprintf("JMP %d(R%d)", off, regno) } return fmt.Sprintf("CALL (R%d)", regno) diff --git a/loong64/loong64asm/testdata/gnucases.txt b/loong64/loong64asm/testdata/gnucases.txt index 2cfd5ea1..90ad9b7e 100644 --- a/loong64/loong64asm/testdata/gnucases.txt +++ b/loong64/loong64asm/testdata/gnucases.txt @@ -253,6 +253,8 @@ ac154806| iocsrwr.h $t0, $t1 ac194806| iocsrwr.w $t0, $t1 ac1d4806| iocsrwr.d $t0, $t1 82b54906| invtlb 0x2, $t0, $t1 +2000004c| ret +2008004c| jirl $zero, $ra, 8 ac11004c| jirl $t0, $t1, 16 ac410028| ld.b $t0, $t1, 16 ac41002a| ld.bu $t0, $t1, 16 diff --git a/loong64/loong64asm/testdata/plan9cases.txt b/loong64/loong64asm/testdata/plan9cases.txt index 53f5d450..c6a5bd57 100644 --- a/loong64/loong64asm/testdata/plan9cases.txt +++ b/loong64/loong64asm/testdata/plan9cases.txt @@ -119,6 +119,8 @@ a0110058| BEQ R13, 4(PC) 47d5ff6f| BGEU R10, R7, -11(PC) 802c006c| BGEU R4, 11(PC) 04d8ff67| BLEZ R4, -10(PC) +2000004c| RET +2008004c| JMP 8(R1) 00040058| JMP 1(PC) 8000004c| JMP (R4) 00340050| JMP 13(PC) From 292026d483a88b3bc4d859f7a2d5df3794cf6da5 Mon Sep 17 00:00:00 2001 From: limeidan Date: Thu, 5 Sep 2024 20:24:51 +0800 Subject: [PATCH 030/200] loong64: fix self-assignment error Change-Id: 
Icdc21032f37ebd56f5bbbbe058637efbc7cef1f0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/611135 Reviewed-by: Qiqi Huang Reviewed-by: Michael Pratt Reviewed-by: sophie zhao Reviewed-by: Dmitri Shuralyov Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI --- loong64/loong64asm/plan9x.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loong64/loong64asm/plan9x.go b/loong64/loong64asm/plan9x.go index c18e217c..4e3c4f1e 100644 --- a/loong64/loong64asm/plan9x.go +++ b/loong64/loong64asm/plan9x.go @@ -47,7 +47,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin op = "MOVHU" args = append(args[1:2], args[0:1]...) } else { - args[0], args[1], args[2], args[3] = args[2], args[1], args[3], args[0] + args[0], args[2], args[3] = args[2], args[3], args[0] } case BCNEZ, BCEQZ: From 7874f23b9c060aa1d0aaa13b7352d59335c30184 Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Sun, 4 Aug 2024 19:19:12 +0800 Subject: [PATCH 031/200] riscv64: implement RV64GC_zba_zbb_zbs GNU/Plan9 format disassembler Support decoding RV64GC_zba_zbb_zbs instructions as GNU & Plan9 format, relies on riscv64spec/spec.go to generate instruction tables Change-Id: I3b2793a7dd9faa3ac18d85361a8627eba0923068 Reviewed-on: https://go-review.googlesource.com/c/arch/+/602915 Reviewed-by: Meng Zhuo LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Cherry Mui Reviewed-by: Joel Sing --- riscv64/riscv64asm/arg.go | 116 +++ riscv64/riscv64asm/csr_string.go | 577 ++++++++++++ riscv64/riscv64asm/decode.go | 550 +++++++++++ riscv64/riscv64asm/gnu.go | 328 +++++++ riscv64/riscv64asm/inst.go | 495 ++++++++++ riscv64/riscv64asm/plan9x.go | 377 ++++++++ riscv64/riscv64asm/tables.go | 1474 ++++++++++++++++++++++++++++++ riscv64/riscv64spec/spec.go | 16 +- 8 files changed, 3925 insertions(+), 8 deletions(-) create mode 100644 riscv64/riscv64asm/arg.go create mode 100644 riscv64/riscv64asm/csr_string.go create mode 100644 riscv64/riscv64asm/decode.go 
create mode 100644 riscv64/riscv64asm/gnu.go create mode 100644 riscv64/riscv64asm/inst.go create mode 100644 riscv64/riscv64asm/plan9x.go create mode 100644 riscv64/riscv64asm/tables.go diff --git a/riscv64/riscv64asm/arg.go b/riscv64/riscv64asm/arg.go new file mode 100644 index 00000000..7898c273 --- /dev/null +++ b/riscv64/riscv64asm/arg.go @@ -0,0 +1,116 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +// Naming for Go decoder arguments: +// +// - arg_rd: a general purpose register rd encoded in rd[11:7] field +// +// - arg_rs1: a general purpose register rs1 encoded in rs1[19:15] field +// +// - arg_rs2: a general purpose register rs2 encoded in rs2[24:20] field +// +// - arg_rs3: a general purpose register rs3 encoded in rs3[31:27] field +// +// - arg_fd: a floating point register rd encoded in rd[11:7] field +// +// - arg_fs1: a floating point register rs1 encoded in rs1[19:15] field +// +// - arg_fs2: a floating point register rs2 encoded in rs2[24:20] field +// +// - arg_fs3: a floating point register rs3 encoded in rs3[31:27] field +// +// - arg_csr: a control status register encoded in csr[31:20] field +// +// - arg_rs1_mem: source register with offset in load commands +// +// - arg_rs1_store: source register with offset in store commands +// +// - arg_rs1_amo: source register with offset in atomic commands +// +// - arg_pred: predecessor memory ordering information encoded in pred[27:24] field +// For details, please refer to chapter 2.7 of ISA manual volume 1 +// +// - arg_succ: successor memory ordering information encoded in succ[23:20] field +// For details, please refer to chapter 2.7 of ISA manual volume 1 +// +// - arg_zimm: a unsigned immediate encoded in zimm[19:15] field +// +// - arg_imm12: an I-type immediate encoded in imm12[31:20] field +// +// - arg_simm12: a S-type immediate encoded in 
simm12[31:25|11:7] field +// +// - arg_bimm12: a B-type immediate encoded in bimm12[31:25|11:7] field +// +// - arg_imm20: an U-type immediate encoded in imm20[31:12] field +// +// - arg_jimm20: a J-type immediate encoded in jimm20[31:12] field +// +// - arg_shamt5: a shift amount encoded in shamt5[24:20] field +// +// - arg_shamt6: a shift amount encoded in shamt6[25:20] field +// + +type argType uint16 + +const ( + _ argType = iota + arg_rd + arg_rs1 + arg_rs2 + arg_rs3 + arg_fd + arg_fs1 + arg_fs2 + arg_fs3 + arg_csr + + arg_rs1_amo + arg_rs1_mem + arg_rs1_store + + arg_pred + arg_succ + + arg_zimm + arg_imm12 + arg_simm12 + arg_bimm12 + arg_imm20 + arg_jimm20 + arg_shamt5 + arg_shamt6 + + // RISC-V Compressed Extension Args + arg_rd_p + arg_fd_p + arg_rs1_p + arg_rd_rs1_p + arg_fs2_p + arg_rs2_p + arg_rd_n0 + arg_rs1_n0 + arg_rd_rs1_n0 + arg_c_rs1_n0 + arg_c_rs2_n0 + arg_c_fs2 + arg_c_rs2 + arg_rd_n2 + + arg_c_imm6 + arg_c_nzimm6 + arg_c_nzuimm6 + arg_c_uimm7 + arg_c_uimm8 + arg_c_uimm8sp_s + arg_c_uimm8sp + arg_c_uimm9sp_s + arg_c_uimm9sp + arg_c_bimm9 + arg_c_nzimm10 + arg_c_nzuimm10 + arg_c_imm12 + arg_c_nzimm18 +) diff --git a/riscv64/riscv64asm/csr_string.go b/riscv64/riscv64asm/csr_string.go new file mode 100644 index 00000000..addf91aa --- /dev/null +++ b/riscv64/riscv64asm/csr_string.go @@ -0,0 +1,577 @@ +// Code generated by "stringer -type=CSR"; DO NOT EDIT. + +package riscv64asm + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[USTATUS-0] + _ = x[FFLAGS-1] + _ = x[FRM-2] + _ = x[FCSR-3] + _ = x[UIE-4] + _ = x[UTVEC-5] + _ = x[UTVT-7] + _ = x[VSTART-8] + _ = x[VXSAT-9] + _ = x[VXRM-10] + _ = x[VCSR-15] + _ = x[USCRATCH-64] + _ = x[UEPC-65] + _ = x[UCAUSE-66] + _ = x[UTVAL-67] + _ = x[UIP-68] + _ = x[UNXTI-69] + _ = x[UINTSTATUS-70] + _ = x[USCRATCHCSW-72] + _ = x[USCRATCHCSWL-73] + _ = x[SSTATUS-256] + _ = x[SEDELEG-258] + _ = x[SIDELEG-259] + _ = x[SIE-260] + _ = x[STVEC-261] + _ = x[SCOUNTEREN-262] + _ = x[STVT-263] + _ = x[SSCRATCH-320] + _ = x[SEPC-321] + _ = x[SCAUSE-322] + _ = x[STVAL-323] + _ = x[SIP-324] + _ = x[SNXTI-325] + _ = x[SINTSTATUS-326] + _ = x[SSCRATCHCSW-328] + _ = x[SSCRATCHCSWL-329] + _ = x[SATP-384] + _ = x[VSSTATUS-512] + _ = x[VSIE-516] + _ = x[VSTVEC-517] + _ = x[VSSCRATCH-576] + _ = x[VSEPC-577] + _ = x[VSCAUSE-578] + _ = x[VSTVAL-579] + _ = x[VSIP-580] + _ = x[VSATP-640] + _ = x[MSTATUS-768] + _ = x[MISA-769] + _ = x[MEDELEG-770] + _ = x[MIDELEG-771] + _ = x[MIE-772] + _ = x[MTVEC-773] + _ = x[MCOUNTEREN-774] + _ = x[MTVT-775] + _ = x[MSTATUSH-784] + _ = x[MCOUNTINHIBIT-800] + _ = x[MHPMEVENT3-803] + _ = x[MHPMEVENT4-804] + _ = x[MHPMEVENT5-805] + _ = x[MHPMEVENT6-806] + _ = x[MHPMEVENT7-807] + _ = x[MHPMEVENT8-808] + _ = x[MHPMEVENT9-809] + _ = x[MHPMEVENT10-810] + _ = x[MHPMEVENT11-811] + _ = x[MHPMEVENT12-812] + _ = x[MHPMEVENT13-813] + _ = x[MHPMEVENT14-814] + _ = x[MHPMEVENT15-815] + _ = x[MHPMEVENT16-816] + _ = x[MHPMEVENT17-817] + _ = x[MHPMEVENT18-818] + _ = x[MHPMEVENT19-819] + _ = x[MHPMEVENT20-820] + _ = x[MHPMEVENT21-821] + _ = x[MHPMEVENT22-822] + _ = x[MHPMEVENT23-823] + _ = x[MHPMEVENT24-824] + _ = x[MHPMEVENT25-825] + _ = x[MHPMEVENT26-826] + _ = x[MHPMEVENT27-827] + _ = x[MHPMEVENT28-828] + _ = x[MHPMEVENT29-829] + _ = x[MHPMEVENT30-830] + _ = x[MHPMEVENT31-831] + _ = x[MSCRATCH-832] + _ = x[MEPC-833] + _ = x[MCAUSE-834] + _ = x[MTVAL-835] + _ = x[MIP-836] + _ = x[MNXTI-837] + _ = x[MINTSTATUS-838] + _ = 
x[MSCRATCHCSW-840] + _ = x[MSCRATCHCSWL-841] + _ = x[MTINST-842] + _ = x[MTVAL2-843] + _ = x[PMPCFG0-928] + _ = x[PMPCFG1-929] + _ = x[PMPCFG2-930] + _ = x[PMPCFG3-931] + _ = x[PMPADDR0-944] + _ = x[PMPADDR1-945] + _ = x[PMPADDR2-946] + _ = x[PMPADDR3-947] + _ = x[PMPADDR4-948] + _ = x[PMPADDR5-949] + _ = x[PMPADDR6-950] + _ = x[PMPADDR7-951] + _ = x[PMPADDR8-952] + _ = x[PMPADDR9-953] + _ = x[PMPADDR10-954] + _ = x[PMPADDR11-955] + _ = x[PMPADDR12-956] + _ = x[PMPADDR13-957] + _ = x[PMPADDR14-958] + _ = x[PMPADDR15-959] + _ = x[HSTATUS-1536] + _ = x[HEDELEG-1538] + _ = x[HIDELEG-1539] + _ = x[HIE-1540] + _ = x[HTIMEDELTA-1541] + _ = x[HCOUNTEREN-1542] + _ = x[HGEIE-1543] + _ = x[HTIMEDELTAH-1557] + _ = x[HTVAL-1603] + _ = x[HIP-1604] + _ = x[HVIP-1605] + _ = x[HTINST-1610] + _ = x[HGATP-1664] + _ = x[TSELECT-1952] + _ = x[TDATA1-1953] + _ = x[TDATA2-1954] + _ = x[TDATA3-1955] + _ = x[TINFO-1956] + _ = x[TCONTROL-1957] + _ = x[MCONTEXT-1960] + _ = x[MNOISE-1961] + _ = x[SCONTEXT-1962] + _ = x[DCSR-1968] + _ = x[DPC-1969] + _ = x[DSCRATCH0-1970] + _ = x[DSCRATCH1-1971] + _ = x[MCYCLE-2816] + _ = x[MINSTRET-2818] + _ = x[MHPMCOUNTER3-2819] + _ = x[MHPMCOUNTER4-2820] + _ = x[MHPMCOUNTER5-2821] + _ = x[MHPMCOUNTER6-2822] + _ = x[MHPMCOUNTER7-2823] + _ = x[MHPMCOUNTER8-2824] + _ = x[MHPMCOUNTER9-2825] + _ = x[MHPMCOUNTER10-2826] + _ = x[MHPMCOUNTER11-2827] + _ = x[MHPMCOUNTER12-2828] + _ = x[MHPMCOUNTER13-2829] + _ = x[MHPMCOUNTER14-2830] + _ = x[MHPMCOUNTER15-2831] + _ = x[MHPMCOUNTER16-2832] + _ = x[MHPMCOUNTER17-2833] + _ = x[MHPMCOUNTER18-2834] + _ = x[MHPMCOUNTER19-2835] + _ = x[MHPMCOUNTER20-2836] + _ = x[MHPMCOUNTER21-2837] + _ = x[MHPMCOUNTER22-2838] + _ = x[MHPMCOUNTER23-2839] + _ = x[MHPMCOUNTER24-2840] + _ = x[MHPMCOUNTER25-2841] + _ = x[MHPMCOUNTER26-2842] + _ = x[MHPMCOUNTER27-2843] + _ = x[MHPMCOUNTER28-2844] + _ = x[MHPMCOUNTER29-2845] + _ = x[MHPMCOUNTER30-2846] + _ = x[MHPMCOUNTER31-2847] + _ = x[MCYCLEH-2944] + _ = x[MINSTRETH-2946] + _ = 
x[MHPMCOUNTER3H-2947] + _ = x[MHPMCOUNTER4H-2948] + _ = x[MHPMCOUNTER5H-2949] + _ = x[MHPMCOUNTER6H-2950] + _ = x[MHPMCOUNTER7H-2951] + _ = x[MHPMCOUNTER8H-2952] + _ = x[MHPMCOUNTER9H-2953] + _ = x[MHPMCOUNTER10H-2954] + _ = x[MHPMCOUNTER11H-2955] + _ = x[MHPMCOUNTER12H-2956] + _ = x[MHPMCOUNTER13H-2957] + _ = x[MHPMCOUNTER14H-2958] + _ = x[MHPMCOUNTER15H-2959] + _ = x[MHPMCOUNTER16H-2960] + _ = x[MHPMCOUNTER17H-2961] + _ = x[MHPMCOUNTER18H-2962] + _ = x[MHPMCOUNTER19H-2963] + _ = x[MHPMCOUNTER20H-2964] + _ = x[MHPMCOUNTER21H-2965] + _ = x[MHPMCOUNTER22H-2966] + _ = x[MHPMCOUNTER23H-2967] + _ = x[MHPMCOUNTER24H-2968] + _ = x[MHPMCOUNTER25H-2969] + _ = x[MHPMCOUNTER26H-2970] + _ = x[MHPMCOUNTER27H-2971] + _ = x[MHPMCOUNTER28H-2972] + _ = x[MHPMCOUNTER29H-2973] + _ = x[MHPMCOUNTER30H-2974] + _ = x[MHPMCOUNTER31H-2975] + _ = x[CYCLE-3072] + _ = x[TIME-3073] + _ = x[INSTRET-3074] + _ = x[HPMCOUNTER3-3075] + _ = x[HPMCOUNTER4-3076] + _ = x[HPMCOUNTER5-3077] + _ = x[HPMCOUNTER6-3078] + _ = x[HPMCOUNTER7-3079] + _ = x[HPMCOUNTER8-3080] + _ = x[HPMCOUNTER9-3081] + _ = x[HPMCOUNTER10-3082] + _ = x[HPMCOUNTER11-3083] + _ = x[HPMCOUNTER12-3084] + _ = x[HPMCOUNTER13-3085] + _ = x[HPMCOUNTER14-3086] + _ = x[HPMCOUNTER15-3087] + _ = x[HPMCOUNTER16-3088] + _ = x[HPMCOUNTER17-3089] + _ = x[HPMCOUNTER18-3090] + _ = x[HPMCOUNTER19-3091] + _ = x[HPMCOUNTER20-3092] + _ = x[HPMCOUNTER21-3093] + _ = x[HPMCOUNTER22-3094] + _ = x[HPMCOUNTER23-3095] + _ = x[HPMCOUNTER24-3096] + _ = x[HPMCOUNTER25-3097] + _ = x[HPMCOUNTER26-3098] + _ = x[HPMCOUNTER27-3099] + _ = x[HPMCOUNTER28-3100] + _ = x[HPMCOUNTER29-3101] + _ = x[HPMCOUNTER30-3102] + _ = x[HPMCOUNTER31-3103] + _ = x[VL-3104] + _ = x[VTYPE-3105] + _ = x[VLENB-3106] + _ = x[CYCLEH-3200] + _ = x[TIMEH-3201] + _ = x[INSTRETH-3202] + _ = x[HPMCOUNTER3H-3203] + _ = x[HPMCOUNTER4H-3204] + _ = x[HPMCOUNTER5H-3205] + _ = x[HPMCOUNTER6H-3206] + _ = x[HPMCOUNTER7H-3207] + _ = x[HPMCOUNTER8H-3208] + _ = x[HPMCOUNTER9H-3209] + _ = 
x[HPMCOUNTER10H-3210] + _ = x[HPMCOUNTER11H-3211] + _ = x[HPMCOUNTER12H-3212] + _ = x[HPMCOUNTER13H-3213] + _ = x[HPMCOUNTER14H-3214] + _ = x[HPMCOUNTER15H-3215] + _ = x[HPMCOUNTER16H-3216] + _ = x[HPMCOUNTER17H-3217] + _ = x[HPMCOUNTER18H-3218] + _ = x[HPMCOUNTER19H-3219] + _ = x[HPMCOUNTER20H-3220] + _ = x[HPMCOUNTER21H-3221] + _ = x[HPMCOUNTER22H-3222] + _ = x[HPMCOUNTER23H-3223] + _ = x[HPMCOUNTER24H-3224] + _ = x[HPMCOUNTER25H-3225] + _ = x[HPMCOUNTER26H-3226] + _ = x[HPMCOUNTER27H-3227] + _ = x[HPMCOUNTER28H-3228] + _ = x[HPMCOUNTER29H-3229] + _ = x[HPMCOUNTER30H-3230] + _ = x[HPMCOUNTER31H-3231] + _ = x[HGEIP-3602] + _ = x[MVENDORID-3857] + _ = x[MARCHID-3858] + _ = x[MIMPID-3859] + _ = x[MHARTID-3860] + _ = x[MENTROPY-3861] +} + +const _CSR_name = "USTATUSFFLAGSFRMFCSRUIEUTVECUTVTVSTARTVXSATVXRMVCSRUSCRATCHUEPCUCAUSEUTVALUIPUNXTIUINTSTATUSUSCRATCHCSWUSCRATCHCSWLSSTATUSSEDELEGSIDELEGSIESTVECSCOUNTERENSTVTSSCRATCHSEPCSCAUSESTVALSIPSNXTISINTSTATUSSSCRATCHCSWSSCRATCHCSWLSATPVSSTATUSVSIEVSTVECVSSCRATCHVSEPCVSCAUSEVSTVALVSIPVSATPMSTATUSMISAMEDELEGMIDELEGMIEMTVECMCOUNTERENMTVTMSTATUSHMCOUNTINHIBITMHPMEVENT3MHPMEVENT4MHPMEVENT5MHPMEVENT6MHPMEVENT7MHPMEVENT8MHPMEVENT9MHPMEVENT10MHPMEVENT11MHPMEVENT12MHPMEVENT13MHPMEVENT14MHPMEVENT15MHPMEVENT16MHPMEVENT17MHPMEVENT18MHPMEVENT19MHPMEVENT20MHPMEVENT21MHPMEVENT22MHPMEVENT23MHPMEVENT24MHPMEVENT25MHPMEVENT26MHPMEVENT27MHPMEVENT28MHPMEVENT29MHPMEVENT30MHPMEVENT31MSCRATCHMEPCMCAUSEMTVALMIPMNXTIMINTSTATUSMSCRATCHCSWMSCRATCHCSWLMTINSTMTVAL2PMPCFG0PMPCFG1PMPCFG2PMPCFG3PMPADDR0PMPADDR1PMPADDR2PMPADDR3PMPADDR4PMPADDR5PMPADDR6PMPADDR7PMPADDR8PMPADDR9PMPADDR10PMPADDR11PMPADDR12PMPADDR13PMPADDR14PMPADDR15HSTATUSHEDELEGHIDELEGHIEHTIMEDELTAHCOUNTERENHGEIEHTIMEDELTAHHTVALHIPHVIPHTINSTHGATPTSELECTTDATA1TDATA2TDATA3TINFOTCONTROLMCONTEXTMNOISESCONTEXTDCSRDPCDSCRATCH0DSCRATCH1MCYCLEMINSTRETMHPMCOUNTER3MHPMCOUNTER4MHPMCOUNTER5MHPMCOUNTER6MHPMCOUNTER7MHPMCOUNTER8MHPMCOUNTER9MHPMCOUNTER10MHPMCOUNTER11MHPMCOUNTER12MHPMCOUNTER13MHPMCOUNTER14MHPM
COUNTER15MHPMCOUNTER16MHPMCOUNTER17MHPMCOUNTER18MHPMCOUNTER19MHPMCOUNTER20MHPMCOUNTER21MHPMCOUNTER22MHPMCOUNTER23MHPMCOUNTER24MHPMCOUNTER25MHPMCOUNTER26MHPMCOUNTER27MHPMCOUNTER28MHPMCOUNTER29MHPMCOUNTER30MHPMCOUNTER31MCYCLEHMINSTRETHMHPMCOUNTER3HMHPMCOUNTER4HMHPMCOUNTER5HMHPMCOUNTER6HMHPMCOUNTER7HMHPMCOUNTER8HMHPMCOUNTER9HMHPMCOUNTER10HMHPMCOUNTER11HMHPMCOUNTER12HMHPMCOUNTER13HMHPMCOUNTER14HMHPMCOUNTER15HMHPMCOUNTER16HMHPMCOUNTER17HMHPMCOUNTER18HMHPMCOUNTER19HMHPMCOUNTER20HMHPMCOUNTER21HMHPMCOUNTER22HMHPMCOUNTER23HMHPMCOUNTER24HMHPMCOUNTER25HMHPMCOUNTER26HMHPMCOUNTER27HMHPMCOUNTER28HMHPMCOUNTER29HMHPMCOUNTER30HMHPMCOUNTER31HCYCLETIMEINSTRETHPMCOUNTER3HPMCOUNTER4HPMCOUNTER5HPMCOUNTER6HPMCOUNTER7HPMCOUNTER8HPMCOUNTER9HPMCOUNTER10HPMCOUNTER11HPMCOUNTER12HPMCOUNTER13HPMCOUNTER14HPMCOUNTER15HPMCOUNTER16HPMCOUNTER17HPMCOUNTER18HPMCOUNTER19HPMCOUNTER20HPMCOUNTER21HPMCOUNTER22HPMCOUNTER23HPMCOUNTER24HPMCOUNTER25HPMCOUNTER26HPMCOUNTER27HPMCOUNTER28HPMCOUNTER29HPMCOUNTER30HPMCOUNTER31VLVTYPEVLENBCYCLEHTIMEHINSTRETHHPMCOUNTER3HHPMCOUNTER4HHPMCOUNTER5HHPMCOUNTER6HHPMCOUNTER7HHPMCOUNTER8HHPMCOUNTER9HHPMCOUNTER10HHPMCOUNTER11HHPMCOUNTER12HHPMCOUNTER13HHPMCOUNTER14HHPMCOUNTER15HHPMCOUNTER16HHPMCOUNTER17HHPMCOUNTER18HHPMCOUNTER19HHPMCOUNTER20HHPMCOUNTER21HHPMCOUNTER22HHPMCOUNTER23HHPMCOUNTER24HHPMCOUNTER25HHPMCOUNTER26HHPMCOUNTER27HHPMCOUNTER28HHPMCOUNTER29HHPMCOUNTER30HHPMCOUNTER31HHGEIPMVENDORIDMARCHIDMIMPIDMHARTIDMENTROPY" + +var _CSR_map = map[CSR]string{ + 0: _CSR_name[0:7], + 1: _CSR_name[7:13], + 2: _CSR_name[13:16], + 3: _CSR_name[16:20], + 4: _CSR_name[20:23], + 5: _CSR_name[23:28], + 7: _CSR_name[28:32], + 8: _CSR_name[32:38], + 9: _CSR_name[38:43], + 10: _CSR_name[43:47], + 15: _CSR_name[47:51], + 64: _CSR_name[51:59], + 65: _CSR_name[59:63], + 66: _CSR_name[63:69], + 67: _CSR_name[69:74], + 68: _CSR_name[74:77], + 69: _CSR_name[77:82], + 70: _CSR_name[82:92], + 72: _CSR_name[92:103], + 73: _CSR_name[103:115], + 256: _CSR_name[115:122], + 258: _CSR_name[122:129], + 259: 
_CSR_name[129:136], + 260: _CSR_name[136:139], + 261: _CSR_name[139:144], + 262: _CSR_name[144:154], + 263: _CSR_name[154:158], + 320: _CSR_name[158:166], + 321: _CSR_name[166:170], + 322: _CSR_name[170:176], + 323: _CSR_name[176:181], + 324: _CSR_name[181:184], + 325: _CSR_name[184:189], + 326: _CSR_name[189:199], + 328: _CSR_name[199:210], + 329: _CSR_name[210:222], + 384: _CSR_name[222:226], + 512: _CSR_name[226:234], + 516: _CSR_name[234:238], + 517: _CSR_name[238:244], + 576: _CSR_name[244:253], + 577: _CSR_name[253:258], + 578: _CSR_name[258:265], + 579: _CSR_name[265:271], + 580: _CSR_name[271:275], + 640: _CSR_name[275:280], + 768: _CSR_name[280:287], + 769: _CSR_name[287:291], + 770: _CSR_name[291:298], + 771: _CSR_name[298:305], + 772: _CSR_name[305:308], + 773: _CSR_name[308:313], + 774: _CSR_name[313:323], + 775: _CSR_name[323:327], + 784: _CSR_name[327:335], + 800: _CSR_name[335:348], + 803: _CSR_name[348:358], + 804: _CSR_name[358:368], + 805: _CSR_name[368:378], + 806: _CSR_name[378:388], + 807: _CSR_name[388:398], + 808: _CSR_name[398:408], + 809: _CSR_name[408:418], + 810: _CSR_name[418:429], + 811: _CSR_name[429:440], + 812: _CSR_name[440:451], + 813: _CSR_name[451:462], + 814: _CSR_name[462:473], + 815: _CSR_name[473:484], + 816: _CSR_name[484:495], + 817: _CSR_name[495:506], + 818: _CSR_name[506:517], + 819: _CSR_name[517:528], + 820: _CSR_name[528:539], + 821: _CSR_name[539:550], + 822: _CSR_name[550:561], + 823: _CSR_name[561:572], + 824: _CSR_name[572:583], + 825: _CSR_name[583:594], + 826: _CSR_name[594:605], + 827: _CSR_name[605:616], + 828: _CSR_name[616:627], + 829: _CSR_name[627:638], + 830: _CSR_name[638:649], + 831: _CSR_name[649:660], + 832: _CSR_name[660:668], + 833: _CSR_name[668:672], + 834: _CSR_name[672:678], + 835: _CSR_name[678:683], + 836: _CSR_name[683:686], + 837: _CSR_name[686:691], + 838: _CSR_name[691:701], + 840: _CSR_name[701:712], + 841: _CSR_name[712:724], + 842: _CSR_name[724:730], + 843: _CSR_name[730:736], + 928: 
_CSR_name[736:743], + 929: _CSR_name[743:750], + 930: _CSR_name[750:757], + 931: _CSR_name[757:764], + 944: _CSR_name[764:772], + 945: _CSR_name[772:780], + 946: _CSR_name[780:788], + 947: _CSR_name[788:796], + 948: _CSR_name[796:804], + 949: _CSR_name[804:812], + 950: _CSR_name[812:820], + 951: _CSR_name[820:828], + 952: _CSR_name[828:836], + 953: _CSR_name[836:844], + 954: _CSR_name[844:853], + 955: _CSR_name[853:862], + 956: _CSR_name[862:871], + 957: _CSR_name[871:880], + 958: _CSR_name[880:889], + 959: _CSR_name[889:898], + 1536: _CSR_name[898:905], + 1538: _CSR_name[905:912], + 1539: _CSR_name[912:919], + 1540: _CSR_name[919:922], + 1541: _CSR_name[922:932], + 1542: _CSR_name[932:942], + 1543: _CSR_name[942:947], + 1557: _CSR_name[947:958], + 1603: _CSR_name[958:963], + 1604: _CSR_name[963:966], + 1605: _CSR_name[966:970], + 1610: _CSR_name[970:976], + 1664: _CSR_name[976:981], + 1952: _CSR_name[981:988], + 1953: _CSR_name[988:994], + 1954: _CSR_name[994:1000], + 1955: _CSR_name[1000:1006], + 1956: _CSR_name[1006:1011], + 1957: _CSR_name[1011:1019], + 1960: _CSR_name[1019:1027], + 1961: _CSR_name[1027:1033], + 1962: _CSR_name[1033:1041], + 1968: _CSR_name[1041:1045], + 1969: _CSR_name[1045:1048], + 1970: _CSR_name[1048:1057], + 1971: _CSR_name[1057:1066], + 2816: _CSR_name[1066:1072], + 2818: _CSR_name[1072:1080], + 2819: _CSR_name[1080:1092], + 2820: _CSR_name[1092:1104], + 2821: _CSR_name[1104:1116], + 2822: _CSR_name[1116:1128], + 2823: _CSR_name[1128:1140], + 2824: _CSR_name[1140:1152], + 2825: _CSR_name[1152:1164], + 2826: _CSR_name[1164:1177], + 2827: _CSR_name[1177:1190], + 2828: _CSR_name[1190:1203], + 2829: _CSR_name[1203:1216], + 2830: _CSR_name[1216:1229], + 2831: _CSR_name[1229:1242], + 2832: _CSR_name[1242:1255], + 2833: _CSR_name[1255:1268], + 2834: _CSR_name[1268:1281], + 2835: _CSR_name[1281:1294], + 2836: _CSR_name[1294:1307], + 2837: _CSR_name[1307:1320], + 2838: _CSR_name[1320:1333], + 2839: _CSR_name[1333:1346], + 2840: 
_CSR_name[1346:1359], + 2841: _CSR_name[1359:1372], + 2842: _CSR_name[1372:1385], + 2843: _CSR_name[1385:1398], + 2844: _CSR_name[1398:1411], + 2845: _CSR_name[1411:1424], + 2846: _CSR_name[1424:1437], + 2847: _CSR_name[1437:1450], + 2944: _CSR_name[1450:1457], + 2946: _CSR_name[1457:1466], + 2947: _CSR_name[1466:1479], + 2948: _CSR_name[1479:1492], + 2949: _CSR_name[1492:1505], + 2950: _CSR_name[1505:1518], + 2951: _CSR_name[1518:1531], + 2952: _CSR_name[1531:1544], + 2953: _CSR_name[1544:1557], + 2954: _CSR_name[1557:1571], + 2955: _CSR_name[1571:1585], + 2956: _CSR_name[1585:1599], + 2957: _CSR_name[1599:1613], + 2958: _CSR_name[1613:1627], + 2959: _CSR_name[1627:1641], + 2960: _CSR_name[1641:1655], + 2961: _CSR_name[1655:1669], + 2962: _CSR_name[1669:1683], + 2963: _CSR_name[1683:1697], + 2964: _CSR_name[1697:1711], + 2965: _CSR_name[1711:1725], + 2966: _CSR_name[1725:1739], + 2967: _CSR_name[1739:1753], + 2968: _CSR_name[1753:1767], + 2969: _CSR_name[1767:1781], + 2970: _CSR_name[1781:1795], + 2971: _CSR_name[1795:1809], + 2972: _CSR_name[1809:1823], + 2973: _CSR_name[1823:1837], + 2974: _CSR_name[1837:1851], + 2975: _CSR_name[1851:1865], + 3072: _CSR_name[1865:1870], + 3073: _CSR_name[1870:1874], + 3074: _CSR_name[1874:1881], + 3075: _CSR_name[1881:1892], + 3076: _CSR_name[1892:1903], + 3077: _CSR_name[1903:1914], + 3078: _CSR_name[1914:1925], + 3079: _CSR_name[1925:1936], + 3080: _CSR_name[1936:1947], + 3081: _CSR_name[1947:1958], + 3082: _CSR_name[1958:1970], + 3083: _CSR_name[1970:1982], + 3084: _CSR_name[1982:1994], + 3085: _CSR_name[1994:2006], + 3086: _CSR_name[2006:2018], + 3087: _CSR_name[2018:2030], + 3088: _CSR_name[2030:2042], + 3089: _CSR_name[2042:2054], + 3090: _CSR_name[2054:2066], + 3091: _CSR_name[2066:2078], + 3092: _CSR_name[2078:2090], + 3093: _CSR_name[2090:2102], + 3094: _CSR_name[2102:2114], + 3095: _CSR_name[2114:2126], + 3096: _CSR_name[2126:2138], + 3097: _CSR_name[2138:2150], + 3098: _CSR_name[2150:2162], + 3099: 
_CSR_name[2162:2174], + 3100: _CSR_name[2174:2186], + 3101: _CSR_name[2186:2198], + 3102: _CSR_name[2198:2210], + 3103: _CSR_name[2210:2222], + 3104: _CSR_name[2222:2224], + 3105: _CSR_name[2224:2229], + 3106: _CSR_name[2229:2234], + 3200: _CSR_name[2234:2240], + 3201: _CSR_name[2240:2245], + 3202: _CSR_name[2245:2253], + 3203: _CSR_name[2253:2265], + 3204: _CSR_name[2265:2277], + 3205: _CSR_name[2277:2289], + 3206: _CSR_name[2289:2301], + 3207: _CSR_name[2301:2313], + 3208: _CSR_name[2313:2325], + 3209: _CSR_name[2325:2337], + 3210: _CSR_name[2337:2350], + 3211: _CSR_name[2350:2363], + 3212: _CSR_name[2363:2376], + 3213: _CSR_name[2376:2389], + 3214: _CSR_name[2389:2402], + 3215: _CSR_name[2402:2415], + 3216: _CSR_name[2415:2428], + 3217: _CSR_name[2428:2441], + 3218: _CSR_name[2441:2454], + 3219: _CSR_name[2454:2467], + 3220: _CSR_name[2467:2480], + 3221: _CSR_name[2480:2493], + 3222: _CSR_name[2493:2506], + 3223: _CSR_name[2506:2519], + 3224: _CSR_name[2519:2532], + 3225: _CSR_name[2532:2545], + 3226: _CSR_name[2545:2558], + 3227: _CSR_name[2558:2571], + 3228: _CSR_name[2571:2584], + 3229: _CSR_name[2584:2597], + 3230: _CSR_name[2597:2610], + 3231: _CSR_name[2610:2623], + 3602: _CSR_name[2623:2628], + 3857: _CSR_name[2628:2637], + 3858: _CSR_name[2637:2644], + 3859: _CSR_name[2644:2650], + 3860: _CSR_name[2650:2657], + 3861: _CSR_name[2657:2665], +} + +func (i CSR) String() string { + if str, ok := _CSR_map[i]; ok { + return str + } + return "CSR(" + strconv.FormatInt(int64(i), 10) + ")" +} diff --git a/riscv64/riscv64asm/decode.go b/riscv64/riscv64asm/decode.go new file mode 100644 index 00000000..d78fef9e --- /dev/null +++ b/riscv64/riscv64asm/decode.go @@ -0,0 +1,550 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package riscv64asm + +import ( + "encoding/binary" + "errors" +) + +type argTypeList [6]argType + +// An instFormat describes the format of an instruction encoding. +type instFormat struct { + mask uint32 + value uint32 + op Op + // args describe how to decode the instruction arguments. + // args is stored as a fixed-size array. + // if there are fewer than len(args) arguments, args[i] == 0 marks + // the end of the argument list. + args argTypeList +} + +var ( + errShort = errors.New("truncated instruction") + errUnknown = errors.New("unknown instruction") +) + +var decoderCover []bool + +func init() { + decoderCover = make([]bool, len(instFormats)) +} + +// Decode decodes the 4 bytes in src as a single instruction. +func Decode(src []byte) (Inst, error) { + length := len(src) + if length < 2 { + return Inst{}, errShort + } + + var x uint32 + // Non-RVC instructions always starts with 0x11 + // So check whether src[0] & 3 == 3 + if src[0]&3 == 3 { + if length < 4 { + return Inst{}, errShort + } + length = 4 + x = binary.LittleEndian.Uint32(src) + } else { + length = 2 + x = uint32(binary.LittleEndian.Uint16(src)) + } + +Search: + for i, f := range instFormats { + if (x & f.mask) != f.value { + continue + } + + // Decode args. + var args Args + for j, aop := range f.args { + if aop == 0 { + break + } + arg := decodeArg(aop, x, i) + if arg == nil && f.op != C_NOP { + // Cannot decode argument. + continue Search + } + args[j] = arg + } + + if length == 2 { + args = convertCompressedIns(&f, args) + } + + decoderCover[i] = true + inst := Inst{ + Op: f.op, + Args: args, + Enc: x, + Len: length, + } + return inst, nil + } + return Inst{}, errUnknown +} + +// decodeArg decodes the arg described by aop from the instruction bits x. +// It returns nil if x cannot be decoded according to aop. 
+func decodeArg(aop argType, x uint32, index int) Arg { + switch aop { + case arg_rd: + return X0 + Reg((x>>7)&((1<<5)-1)) + + case arg_rs1: + return X0 + Reg((x>>15)&((1<<5)-1)) + + case arg_rs2: + return X0 + Reg((x>>20)&((1<<5)-1)) + + case arg_rs3: + return X0 + Reg((x>>27)&((1<<5)-1)) + + case arg_fd: + return F0 + Reg((x>>7)&((1<<5)-1)) + + case arg_fs1: + return F0 + Reg((x>>15)&((1<<5)-1)) + + case arg_fs2: + return F0 + Reg((x>>20)&((1<<5)-1)) + + case arg_fs3: + return F0 + Reg((x>>27)&((1<<5)-1)) + + case arg_rs1_amo: + return AmoReg{X0 + Reg((x>>15)&((1<<5)-1))} + + case arg_rs1_mem: + imm := x >> 20 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return RegOffset{X0 + Reg((x>>15)&((1<<5)-1)), Simm{int32(imm), true, 12}} + + case arg_rs1_store: + imm := (x<<20)>>27 | (x>>25)<<5 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return RegOffset{X0 + Reg((x>>15)&((1<<5)-1)), Simm{int32(imm), true, 12}} + + case arg_pred: + imm := x << 4 >> 28 + return MemOrder(uint8(imm)) + + case arg_succ: + imm := x << 8 >> 28 + return MemOrder(uint8(imm)) + + case arg_csr: + imm := x >> 20 + return CSR(imm) + + case arg_zimm: + imm := x << 12 >> 27 + return Uimm{imm, true} + + case arg_shamt5: + imm := x << 7 >> 27 + return Uimm{imm, false} + + case arg_shamt6: + imm := x << 6 >> 26 + return Uimm{imm, false} + + case arg_imm12: + imm := x >> 20 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return Simm{int32(imm), true, 12} + + case arg_imm20: + imm := x >> 12 + return Uimm{imm, false} + + case arg_jimm20: + imm := (x>>31)<<20 | (x<<1)>>22<<1 | (x<<11)>>31<<11 | (x<<12)>>24<<12 + // Sign-extend + if imm>>uint32(21-1) == 1 { + imm |= 0x7ff << 21 + } + return Simm{int32(imm), true, 21} + + case arg_simm12: + imm := (x<<20)>>27 | (x>>25)<<5 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return Simm{int32(imm), true, 12} + + case arg_bimm12: + imm := 
(x<<20)>>28<<1 | (x<<1)>>26<<5 | (x<<24)>>31<<11 | (x>>31)<<12 + // Sign-extend + if imm>>uint32(13-1) == 1 { + imm |= 0x7ffff << 13 + } + return Simm{int32(imm), true, 13} + + case arg_rd_p, arg_rs2_p: + return X8 + Reg((x>>2)&((1<<3)-1)) + + case arg_fd_p, arg_fs2_p: + return F8 + Reg((x>>2)&((1<<3)-1)) + + case arg_rs1_p, arg_rd_rs1_p: + return X8 + Reg((x>>7)&((1<<3)-1)) + + case arg_rd_n0, arg_rs1_n0, arg_rd_rs1_n0, arg_c_rs1_n0: + if X0+Reg((x>>7)&((1<<5)-1)) == X0 { + return nil + } + return X0 + Reg((x>>7)&((1<<5)-1)) + + case arg_c_rs2_n0: + if X0+Reg((x>>2)&((1<<5)-1)) == X0 { + return nil + } + return X0 + Reg((x>>2)&((1<<5)-1)) + + case arg_c_fs2: + return F0 + Reg((x>>2)&((1<<5)-1)) + + case arg_c_rs2: + return X0 + Reg((x>>2)&((1<<5)-1)) + + case arg_rd_n2: + if X0+Reg((x>>7)&((1<<5)-1)) == X0 || X0+Reg((x>>7)&((1<<5)-1)) == X2 { + return nil + } + return X0 + Reg((x>>7)&((1<<5)-1)) + + case arg_c_imm6: + imm := (x<<25)>>27 | (x<<19)>>31<<5 + // Sign-extend + if imm>>uint32(6-1) == 1 { + imm |= 0x3ffffff << 6 + } + return Simm{int32(imm), true, 6} + + case arg_c_nzimm6: + imm := (x<<25)>>27 | (x<<19)>>31<<5 + // Sign-extend + if imm>>uint32(6-1) == 1 { + imm |= 0x3ffffff << 6 + } + if int32(imm) == 0 { + return nil + } + return Simm{int32(imm), true, 6} + + case arg_c_nzuimm6: + imm := (x<<25)>>27 | (x<<19)>>31<<5 + if int32(imm) == 0 { + return nil + } + return Uimm{imm, false} + + case arg_c_uimm7: + imm := (x<<26)>>31<<6 | (x<<25)>>31<<2 | (x<<19)>>29<<3 + return Uimm{imm, false} + + case arg_c_uimm8: + imm := (x<<25)>>30<<6 | (x<<19)>>29<<3 + return Uimm{imm, false} + + case arg_c_uimm8sp_s: + imm := (x<<23)>>30<<6 | (x<<19)>>28<<2 + return Uimm{imm, false} + + case arg_c_uimm8sp: + imm := (x<<25)>>29<<2 | (x<<19)>>31<<5 | (x<<28)>>30<<6 + return Uimm{imm, false} + + case arg_c_uimm9sp_s: + imm := (x<<22)>>29<<6 | (x<<19)>>29<<3 + return Uimm{imm, false} + + case arg_c_uimm9sp: + imm := (x<<25)>>30<<3 | (x<<19)>>31<<5 | (x<<27)>>29<<6 + return 
Uimm{imm, false} + + case arg_c_bimm9: + imm := (x<<29)>>31<<5 | (x<<27)>>30<<1 | (x<<25)>>30<<6 | (x<<19)>>31<<8 | (x<<20)>>30<<3 + // Sign-extend + if imm>>uint32(9-1) == 1 { + imm |= 0x7fffff << 9 + } + return Simm{int32(imm), true, 9} + + case arg_c_nzimm10: + imm := (x<<29)>>31<<5 | (x<<27)>>30<<7 | (x<<26)>>31<<6 | (x<<25)>>31<<4 | (x<<19)>>31<<9 + // Sign-extend + if imm>>uint32(10-1) == 1 { + imm |= 0x3fffff << 10 + } + if int32(imm) == 0 { + return nil + } + return Simm{int32(imm), true, 10} + + case arg_c_nzuimm10: + imm := (x<<26)>>31<<3 | (x<<25)>>31<<2 | (x<<21)>>28<<6 | (x<<19)>>30<<4 + if int32(imm) == 0 { + return nil + } + return Uimm{imm, false} + + case arg_c_imm12: + imm := (x<<29)>>31<<5 | (x<<26)>>28<<1 | (x<<25)>>31<<7 | (x<<24)>>31<<6 | (x<<23)>>31<<10 | (x<<21)>>30<<8 | (x<<20)>>31<<4 | (x<<19)>>31<<11 + // Sign-extend + if imm>>uint32(12-1) == 1 { + imm |= 0xfffff << 12 + } + return Simm{int32(imm), true, 12} + + case arg_c_nzimm18: + imm := (x<<25)>>27<<12 | (x<<19)>>31<<17 + // Sign-extend + if imm>>uint32(18-1) == 1 { + imm |= 0x3fff << 18 + } + if int32(imm) == 0 { + return nil + } + return Simm{int32(imm), true, 18} + + default: + return nil + } +} + +// convertCompressedIns rewrites the RVC Instruction to regular Instructions +func convertCompressedIns(f *instFormat, args Args) Args { + var newargs Args + switch f.op { + case C_ADDI4SPN: + f.op = ADDI + newargs[0] = args[0] + newargs[1] = Reg(X2) + newargs[2] = Simm{int32(args[1].(Uimm).Imm), true, 12} + + case C_LW: + f.op = LW + newargs[0] = args[0] + newargs[1] = RegOffset{args[1].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_SW: + f.op = SW + newargs[0] = args[1] + newargs[1] = RegOffset{args[0].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_NOP: + f.op = ADDI + newargs[0] = X0 + newargs[1] = X0 + newargs[2] = Simm{0, true, 12} + + case C_ADDI: + f.op = ADDI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = Simm{args[1].(Simm).Imm, true, 
12} + + case C_LI: + f.op = ADDI + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = Simm{args[1].(Simm).Imm, true, 12} + + case C_ADDI16SP: + f.op = ADDI + newargs[0] = Reg(X2) + newargs[1] = Reg(X2) + newargs[2] = Simm{args[0].(Simm).Imm, true, 12} + + case C_LUI: + f.op = LUI + newargs[0] = args[0] + newargs[1] = Uimm{uint32(args[1].(Simm).Imm >> 12), false} + + case C_ANDI: + f.op = ANDI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = Simm{args[1].(Simm).Imm, true, 12} + + case C_SUB: + f.op = SUB + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_XOR: + f.op = XOR + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_OR: + f.op = OR + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_AND: + f.op = AND + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_J: + f.op = JAL + newargs[0] = Reg(X0) + newargs[1] = Simm{args[0].(Simm).Imm, true, 21} + + case C_BEQZ: + f.op = BEQ + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = Simm{args[1].(Simm).Imm, true, 13} + + case C_BNEZ: + f.op = BNE + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = Simm{args[1].(Simm).Imm, true, 13} + + case C_LWSP: + f.op = LW + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_JR: + f.op = JALR + newargs[0] = Reg(X0) + newargs[1] = RegOffset{args[0].(Reg), Simm{0, true, 12}} + + case C_MV: + f.op = ADD + newargs[0] = args[0] + newargs[1] = Reg(X0) + newargs[2] = args[1] + + case C_EBREAK: + f.op = EBREAK + + case C_JALR: + f.op = JALR + newargs[0] = Reg(X1) + newargs[1] = RegOffset{args[0].(Reg), Simm{0, true, 12}} + + case C_ADD: + f.op = ADD + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SWSP: + f.op = SW + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + // riscv64 compressed instructions + case 
C_LD: + f.op = LD + newargs[0] = args[0] + newargs[1] = RegOffset{args[1].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_SD: + f.op = SD + newargs[0] = args[1] + newargs[1] = RegOffset{args[0].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_ADDIW: + f.op = ADDIW + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = Simm{args[1].(Simm).Imm, true, 12} + + case C_SRLI: + f.op = SRLI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SRAI: + f.op = SRAI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SUBW: + f.op = SUBW + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_ADDW: + f.op = ADDW + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_SLLI: + f.op = SLLI + newargs[0] = args[0] + newargs[1] = args[0] + newargs[2] = args[1] + + case C_LDSP: + f.op = LD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_SDSP: + f.op = SD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + // riscv double precision floating point compressed instructions + case C_FLD: + f.op = FLD + newargs[0] = args[0] + newargs[1] = RegOffset{args[1].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_FSD: + f.op = FSD + newargs[0] = args[1] + newargs[1] = RegOffset{args[0].(Reg), Simm{int32(args[2].(Uimm).Imm), true, 12}} + + case C_FLDSP: + f.op = FLD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_FSDSP: + f.op = FSD + newargs[0] = args[0] + newargs[1] = RegOffset{Reg(X2), Simm{int32(args[1].(Uimm).Imm), true, 12}} + + case C_UNIMP: + f.op = CSRRW + newargs[0] = Reg(X0) + newargs[1] = CSR(CYCLE) + newargs[2] = Reg(X0) + } + return newargs +} diff --git a/riscv64/riscv64asm/gnu.go b/riscv64/riscv64asm/gnu.go new file mode 100644 index 00000000..d6b3dc04 --- /dev/null 
+++ b/riscv64/riscv64asm/gnu.go @@ -0,0 +1,328 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "strings" +) + +// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. +// This form typically matches the syntax defined in the RISC-V Instruction Set Manual. See +// https://github.com/riscv/riscv-isa-manual/releases/download/Ratified-IMAFDQC/riscv-spec-20191213.pdf +func GNUSyntax(inst Inst) string { + op := strings.ToLower(inst.Op.String()) + var args []string + for _, a := range inst.Args { + if a == nil { + break + } + args = append(args, strings.ToLower(a.String())) + } + + switch inst.Op { + case ADDI, ADDIW, ANDI, ORI, SLLI, SLLIW, SRAI, SRAIW, SRLI, SRLIW, XORI: + if inst.Op == ADDI { + if inst.Args[1].(Reg) == X0 && inst.Args[0].(Reg) != X0 { + op = "li" + args[1] = args[2] + args = args[:len(args)-1] + break + } + + if inst.Args[2].(Simm).Imm == 0 { + if inst.Args[0].(Reg) == X0 && inst.Args[1].(Reg) == X0 { + op = "nop" + args = nil + } else { + op = "mv" + args = args[:len(args)-1] + } + } + } + + if inst.Op == ADDIW && inst.Args[2].(Simm).Imm == 0 { + op = "sext.w" + args = args[:len(args)-1] + } + + if inst.Op == XORI && inst.Args[2].(Simm).String() == "-1" { + op = "not" + args = args[:len(args)-1] + } + + case ADD: + if inst.Args[1].(Reg) == X0 { + op = "mv" + args[1] = args[2] + args = args[:len(args)-1] + } + + case BEQ: + if inst.Args[1].(Reg) == X0 { + op = "beqz" + args[1] = args[2] + args = args[:len(args)-1] + } + + case BGE: + if inst.Args[1].(Reg) == X0 { + op = "bgez" + args[1] = args[2] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X0 { + op = "blez" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case BLT: + if inst.Args[1].(Reg) == X0 { + op = "bltz" + args[1] = args[2] + args = args[:len(args)-1] + } else if 
inst.Args[0].(Reg) == X0 { + op = "bgtz" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case BNE: + if inst.Args[1].(Reg) == X0 { + op = "bnez" + args[1] = args[2] + args = args[:len(args)-1] + } + + case CSRRC: + if inst.Args[0].(Reg) == X0 { + op = "csrc" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRCI: + if inst.Args[0].(Reg) == X0 { + op = "csrci" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRS: + if inst.Args[2].(Reg) == X0 { + switch inst.Args[1].(CSR) { + case FCSR: + op = "frcsr" + args = args[:len(args)-2] + + case FFLAGS: + op = "frflags" + args = args[:len(args)-2] + + case FRM: + op = "frrm" + args = args[:len(args)-2] + + // rdcycleh, rdinstreth and rdtimeh are RV-32 only instructions. + // So not included there. + case CYCLE: + op = "rdcycle" + args = args[:len(args)-2] + + case INSTRET: + op = "rdinstret" + args = args[:len(args)-2] + + case TIME: + op = "rdtime" + args = args[:len(args)-2] + + default: + op = "csrr" + args = args[:len(args)-1] + } + } else if inst.Args[0].(Reg) == X0 { + op = "csrs" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRSI: + if inst.Args[0].(Reg) == X0 { + op = "csrsi" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + case CSRRW: + switch inst.Args[1].(CSR) { + case FCSR: + op = "fscsr" + if inst.Args[0].(Reg) == X0 { + args[0] = args[2] + args = args[:len(args)-2] + } else { + args[1] = args[2] + args = args[:len(args)-1] + } + + case FFLAGS: + op = "fsflags" + if inst.Args[0].(Reg) == X0 { + args[0] = args[2] + args = args[:len(args)-2] + } else { + args[1] = args[2] + args = args[:len(args)-1] + } + + case FRM: + op = "fsrm" + if inst.Args[0].(Reg) == X0 { + args[0] = args[2] + args = args[:len(args)-2] + } else { + args[1] = args[2] + args = args[:len(args)-1] + } + + case CYCLE: + if inst.Args[0].(Reg) == X0 && inst.Args[2].(Reg) == X0 { + op = "unimp" + args 
= nil + } + + default: + if inst.Args[0].(Reg) == X0 { + op = "csrw" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + } + + case CSRRWI: + if inst.Args[0].(Reg) == X0 { + op = "csrwi" + args[0], args[1] = args[1], args[2] + args = args[:len(args)-1] + } + + // When both pred and succ equals to iorw, the GNU objdump will omit them. + case FENCE: + if inst.Args[0].(MemOrder).String() == "iorw" && + inst.Args[1].(MemOrder).String() == "iorw" { + args = nil + } + + case FSGNJX_D: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fabs.d" + args = args[:len(args)-1] + } + + case FSGNJX_S: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fabs.s" + args = args[:len(args)-1] + } + + case FSGNJ_D: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fmv.d" + args = args[:len(args)-1] + } + + case FSGNJ_S: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fmv.s" + args = args[:len(args)-1] + } + + case FSGNJN_D: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fneg.d" + args = args[:len(args)-1] + } + + case FSGNJN_S: + if inst.Args[1].(Reg) == inst.Args[2].(Reg) { + op = "fneg.s" + args = args[:len(args)-1] + } + + case JAL: + if inst.Args[0].(Reg) == X0 { + op = "j" + args[0] = args[1] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X1 { + op = "jal" + args[0] = args[1] + args = args[:len(args)-1] + } + + case JALR: + if inst.Args[0].(Reg) == X1 && inst.Args[1].(RegOffset).Ofs.Imm == 0 { + args[0] = inst.Args[1].(RegOffset).OfsReg.String() + args = args[:len(args)-1] + } + + if inst.Args[0].(Reg) == X0 { + if inst.Args[1].(RegOffset).OfsReg == X1 && inst.Args[1].(RegOffset).Ofs.Imm == 0 { + op = "ret" + args = nil + } else if inst.Args[1].(RegOffset).Ofs.Imm == 0 { + op = "jr" + args[0] = inst.Args[1].(RegOffset).OfsReg.String() + args = args[:len(args)-1] + } else { + op = "jr" + args[0] = inst.Args[1].(RegOffset).String() + args = args[:len(args)-1] + } + } + + case SLTIU: + if inst.Args[2].(Simm).String() 
== "1" { + op = "seqz" + args = args[:len(args)-1] + } + + case SLT: + if inst.Args[1].(Reg) == X0 { + op = "sgtz" + args[1] = args[2] + args = args[:len(args)-1] + } else if inst.Args[2].(Reg) == X0 { + op = "sltz" + args = args[:len(args)-1] + } + + case SLTU: + if inst.Args[1].(Reg) == X0 { + op = "snez" + args[1] = args[2] + args = args[:len(args)-1] + } + + case SUB: + if inst.Args[1].(Reg) == X0 { + op = "neg" + args[1] = args[2] + args = args[:len(args)-1] + } + + case SUBW: + if inst.Args[1].(Reg) == X0 { + op = "negw" + args[1] = args[2] + args = args[:len(args)-1] + } + } + + if args != nil { + op += " " + strings.Join(args, ",") + } + return op +} diff --git a/riscv64/riscv64asm/inst.go b/riscv64/riscv64asm/inst.go new file mode 100644 index 00000000..3c13567c --- /dev/null +++ b/riscv64/riscv64asm/inst.go @@ -0,0 +1,495 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "fmt" + "strings" +) + +// An Op is a RISC-V opcode. +type Op uint16 + +// NOTE: The actual Op values are defined in tables.go. +func (op Op) String() string { + if op >= Op(len(opstr)) || opstr[op] == "" { + return fmt.Sprintf("Op(%d)", op) + } + + return opstr[op] +} + +// An Arg is a single instruction argument. +type Arg interface { + String() string +} + +// An Args holds the instruction arguments. +// If an instruction has fewer than 6 arguments, +// the final elements in the array are nil. +type Args [6]Arg + +// An Inst is a single instruction. +type Inst struct { + Op Op // Opcode mnemonic. + Enc uint32 // Raw encoding bits. + Args Args // Instruction arguments, in RISC-V mamual order. 
+ Len int // Length of encoded instruction in bytes +} + +func (i Inst) String() string { + var args []string + for _, arg := range i.Args { + if arg == nil { + break + } + args = append(args, arg.String()) + } + + if len(args) == 0 { + return i.Op.String() + } + return i.Op.String() + " " + strings.Join(args, ",") +} + +// A Reg is a single register. +// The zero value denotes X0, not the absence of a register. +type Reg uint16 + +const ( + // General-purpose register + X0 Reg = iota + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 + X14 + X15 + X16 + X17 + X18 + X19 + X20 + X21 + X22 + X23 + X24 + X25 + X26 + X27 + X28 + X29 + X30 + X31 + + //Float point register + F0 + F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + F12 + F13 + F14 + F15 + F16 + F17 + F18 + F19 + F20 + F21 + F22 + F23 + F24 + F25 + F26 + F27 + F28 + F29 + F30 + F31 +) + +func (r Reg) String() string { + switch { + case r >= X0 && r <= X31: + return fmt.Sprintf("x%d", r) + + case r >= F0 && r <= F31: + return fmt.Sprintf("f%d", r-F0) + + default: + return fmt.Sprintf("Unknown(%d)", r) + } +} + +// A CSR is a single control and status register. +// Use stringer to generate CSR match table. 
+// +//go:generate stringer -type=CSR +type CSR uint16 + +const ( + // Control status register + USTATUS CSR = 0x0000 + FFLAGS CSR = 0x0001 + FRM CSR = 0x0002 + FCSR CSR = 0x0003 + UIE CSR = 0x0004 + UTVEC CSR = 0x0005 + UTVT CSR = 0x0007 + VSTART CSR = 0x0008 + VXSAT CSR = 0x0009 + VXRM CSR = 0x000a + VCSR CSR = 0x000f + USCRATCH CSR = 0x0040 + UEPC CSR = 0x0041 + UCAUSE CSR = 0x0042 + UTVAL CSR = 0x0043 + UIP CSR = 0x0044 + UNXTI CSR = 0x0045 + UINTSTATUS CSR = 0x0046 + USCRATCHCSW CSR = 0x0048 + USCRATCHCSWL CSR = 0x0049 + SSTATUS CSR = 0x0100 + SEDELEG CSR = 0x0102 + SIDELEG CSR = 0x0103 + SIE CSR = 0x0104 + STVEC CSR = 0x0105 + SCOUNTEREN CSR = 0x0106 + STVT CSR = 0x0107 + SSCRATCH CSR = 0x0140 + SEPC CSR = 0x0141 + SCAUSE CSR = 0x0142 + STVAL CSR = 0x0143 + SIP CSR = 0x0144 + SNXTI CSR = 0x0145 + SINTSTATUS CSR = 0x0146 + SSCRATCHCSW CSR = 0x0148 + SSCRATCHCSWL CSR = 0x0149 + SATP CSR = 0x0180 + VSSTATUS CSR = 0x0200 + VSIE CSR = 0x0204 + VSTVEC CSR = 0x0205 + VSSCRATCH CSR = 0x0240 + VSEPC CSR = 0x0241 + VSCAUSE CSR = 0x0242 + VSTVAL CSR = 0x0243 + VSIP CSR = 0x0244 + VSATP CSR = 0x0280 + MSTATUS CSR = 0x0300 + MISA CSR = 0x0301 + MEDELEG CSR = 0x0302 + MIDELEG CSR = 0x0303 + MIE CSR = 0x0304 + MTVEC CSR = 0x0305 + MCOUNTEREN CSR = 0x0306 + MTVT CSR = 0x0307 + MSTATUSH CSR = 0x0310 + MCOUNTINHIBIT CSR = 0x0320 + MHPMEVENT3 CSR = 0x0323 + MHPMEVENT4 CSR = 0x0324 + MHPMEVENT5 CSR = 0x0325 + MHPMEVENT6 CSR = 0x0326 + MHPMEVENT7 CSR = 0x0327 + MHPMEVENT8 CSR = 0x0328 + MHPMEVENT9 CSR = 0x0329 + MHPMEVENT10 CSR = 0x032a + MHPMEVENT11 CSR = 0x032b + MHPMEVENT12 CSR = 0x032c + MHPMEVENT13 CSR = 0x032d + MHPMEVENT14 CSR = 0x032e + MHPMEVENT15 CSR = 0x032f + MHPMEVENT16 CSR = 0x0330 + MHPMEVENT17 CSR = 0x0331 + MHPMEVENT18 CSR = 0x0332 + MHPMEVENT19 CSR = 0x0333 + MHPMEVENT20 CSR = 0x0334 + MHPMEVENT21 CSR = 0x0335 + MHPMEVENT22 CSR = 0x0336 + MHPMEVENT23 CSR = 0x0337 + MHPMEVENT24 CSR = 0x0338 + MHPMEVENT25 CSR = 0x0339 + MHPMEVENT26 CSR = 0x033a + MHPMEVENT27 CSR = 
0x033b + MHPMEVENT28 CSR = 0x033c + MHPMEVENT29 CSR = 0x033d + MHPMEVENT30 CSR = 0x033e + MHPMEVENT31 CSR = 0x033f + MSCRATCH CSR = 0x0340 + MEPC CSR = 0x0341 + MCAUSE CSR = 0x0342 + MTVAL CSR = 0x0343 + MIP CSR = 0x0344 + MNXTI CSR = 0x0345 + MINTSTATUS CSR = 0x0346 + MSCRATCHCSW CSR = 0x0348 + MSCRATCHCSWL CSR = 0x0349 + MTINST CSR = 0x034a + MTVAL2 CSR = 0x034b + PMPCFG0 CSR = 0x03a0 + PMPCFG1 CSR = 0x03a1 + PMPCFG2 CSR = 0x03a2 + PMPCFG3 CSR = 0x03a3 + PMPADDR0 CSR = 0x03b0 + PMPADDR1 CSR = 0x03b1 + PMPADDR2 CSR = 0x03b2 + PMPADDR3 CSR = 0x03b3 + PMPADDR4 CSR = 0x03b4 + PMPADDR5 CSR = 0x03b5 + PMPADDR6 CSR = 0x03b6 + PMPADDR7 CSR = 0x03b7 + PMPADDR8 CSR = 0x03b8 + PMPADDR9 CSR = 0x03b9 + PMPADDR10 CSR = 0x03ba + PMPADDR11 CSR = 0x03bb + PMPADDR12 CSR = 0x03bc + PMPADDR13 CSR = 0x03bd + PMPADDR14 CSR = 0x03be + PMPADDR15 CSR = 0x03bf + HSTATUS CSR = 0x0600 + HEDELEG CSR = 0x0602 + HIDELEG CSR = 0x0603 + HIE CSR = 0x0604 + HTIMEDELTA CSR = 0x0605 + HCOUNTEREN CSR = 0x0606 + HGEIE CSR = 0x0607 + HTIMEDELTAH CSR = 0x0615 + HTVAL CSR = 0x0643 + HIP CSR = 0x0644 + HVIP CSR = 0x0645 + HTINST CSR = 0x064a + HGATP CSR = 0x0680 + TSELECT CSR = 0x07a0 + TDATA1 CSR = 0x07a1 + TDATA2 CSR = 0x07a2 + TDATA3 CSR = 0x07a3 + TINFO CSR = 0x07a4 + TCONTROL CSR = 0x07a5 + MCONTEXT CSR = 0x07a8 + MNOISE CSR = 0x07a9 + SCONTEXT CSR = 0x07aa + DCSR CSR = 0x07b0 + DPC CSR = 0x07b1 + DSCRATCH0 CSR = 0x07b2 + DSCRATCH1 CSR = 0x07b3 + MCYCLE CSR = 0x0b00 + MINSTRET CSR = 0x0b02 + MHPMCOUNTER3 CSR = 0x0b03 + MHPMCOUNTER4 CSR = 0x0b04 + MHPMCOUNTER5 CSR = 0x0b05 + MHPMCOUNTER6 CSR = 0x0b06 + MHPMCOUNTER7 CSR = 0x0b07 + MHPMCOUNTER8 CSR = 0x0b08 + MHPMCOUNTER9 CSR = 0x0b09 + MHPMCOUNTER10 CSR = 0x0b0a + MHPMCOUNTER11 CSR = 0x0b0b + MHPMCOUNTER12 CSR = 0x0b0c + MHPMCOUNTER13 CSR = 0x0b0d + MHPMCOUNTER14 CSR = 0x0b0e + MHPMCOUNTER15 CSR = 0x0b0f + MHPMCOUNTER16 CSR = 0x0b10 + MHPMCOUNTER17 CSR = 0x0b11 + MHPMCOUNTER18 CSR = 0x0b12 + MHPMCOUNTER19 CSR = 0x0b13 + MHPMCOUNTER20 CSR = 0x0b14 + 
MHPMCOUNTER21 CSR = 0x0b15 + MHPMCOUNTER22 CSR = 0x0b16 + MHPMCOUNTER23 CSR = 0x0b17 + MHPMCOUNTER24 CSR = 0x0b18 + MHPMCOUNTER25 CSR = 0x0b19 + MHPMCOUNTER26 CSR = 0x0b1a + MHPMCOUNTER27 CSR = 0x0b1b + MHPMCOUNTER28 CSR = 0x0b1c + MHPMCOUNTER29 CSR = 0x0b1d + MHPMCOUNTER30 CSR = 0x0b1e + MHPMCOUNTER31 CSR = 0x0b1f + MCYCLEH CSR = 0x0b80 + MINSTRETH CSR = 0x0b82 + MHPMCOUNTER3H CSR = 0x0b83 + MHPMCOUNTER4H CSR = 0x0b84 + MHPMCOUNTER5H CSR = 0x0b85 + MHPMCOUNTER6H CSR = 0x0b86 + MHPMCOUNTER7H CSR = 0x0b87 + MHPMCOUNTER8H CSR = 0x0b88 + MHPMCOUNTER9H CSR = 0x0b89 + MHPMCOUNTER10H CSR = 0x0b8a + MHPMCOUNTER11H CSR = 0x0b8b + MHPMCOUNTER12H CSR = 0x0b8c + MHPMCOUNTER13H CSR = 0x0b8d + MHPMCOUNTER14H CSR = 0x0b8e + MHPMCOUNTER15H CSR = 0x0b8f + MHPMCOUNTER16H CSR = 0x0b90 + MHPMCOUNTER17H CSR = 0x0b91 + MHPMCOUNTER18H CSR = 0x0b92 + MHPMCOUNTER19H CSR = 0x0b93 + MHPMCOUNTER20H CSR = 0x0b94 + MHPMCOUNTER21H CSR = 0x0b95 + MHPMCOUNTER22H CSR = 0x0b96 + MHPMCOUNTER23H CSR = 0x0b97 + MHPMCOUNTER24H CSR = 0x0b98 + MHPMCOUNTER25H CSR = 0x0b99 + MHPMCOUNTER26H CSR = 0x0b9a + MHPMCOUNTER27H CSR = 0x0b9b + MHPMCOUNTER28H CSR = 0x0b9c + MHPMCOUNTER29H CSR = 0x0b9d + MHPMCOUNTER30H CSR = 0x0b9e + MHPMCOUNTER31H CSR = 0x0b9f + CYCLE CSR = 0x0c00 + TIME CSR = 0x0c01 + INSTRET CSR = 0x0c02 + HPMCOUNTER3 CSR = 0x0c03 + HPMCOUNTER4 CSR = 0x0c04 + HPMCOUNTER5 CSR = 0x0c05 + HPMCOUNTER6 CSR = 0x0c06 + HPMCOUNTER7 CSR = 0x0c07 + HPMCOUNTER8 CSR = 0x0c08 + HPMCOUNTER9 CSR = 0x0c09 + HPMCOUNTER10 CSR = 0x0c0a + HPMCOUNTER11 CSR = 0x0c0b + HPMCOUNTER12 CSR = 0x0c0c + HPMCOUNTER13 CSR = 0x0c0d + HPMCOUNTER14 CSR = 0x0c0e + HPMCOUNTER15 CSR = 0x0c0f + HPMCOUNTER16 CSR = 0x0c10 + HPMCOUNTER17 CSR = 0x0c11 + HPMCOUNTER18 CSR = 0x0c12 + HPMCOUNTER19 CSR = 0x0c13 + HPMCOUNTER20 CSR = 0x0c14 + HPMCOUNTER21 CSR = 0x0c15 + HPMCOUNTER22 CSR = 0x0c16 + HPMCOUNTER23 CSR = 0x0c17 + HPMCOUNTER24 CSR = 0x0c18 + HPMCOUNTER25 CSR = 0x0c19 + HPMCOUNTER26 CSR = 0x0c1a + HPMCOUNTER27 CSR = 0x0c1b + HPMCOUNTER28 
CSR = 0x0c1c + HPMCOUNTER29 CSR = 0x0c1d + HPMCOUNTER30 CSR = 0x0c1e + HPMCOUNTER31 CSR = 0x0c1f + VL CSR = 0x0c20 + VTYPE CSR = 0x0c21 + VLENB CSR = 0x0c22 + CYCLEH CSR = 0x0c80 + TIMEH CSR = 0x0c81 + INSTRETH CSR = 0x0c82 + HPMCOUNTER3H CSR = 0x0c83 + HPMCOUNTER4H CSR = 0x0c84 + HPMCOUNTER5H CSR = 0x0c85 + HPMCOUNTER6H CSR = 0x0c86 + HPMCOUNTER7H CSR = 0x0c87 + HPMCOUNTER8H CSR = 0x0c88 + HPMCOUNTER9H CSR = 0x0c89 + HPMCOUNTER10H CSR = 0x0c8a + HPMCOUNTER11H CSR = 0x0c8b + HPMCOUNTER12H CSR = 0x0c8c + HPMCOUNTER13H CSR = 0x0c8d + HPMCOUNTER14H CSR = 0x0c8e + HPMCOUNTER15H CSR = 0x0c8f + HPMCOUNTER16H CSR = 0x0c90 + HPMCOUNTER17H CSR = 0x0c91 + HPMCOUNTER18H CSR = 0x0c92 + HPMCOUNTER19H CSR = 0x0c93 + HPMCOUNTER20H CSR = 0x0c94 + HPMCOUNTER21H CSR = 0x0c95 + HPMCOUNTER22H CSR = 0x0c96 + HPMCOUNTER23H CSR = 0x0c97 + HPMCOUNTER24H CSR = 0x0c98 + HPMCOUNTER25H CSR = 0x0c99 + HPMCOUNTER26H CSR = 0x0c9a + HPMCOUNTER27H CSR = 0x0c9b + HPMCOUNTER28H CSR = 0x0c9c + HPMCOUNTER29H CSR = 0x0c9d + HPMCOUNTER30H CSR = 0x0c9e + HPMCOUNTER31H CSR = 0x0c9f + HGEIP CSR = 0x0e12 + MVENDORID CSR = 0x0f11 + MARCHID CSR = 0x0f12 + MIMPID CSR = 0x0f13 + MHARTID CSR = 0x0f14 + MENTROPY CSR = 0x0f15 +) + +// An Uimm is an unsigned immediate number +type Uimm struct { + Imm uint32 // 32-bit unsigned integer + Decimal bool // Print format of the immediate, either decimal or hexadecimal +} + +func (ui Uimm) String() string { + if ui.Decimal { + return fmt.Sprintf("%d", ui.Imm) + } + return fmt.Sprintf("%#x", ui.Imm) +} + +// A Simm is a signed immediate number +type Simm struct { + Imm int32 // 32-bit signed integer + Decimal bool // Print format of the immediate, either decimal or hexadecimal + Width uint8 // Actual width of the Simm +} + +func (si Simm) String() string { + if si.Decimal { + return fmt.Sprintf("%d", si.Imm) + } + return fmt.Sprintf("%#x", si.Imm) +} + +// An AmoReg is an atomic address register used in AMO instructions +type AmoReg struct { + reg Reg // Avoid promoted 
String method +} + +func (amoReg AmoReg) String() string { + return fmt.Sprintf("(%s)", amoReg.reg) +} + +// A RegOffset is a register with offset value +type RegOffset struct { + OfsReg Reg + Ofs Simm +} + +func (regofs RegOffset) String() string { + return fmt.Sprintf("%s(%s)", regofs.Ofs, regofs.OfsReg) +} + +// A MemOrder is a memory order hint in fence instruction +type MemOrder uint8 + +func (memOrder MemOrder) String() string { + var str string + if memOrder<<7>>7 == 1 { + str += "i" + } + if memOrder>>1<<7>>7 == 1 { + str += "o" + } + if memOrder>>2<<7>>7 == 1 { + str += "r" + } + if memOrder>>3<<7>>7 == 1 { + str += "w" + } + return str +} diff --git a/riscv64/riscv64asm/plan9x.go b/riscv64/riscv64asm/plan9x.go new file mode 100644 index 00000000..367122d9 --- /dev/null +++ b/riscv64/riscv64asm/plan9x.go @@ -0,0 +1,377 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "fmt" + "io" + "strconv" + "strings" +) + +// GoSyntax returns the Go assembler syntax for the instruction. +// The syntax was originally defined by Plan 9. +// The pc is the program counter of the instruction, used for +// expanding PC-relative addresses into absolute ones. +// The symname function queries the symbol table for the program +// being disassembled. Given a target address it returns the name +// and base address of the symbol containing the target, if any; +// otherwise it returns "", 0. +// The reader text should read from the text segment using text addresses +// as offsets; it is used to display pc-relative loads as constant loads. 
+func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64), text io.ReaderAt) string { + if symname == nil { + symname = func(uint64) (string, uint64) { return "", 0 } + } + + var args []string + for _, a := range inst.Args { + if a == nil { + break + } + args = append(args, plan9Arg(&inst, pc, symname, a)) + } + + op := inst.Op.String() + + switch inst.Op { + + case AMOADD_D, AMOADD_D_AQ, AMOADD_D_RL, AMOADD_D_AQRL, AMOADD_W, AMOADD_W_AQ, + AMOADD_W_RL, AMOADD_W_AQRL, AMOAND_D, AMOAND_D_AQ, AMOAND_D_RL, AMOAND_D_AQRL, + AMOAND_W, AMOAND_W_AQ, AMOAND_W_RL, AMOAND_W_AQRL, AMOMAXU_D, AMOMAXU_D_AQ, + AMOMAXU_D_RL, AMOMAXU_D_AQRL, AMOMAXU_W, AMOMAXU_W_AQ, AMOMAXU_W_RL, AMOMAXU_W_AQRL, + AMOMAX_D, AMOMAX_D_AQ, AMOMAX_D_RL, AMOMAX_D_AQRL, AMOMAX_W, AMOMAX_W_AQ, AMOMAX_W_RL, + AMOMAX_W_AQRL, AMOMINU_D, AMOMINU_D_AQ, AMOMINU_D_RL, AMOMINU_D_AQRL, AMOMINU_W, + AMOMINU_W_AQ, AMOMINU_W_RL, AMOMINU_W_AQRL, AMOMIN_D, AMOMIN_D_AQ, AMOMIN_D_RL, + AMOMIN_D_AQRL, AMOMIN_W, AMOMIN_W_AQ, AMOMIN_W_RL, AMOMIN_W_AQRL, AMOOR_D, AMOOR_D_AQ, + AMOOR_D_RL, AMOOR_D_AQRL, AMOOR_W, AMOOR_W_AQ, AMOOR_W_RL, AMOOR_W_AQRL, AMOSWAP_D, + AMOSWAP_D_AQ, AMOSWAP_D_RL, AMOSWAP_D_AQRL, AMOSWAP_W, AMOSWAP_W_AQ, AMOSWAP_W_RL, + AMOSWAP_W_AQRL, AMOXOR_D, AMOXOR_D_AQ, AMOXOR_D_RL, AMOXOR_D_AQRL, AMOXOR_W, + AMOXOR_W_AQ, AMOXOR_W_RL, AMOXOR_W_AQRL, SC_D, SC_D_AQ, SC_D_RL, SC_D_AQRL, + SC_W, SC_W_AQ, SC_W_RL, SC_W_AQRL: + // Atomic instructions have special operand order. 
+ args[2], args[1] = args[1], args[2] + + case ADDI: + if inst.Args[2].(Simm).Imm == 0 { + op = "MOV" + args = args[:len(args)-1] + } + + case ADDIW: + if inst.Args[2].(Simm).Imm == 0 { + op = "MOVW" + args = args[:len(args)-1] + } + + case ANDI: + if inst.Args[2].(Simm).Imm == 255 { + op = "MOVBU" + args = args[:len(args)-1] + } + + case BEQ: + if inst.Args[1].(Reg) == X0 { + op = "BEQZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BGE: + if inst.Args[1].(Reg) == X0 { + op = "BGEZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BLT: + if inst.Args[1].(Reg) == X0 { + op = "BLTZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BNE: + if inst.Args[1].(Reg) == X0 { + op = "BNEZ" + args[1] = args[2] + args = args[:len(args)-1] + } + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case BLTU, BGEU: + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + case CSRRW: + switch inst.Args[1].(CSR) { + case FCSR: + op = "FSCSR" + args[1] = args[2] + args = args[:len(args)-1] + case FFLAGS: + op = "FSFLAGS" + args[1] = args[2] + args = args[:len(args)-1] + case FRM: + op = "FSRM" + args[1] = args[2] + args = args[:len(args)-1] + case CYCLE: + if inst.Args[0].(Reg) == X0 && inst.Args[2].(Reg) == X0 { + op = "UNIMP" + args = nil + } + } + + case CSRRS: + if inst.Args[2].(Reg) == X0 { + switch inst.Args[1].(CSR) { + case FCSR: + op = "FRCSR" + args = args[:len(args)-2] + case FFLAGS: + op = "FRFLAGS" + args = args[:len(args)-2] + case FRM: + op = "FRRM" + args = args[:len(args)-2] + case CYCLE: + op = "RDCYCLE" + args = args[:len(args)-2] + case CYCLEH: + op 
= "RDCYCLEH" + args = args[:len(args)-2] + case INSTRET: + op = "RDINSTRET" + args = args[:len(args)-2] + case INSTRETH: + op = "RDINSTRETH" + args = args[:len(args)-2] + case TIME: + op = "RDTIME" + args = args[:len(args)-2] + case TIMEH: + op = "RDTIMEH" + args = args[:len(args)-2] + } + } + + // Fence instruction in plan9 doesn't have any operands. + case FENCE: + args = nil + + case FMADD_D, FMADD_H, FMADD_Q, FMADD_S, FMSUB_D, FMSUB_H, + FMSUB_Q, FMSUB_S, FNMADD_D, FNMADD_H, FNMADD_Q, FNMADD_S, + FNMSUB_D, FNMSUB_H, FNMSUB_Q, FNMSUB_S: + args[1], args[3] = args[3], args[1] + + case FSGNJ_S: + if inst.Args[2] == inst.Args[1] { + op = "MOVF" + args = args[:len(args)-1] + } + + case FSGNJ_D: + if inst.Args[2] == inst.Args[1] { + op = "MOVD" + args = args[:len(args)-1] + } + + case FSGNJX_S: + if inst.Args[2] == inst.Args[1] { + op = "FABSS" + args = args[:len(args)-1] + } + + case FSGNJX_D: + if inst.Args[2] == inst.Args[1] { + op = "FABSD" + args = args[:len(args)-1] + } + + case FSGNJN_S: + if inst.Args[2] == inst.Args[1] { + op = "FNEGS" + args = args[:len(args)-1] + } + + case FSGNJN_D: + if inst.Args[2] == inst.Args[1] { + op = "FNESD" + args = args[:len(args)-1] + } + + case LD, SD: + op = "MOV" + if inst.Op == SD { + args[0], args[1] = args[1], args[0] + } + + case LB, SB: + op = "MOVB" + if inst.Op == SB { + args[0], args[1] = args[1], args[0] + } + + case LH, SH: + op = "MOVH" + if inst.Op == SH { + args[0], args[1] = args[1], args[0] + } + + case LW, SW: + op = "MOVW" + if inst.Op == SW { + args[0], args[1] = args[1], args[0] + } + + case LBU: + op = "MOVBU" + + case LHU: + op = "MOVHU" + + case LWU: + op = "MOVWU" + + case FLW, FSW: + op = "MOVF" + if inst.Op == FLW { + args[0], args[1] = args[1], args[0] + } + + case FLD, FSD: + op = "MOVD" + if inst.Op == FLD { + args[0], args[1] = args[1], args[0] + } + + case SUB: + if inst.Args[1].(Reg) == X0 { + op = "NEG" + args[1] = args[2] + args = args[:len(args)-1] + } + + case XORI: + if 
inst.Args[2].(Simm).String() == "-1" { + op = "NOT" + args = args[:len(args)-1] + } + + case SLTIU: + if inst.Args[2].(Simm).Imm == 1 { + op = "SEQZ" + args = args[:len(args)-1] + } + + case SLTU: + if inst.Args[1].(Reg) == X0 { + op = "SNEZ" + args[1] = args[2] + args = args[:len(args)-1] + } + + case JAL: + if inst.Args[0].(Reg) == X0 { + op = "JMP" + args[0] = args[1] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X1 { + op = "CALL" + args[0] = args[1] + args = args[:len(args)-1] + } else { + args[0], args[1] = args[1], args[0] + } + + case JALR: + if inst.Args[0].(Reg) == X0 { + if inst.Args[1].(RegOffset).OfsReg == X1 && inst.Args[1].(RegOffset).Ofs.Imm == 0 { + op = "RET" + args = nil + break + } + op = "JMP" + args[0] = args[1] + args = args[:len(args)-1] + } else if inst.Args[0].(Reg) == X1 { + op = "CALL" + args[0] = args[1] + args = args[:len(args)-1] + } else { + args[0], args[1] = args[1], args[0] + } + } + + // Reverse args, placing dest last. + for i, j := 0, len(args)-1; i < j; i, j = i+1, j-1 { + args[i], args[j] = args[j], args[i] + } + + // Change to plan9 opcode format + // Atomic instructions do not have reorder suffix, so remove them + op = strings.Replace(op, ".AQRL", "", -1) + op = strings.Replace(op, ".AQ", "", -1) + op = strings.Replace(op, ".RL", "", -1) + op = strings.Replace(op, ".", "", -1) + + if args != nil { + op += " " + strings.Join(args, ", ") + } + + return op +} + +func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { + switch a := arg.(type) { + case Uimm: + return fmt.Sprintf("$%d", uint32(a.Imm)) + + case Simm: + imm, _ := strconv.Atoi(a.String()) + if a.Width == 13 || a.Width == 21 { + addr := int64(pc) + int64(imm) + if s, base := symname(uint64(addr)); s != "" && uint64(addr) == base { + return fmt.Sprintf("%s(SB)", s) + } + return fmt.Sprintf("%d(PC)", imm/4) + } + return fmt.Sprintf("$%d", int32(imm)) + + case Reg: + if a <= 31 { + return fmt.Sprintf("X%d", a) + } 
else { + return fmt.Sprintf("F%d", a-32) + } + + case RegOffset: + if a.Ofs.Imm == 0 { + return fmt.Sprintf("(X%d)", a.OfsReg) + } else { + return fmt.Sprintf("%s(X%d)", a.Ofs.String(), a.OfsReg) + } + + case AmoReg: + return fmt.Sprintf("(X%d)", a.reg) + + default: + return strings.ToUpper(arg.String()) + } +} diff --git a/riscv64/riscv64asm/tables.go b/riscv64/riscv64asm/tables.go new file mode 100644 index 00000000..3e5db415 --- /dev/null +++ b/riscv64/riscv64asm/tables.go @@ -0,0 +1,1474 @@ +// Code generated by riscv64spec riscv-opcodes +// DO NOT EDIT + +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +const ( + _ Op = iota + ADD + ADDI + ADDIW + ADDW + ADD_UW + AMOADD_D + AMOADD_D_AQ + AMOADD_D_AQRL + AMOADD_D_RL + AMOADD_W + AMOADD_W_AQ + AMOADD_W_AQRL + AMOADD_W_RL + AMOAND_D + AMOAND_D_AQ + AMOAND_D_AQRL + AMOAND_D_RL + AMOAND_W + AMOAND_W_AQ + AMOAND_W_AQRL + AMOAND_W_RL + AMOMAXU_D + AMOMAXU_D_AQ + AMOMAXU_D_AQRL + AMOMAXU_D_RL + AMOMAXU_W + AMOMAXU_W_AQ + AMOMAXU_W_AQRL + AMOMAXU_W_RL + AMOMAX_D + AMOMAX_D_AQ + AMOMAX_D_AQRL + AMOMAX_D_RL + AMOMAX_W + AMOMAX_W_AQ + AMOMAX_W_AQRL + AMOMAX_W_RL + AMOMINU_D + AMOMINU_D_AQ + AMOMINU_D_AQRL + AMOMINU_D_RL + AMOMINU_W + AMOMINU_W_AQ + AMOMINU_W_AQRL + AMOMINU_W_RL + AMOMIN_D + AMOMIN_D_AQ + AMOMIN_D_AQRL + AMOMIN_D_RL + AMOMIN_W + AMOMIN_W_AQ + AMOMIN_W_AQRL + AMOMIN_W_RL + AMOOR_D + AMOOR_D_AQ + AMOOR_D_AQRL + AMOOR_D_RL + AMOOR_W + AMOOR_W_AQ + AMOOR_W_AQRL + AMOOR_W_RL + AMOSWAP_D + AMOSWAP_D_AQ + AMOSWAP_D_AQRL + AMOSWAP_D_RL + AMOSWAP_W + AMOSWAP_W_AQ + AMOSWAP_W_AQRL + AMOSWAP_W_RL + AMOXOR_D + AMOXOR_D_AQ + AMOXOR_D_AQRL + AMOXOR_D_RL + AMOXOR_W + AMOXOR_W_AQ + AMOXOR_W_AQRL + AMOXOR_W_RL + AND + ANDI + ANDN + AUIPC + BCLR + BCLRI + BEQ + BEXT + BEXTI + BGE + BGEU + BINV + BINVI + BLT + BLTU + BNE + BSET + BSETI + CLZ + CLZW + CPOP + CPOPW + CSRRC + CSRRCI + CSRRS + 
CSRRSI + CSRRW + CSRRWI + CTZ + CTZW + C_ADD + C_ADDI + C_ADDI16SP + C_ADDI4SPN + C_ADDIW + C_ADDW + C_AND + C_ANDI + C_BEQZ + C_BNEZ + C_EBREAK + C_FLD + C_FLDSP + C_FSD + C_FSDSP + C_J + C_JALR + C_JR + C_LD + C_LDSP + C_LI + C_LUI + C_LW + C_LWSP + C_MV + C_NOP + C_OR + C_SD + C_SDSP + C_SLLI + C_SRAI + C_SRLI + C_SUB + C_SUBW + C_SW + C_SWSP + C_UNIMP + C_XOR + DIV + DIVU + DIVUW + DIVW + EBREAK + ECALL + FADD_D + FADD_H + FADD_Q + FADD_S + FCLASS_D + FCLASS_H + FCLASS_Q + FCLASS_S + FCVT_D_L + FCVT_D_LU + FCVT_D_Q + FCVT_D_S + FCVT_D_W + FCVT_D_WU + FCVT_H_L + FCVT_H_LU + FCVT_H_S + FCVT_H_W + FCVT_H_WU + FCVT_LU_D + FCVT_LU_H + FCVT_LU_Q + FCVT_LU_S + FCVT_L_D + FCVT_L_H + FCVT_L_Q + FCVT_L_S + FCVT_Q_D + FCVT_Q_L + FCVT_Q_LU + FCVT_Q_S + FCVT_Q_W + FCVT_Q_WU + FCVT_S_D + FCVT_S_H + FCVT_S_L + FCVT_S_LU + FCVT_S_Q + FCVT_S_W + FCVT_S_WU + FCVT_WU_D + FCVT_WU_H + FCVT_WU_Q + FCVT_WU_S + FCVT_W_D + FCVT_W_H + FCVT_W_Q + FCVT_W_S + FDIV_D + FDIV_H + FDIV_Q + FDIV_S + FENCE + FENCE_I + FEQ_D + FEQ_H + FEQ_Q + FEQ_S + FLD + FLE_D + FLE_H + FLE_Q + FLE_S + FLH + FLQ + FLT_D + FLT_H + FLT_Q + FLT_S + FLW + FMADD_D + FMADD_H + FMADD_Q + FMADD_S + FMAX_D + FMAX_H + FMAX_Q + FMAX_S + FMIN_D + FMIN_H + FMIN_Q + FMIN_S + FMSUB_D + FMSUB_H + FMSUB_Q + FMSUB_S + FMUL_D + FMUL_H + FMUL_Q + FMUL_S + FMV_D_X + FMV_H_X + FMV_W_X + FMV_X_D + FMV_X_H + FMV_X_W + FNMADD_D + FNMADD_H + FNMADD_Q + FNMADD_S + FNMSUB_D + FNMSUB_H + FNMSUB_Q + FNMSUB_S + FSD + FSGNJN_D + FSGNJN_H + FSGNJN_Q + FSGNJN_S + FSGNJX_D + FSGNJX_H + FSGNJX_Q + FSGNJX_S + FSGNJ_D + FSGNJ_H + FSGNJ_Q + FSGNJ_S + FSH + FSQ + FSQRT_D + FSQRT_H + FSQRT_Q + FSQRT_S + FSUB_D + FSUB_H + FSUB_Q + FSUB_S + FSW + JAL + JALR + LB + LBU + LD + LH + LHU + LR_D + LR_D_AQ + LR_D_AQRL + LR_D_RL + LR_W + LR_W_AQ + LR_W_AQRL + LR_W_RL + LUI + LW + LWU + MAX + MAXU + MIN + MINU + MUL + MULH + MULHSU + MULHU + MULW + OR + ORC_B + ORI + ORN + REM + REMU + REMUW + REMW + REV8 + ROL + ROLW + ROR + RORI + RORIW + RORW + SB + SC_D + 
SC_D_AQ + SC_D_AQRL + SC_D_RL + SC_W + SC_W_AQ + SC_W_AQRL + SC_W_RL + SD + SEXT_B + SEXT_H + SH + SH1ADD + SH1ADD_UW + SH2ADD + SH2ADD_UW + SH3ADD + SH3ADD_UW + SLL + SLLI + SLLIW + SLLI_UW + SLLW + SLT + SLTI + SLTIU + SLTU + SRA + SRAI + SRAIW + SRAW + SRL + SRLI + SRLIW + SRLW + SUB + SUBW + SW + XNOR + XOR + XORI + ZEXT_H +) + +var opstr = [...]string{ + ADD: "ADD", + ADDI: "ADDI", + ADDIW: "ADDIW", + ADDW: "ADDW", + ADD_UW: "ADD.UW", + AMOADD_D: "AMOADD.D", + AMOADD_D_AQ: "AMOADD.D.AQ", + AMOADD_D_AQRL: "AMOADD.D.AQRL", + AMOADD_D_RL: "AMOADD.D.RL", + AMOADD_W: "AMOADD.W", + AMOADD_W_AQ: "AMOADD.W.AQ", + AMOADD_W_AQRL: "AMOADD.W.AQRL", + AMOADD_W_RL: "AMOADD.W.RL", + AMOAND_D: "AMOAND.D", + AMOAND_D_AQ: "AMOAND.D.AQ", + AMOAND_D_AQRL: "AMOAND.D.AQRL", + AMOAND_D_RL: "AMOAND.D.RL", + AMOAND_W: "AMOAND.W", + AMOAND_W_AQ: "AMOAND.W.AQ", + AMOAND_W_AQRL: "AMOAND.W.AQRL", + AMOAND_W_RL: "AMOAND.W.RL", + AMOMAXU_D: "AMOMAXU.D", + AMOMAXU_D_AQ: "AMOMAXU.D.AQ", + AMOMAXU_D_AQRL: "AMOMAXU.D.AQRL", + AMOMAXU_D_RL: "AMOMAXU.D.RL", + AMOMAXU_W: "AMOMAXU.W", + AMOMAXU_W_AQ: "AMOMAXU.W.AQ", + AMOMAXU_W_AQRL: "AMOMAXU.W.AQRL", + AMOMAXU_W_RL: "AMOMAXU.W.RL", + AMOMAX_D: "AMOMAX.D", + AMOMAX_D_AQ: "AMOMAX.D.AQ", + AMOMAX_D_AQRL: "AMOMAX.D.AQRL", + AMOMAX_D_RL: "AMOMAX.D.RL", + AMOMAX_W: "AMOMAX.W", + AMOMAX_W_AQ: "AMOMAX.W.AQ", + AMOMAX_W_AQRL: "AMOMAX.W.AQRL", + AMOMAX_W_RL: "AMOMAX.W.RL", + AMOMINU_D: "AMOMINU.D", + AMOMINU_D_AQ: "AMOMINU.D.AQ", + AMOMINU_D_AQRL: "AMOMINU.D.AQRL", + AMOMINU_D_RL: "AMOMINU.D.RL", + AMOMINU_W: "AMOMINU.W", + AMOMINU_W_AQ: "AMOMINU.W.AQ", + AMOMINU_W_AQRL: "AMOMINU.W.AQRL", + AMOMINU_W_RL: "AMOMINU.W.RL", + AMOMIN_D: "AMOMIN.D", + AMOMIN_D_AQ: "AMOMIN.D.AQ", + AMOMIN_D_AQRL: "AMOMIN.D.AQRL", + AMOMIN_D_RL: "AMOMIN.D.RL", + AMOMIN_W: "AMOMIN.W", + AMOMIN_W_AQ: "AMOMIN.W.AQ", + AMOMIN_W_AQRL: "AMOMIN.W.AQRL", + AMOMIN_W_RL: "AMOMIN.W.RL", + AMOOR_D: "AMOOR.D", + AMOOR_D_AQ: "AMOOR.D.AQ", + AMOOR_D_AQRL: "AMOOR.D.AQRL", + AMOOR_D_RL: 
"AMOOR.D.RL", + AMOOR_W: "AMOOR.W", + AMOOR_W_AQ: "AMOOR.W.AQ", + AMOOR_W_AQRL: "AMOOR.W.AQRL", + AMOOR_W_RL: "AMOOR.W.RL", + AMOSWAP_D: "AMOSWAP.D", + AMOSWAP_D_AQ: "AMOSWAP.D.AQ", + AMOSWAP_D_AQRL: "AMOSWAP.D.AQRL", + AMOSWAP_D_RL: "AMOSWAP.D.RL", + AMOSWAP_W: "AMOSWAP.W", + AMOSWAP_W_AQ: "AMOSWAP.W.AQ", + AMOSWAP_W_AQRL: "AMOSWAP.W.AQRL", + AMOSWAP_W_RL: "AMOSWAP.W.RL", + AMOXOR_D: "AMOXOR.D", + AMOXOR_D_AQ: "AMOXOR.D.AQ", + AMOXOR_D_AQRL: "AMOXOR.D.AQRL", + AMOXOR_D_RL: "AMOXOR.D.RL", + AMOXOR_W: "AMOXOR.W", + AMOXOR_W_AQ: "AMOXOR.W.AQ", + AMOXOR_W_AQRL: "AMOXOR.W.AQRL", + AMOXOR_W_RL: "AMOXOR.W.RL", + AND: "AND", + ANDI: "ANDI", + ANDN: "ANDN", + AUIPC: "AUIPC", + BCLR: "BCLR", + BCLRI: "BCLRI", + BEQ: "BEQ", + BEXT: "BEXT", + BEXTI: "BEXTI", + BGE: "BGE", + BGEU: "BGEU", + BINV: "BINV", + BINVI: "BINVI", + BLT: "BLT", + BLTU: "BLTU", + BNE: "BNE", + BSET: "BSET", + BSETI: "BSETI", + CLZ: "CLZ", + CLZW: "CLZW", + CPOP: "CPOP", + CPOPW: "CPOPW", + CSRRC: "CSRRC", + CSRRCI: "CSRRCI", + CSRRS: "CSRRS", + CSRRSI: "CSRRSI", + CSRRW: "CSRRW", + CSRRWI: "CSRRWI", + CTZ: "CTZ", + CTZW: "CTZW", + C_ADD: "C.ADD", + C_ADDI: "C.ADDI", + C_ADDI16SP: "C.ADDI16SP", + C_ADDI4SPN: "C.ADDI4SPN", + C_ADDIW: "C.ADDIW", + C_ADDW: "C.ADDW", + C_AND: "C.AND", + C_ANDI: "C.ANDI", + C_BEQZ: "C.BEQZ", + C_BNEZ: "C.BNEZ", + C_EBREAK: "C.EBREAK", + C_FLD: "C.FLD", + C_FLDSP: "C.FLDSP", + C_FSD: "C.FSD", + C_FSDSP: "C.FSDSP", + C_J: "C.J", + C_JALR: "C.JALR", + C_JR: "C.JR", + C_LD: "C.LD", + C_LDSP: "C.LDSP", + C_LI: "C.LI", + C_LUI: "C.LUI", + C_LW: "C.LW", + C_LWSP: "C.LWSP", + C_MV: "C.MV", + C_NOP: "C.NOP", + C_OR: "C.OR", + C_SD: "C.SD", + C_SDSP: "C.SDSP", + C_SLLI: "C.SLLI", + C_SRAI: "C.SRAI", + C_SRLI: "C.SRLI", + C_SUB: "C.SUB", + C_SUBW: "C.SUBW", + C_SW: "C.SW", + C_SWSP: "C.SWSP", + C_UNIMP: "C.UNIMP", + C_XOR: "C.XOR", + DIV: "DIV", + DIVU: "DIVU", + DIVUW: "DIVUW", + DIVW: "DIVW", + EBREAK: "EBREAK", + ECALL: "ECALL", + FADD_D: "FADD.D", + FADD_H: "FADD.H", + FADD_Q: 
"FADD.Q", + FADD_S: "FADD.S", + FCLASS_D: "FCLASS.D", + FCLASS_H: "FCLASS.H", + FCLASS_Q: "FCLASS.Q", + FCLASS_S: "FCLASS.S", + FCVT_D_L: "FCVT.D.L", + FCVT_D_LU: "FCVT.D.LU", + FCVT_D_Q: "FCVT.D.Q", + FCVT_D_S: "FCVT.D.S", + FCVT_D_W: "FCVT.D.W", + FCVT_D_WU: "FCVT.D.WU", + FCVT_H_L: "FCVT.H.L", + FCVT_H_LU: "FCVT.H.LU", + FCVT_H_S: "FCVT.H.S", + FCVT_H_W: "FCVT.H.W", + FCVT_H_WU: "FCVT.H.WU", + FCVT_LU_D: "FCVT.LU.D", + FCVT_LU_H: "FCVT.LU.H", + FCVT_LU_Q: "FCVT.LU.Q", + FCVT_LU_S: "FCVT.LU.S", + FCVT_L_D: "FCVT.L.D", + FCVT_L_H: "FCVT.L.H", + FCVT_L_Q: "FCVT.L.Q", + FCVT_L_S: "FCVT.L.S", + FCVT_Q_D: "FCVT.Q.D", + FCVT_Q_L: "FCVT.Q.L", + FCVT_Q_LU: "FCVT.Q.LU", + FCVT_Q_S: "FCVT.Q.S", + FCVT_Q_W: "FCVT.Q.W", + FCVT_Q_WU: "FCVT.Q.WU", + FCVT_S_D: "FCVT.S.D", + FCVT_S_H: "FCVT.S.H", + FCVT_S_L: "FCVT.S.L", + FCVT_S_LU: "FCVT.S.LU", + FCVT_S_Q: "FCVT.S.Q", + FCVT_S_W: "FCVT.S.W", + FCVT_S_WU: "FCVT.S.WU", + FCVT_WU_D: "FCVT.WU.D", + FCVT_WU_H: "FCVT.WU.H", + FCVT_WU_Q: "FCVT.WU.Q", + FCVT_WU_S: "FCVT.WU.S", + FCVT_W_D: "FCVT.W.D", + FCVT_W_H: "FCVT.W.H", + FCVT_W_Q: "FCVT.W.Q", + FCVT_W_S: "FCVT.W.S", + FDIV_D: "FDIV.D", + FDIV_H: "FDIV.H", + FDIV_Q: "FDIV.Q", + FDIV_S: "FDIV.S", + FENCE: "FENCE", + FENCE_I: "FENCE.I", + FEQ_D: "FEQ.D", + FEQ_H: "FEQ.H", + FEQ_Q: "FEQ.Q", + FEQ_S: "FEQ.S", + FLD: "FLD", + FLE_D: "FLE.D", + FLE_H: "FLE.H", + FLE_Q: "FLE.Q", + FLE_S: "FLE.S", + FLH: "FLH", + FLQ: "FLQ", + FLT_D: "FLT.D", + FLT_H: "FLT.H", + FLT_Q: "FLT.Q", + FLT_S: "FLT.S", + FLW: "FLW", + FMADD_D: "FMADD.D", + FMADD_H: "FMADD.H", + FMADD_Q: "FMADD.Q", + FMADD_S: "FMADD.S", + FMAX_D: "FMAX.D", + FMAX_H: "FMAX.H", + FMAX_Q: "FMAX.Q", + FMAX_S: "FMAX.S", + FMIN_D: "FMIN.D", + FMIN_H: "FMIN.H", + FMIN_Q: "FMIN.Q", + FMIN_S: "FMIN.S", + FMSUB_D: "FMSUB.D", + FMSUB_H: "FMSUB.H", + FMSUB_Q: "FMSUB.Q", + FMSUB_S: "FMSUB.S", + FMUL_D: "FMUL.D", + FMUL_H: "FMUL.H", + FMUL_Q: "FMUL.Q", + FMUL_S: "FMUL.S", + FMV_D_X: "FMV.D.X", + FMV_H_X: "FMV.H.X", + FMV_W_X: "FMV.W.X", + 
FMV_X_D: "FMV.X.D", + FMV_X_H: "FMV.X.H", + FMV_X_W: "FMV.X.W", + FNMADD_D: "FNMADD.D", + FNMADD_H: "FNMADD.H", + FNMADD_Q: "FNMADD.Q", + FNMADD_S: "FNMADD.S", + FNMSUB_D: "FNMSUB.D", + FNMSUB_H: "FNMSUB.H", + FNMSUB_Q: "FNMSUB.Q", + FNMSUB_S: "FNMSUB.S", + FSD: "FSD", + FSGNJN_D: "FSGNJN.D", + FSGNJN_H: "FSGNJN.H", + FSGNJN_Q: "FSGNJN.Q", + FSGNJN_S: "FSGNJN.S", + FSGNJX_D: "FSGNJX.D", + FSGNJX_H: "FSGNJX.H", + FSGNJX_Q: "FSGNJX.Q", + FSGNJX_S: "FSGNJX.S", + FSGNJ_D: "FSGNJ.D", + FSGNJ_H: "FSGNJ.H", + FSGNJ_Q: "FSGNJ.Q", + FSGNJ_S: "FSGNJ.S", + FSH: "FSH", + FSQ: "FSQ", + FSQRT_D: "FSQRT.D", + FSQRT_H: "FSQRT.H", + FSQRT_Q: "FSQRT.Q", + FSQRT_S: "FSQRT.S", + FSUB_D: "FSUB.D", + FSUB_H: "FSUB.H", + FSUB_Q: "FSUB.Q", + FSUB_S: "FSUB.S", + FSW: "FSW", + JAL: "JAL", + JALR: "JALR", + LB: "LB", + LBU: "LBU", + LD: "LD", + LH: "LH", + LHU: "LHU", + LR_D: "LR.D", + LR_D_AQ: "LR.D.AQ", + LR_D_AQRL: "LR.D.AQRL", + LR_D_RL: "LR.D.RL", + LR_W: "LR.W", + LR_W_AQ: "LR.W.AQ", + LR_W_AQRL: "LR.W.AQRL", + LR_W_RL: "LR.W.RL", + LUI: "LUI", + LW: "LW", + LWU: "LWU", + MAX: "MAX", + MAXU: "MAXU", + MIN: "MIN", + MINU: "MINU", + MUL: "MUL", + MULH: "MULH", + MULHSU: "MULHSU", + MULHU: "MULHU", + MULW: "MULW", + OR: "OR", + ORC_B: "ORC.B", + ORI: "ORI", + ORN: "ORN", + REM: "REM", + REMU: "REMU", + REMUW: "REMUW", + REMW: "REMW", + REV8: "REV8", + ROL: "ROL", + ROLW: "ROLW", + ROR: "ROR", + RORI: "RORI", + RORIW: "RORIW", + RORW: "RORW", + SB: "SB", + SC_D: "SC.D", + SC_D_AQ: "SC.D.AQ", + SC_D_AQRL: "SC.D.AQRL", + SC_D_RL: "SC.D.RL", + SC_W: "SC.W", + SC_W_AQ: "SC.W.AQ", + SC_W_AQRL: "SC.W.AQRL", + SC_W_RL: "SC.W.RL", + SD: "SD", + SEXT_B: "SEXT.B", + SEXT_H: "SEXT.H", + SH: "SH", + SH1ADD: "SH1ADD", + SH1ADD_UW: "SH1ADD.UW", + SH2ADD: "SH2ADD", + SH2ADD_UW: "SH2ADD.UW", + SH3ADD: "SH3ADD", + SH3ADD_UW: "SH3ADD.UW", + SLL: "SLL", + SLLI: "SLLI", + SLLIW: "SLLIW", + SLLI_UW: "SLLI.UW", + SLLW: "SLLW", + SLT: "SLT", + SLTI: "SLTI", + SLTIU: "SLTIU", + SLTU: "SLTU", + SRA: "SRA", + SRAI: 
"SRAI", + SRAIW: "SRAIW", + SRAW: "SRAW", + SRL: "SRL", + SRLI: "SRLI", + SRLIW: "SRLIW", + SRLW: "SRLW", + SUB: "SUB", + SUBW: "SUBW", + SW: "SW", + XNOR: "XNOR", + XOR: "XOR", + XORI: "XORI", + ZEXT_H: "ZEXT.H", +} + +var instFormats = [...]instFormat{ + // ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00000033, op: ADD, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ADDI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00000013, op: ADDI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ADDIW rd, rs1, imm12 + {mask: 0x0000707f, value: 0x0000001b, op: ADDIW, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ADDW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0000003b, op: ADDW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0800003b, op: ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // AMOADD.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0000302f, op: AMOADD_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0400302f, op: AMOADD_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0600302f, op: AMOADD_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0200302f, op: AMOADD_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0000202f, op: AMOADD_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0400202f, op: AMOADD_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0600202f, op: AMOADD_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOADD.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0200202f, op: AMOADD_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D 
rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6000302f, op: AMOAND_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6400302f, op: AMOAND_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6600302f, op: AMOAND_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6200302f, op: AMOAND_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6000202f, op: AMOAND_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6400202f, op: AMOAND_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6600202f, op: AMOAND_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOAND.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x6200202f, op: AMOAND_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe000302f, op: AMOMAXU_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe400302f, op: AMOMAXU_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe600302f, op: AMOMAXU_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe200302f, op: AMOMAXU_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe000202f, op: AMOMAXU_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe400202f, op: AMOMAXU_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W.AQRL rd, rs2, rs1_amo + 
{mask: 0xfe00707f, value: 0xe600202f, op: AMOMAXU_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAXU.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xe200202f, op: AMOMAXU_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa000302f, op: AMOMAX_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa400302f, op: AMOMAX_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa600302f, op: AMOMAX_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa200302f, op: AMOMAX_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa000202f, op: AMOMAX_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa400202f, op: AMOMAX_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa600202f, op: AMOMAX_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMAX.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xa200202f, op: AMOMAX_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc000302f, op: AMOMINU_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc400302f, op: AMOMINU_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc600302f, op: AMOMINU_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc200302f, op: AMOMINU_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W rd, rs2, rs1_amo + {mask: 0xfe00707f, 
value: 0xc000202f, op: AMOMINU_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc400202f, op: AMOMINU_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc600202f, op: AMOMINU_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMINU.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0xc200202f, op: AMOMINU_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8000302f, op: AMOMIN_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8400302f, op: AMOMIN_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8600302f, op: AMOMIN_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8200302f, op: AMOMIN_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8000202f, op: AMOMIN_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8400202f, op: AMOMIN_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8600202f, op: AMOMIN_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOMIN.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x8200202f, op: AMOMIN_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4000302f, op: AMOOR_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4400302f, op: AMOOR_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4600302f, op: 
AMOOR_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4200302f, op: AMOOR_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4000202f, op: AMOOR_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4400202f, op: AMOOR_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4600202f, op: AMOOR_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOOR.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x4200202f, op: AMOOR_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0800302f, op: AMOSWAP_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0c00302f, op: AMOSWAP_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0e00302f, op: AMOSWAP_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0a00302f, op: AMOSWAP_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0800202f, op: AMOSWAP_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0c00202f, op: AMOSWAP_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0e00202f, op: AMOSWAP_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOSWAP.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x0a00202f, op: AMOSWAP_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2000302f, op: AMOXOR_D, args: 
argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2400302f, op: AMOXOR_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2600302f, op: AMOXOR_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2200302f, op: AMOXOR_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2000202f, op: AMOXOR_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2400202f, op: AMOXOR_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2600202f, op: AMOXOR_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AMOXOR.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x2200202f, op: AMOXOR_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // AND rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00007033, op: AND, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ANDI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00007013, op: ANDI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ANDN rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40007033, op: ANDN, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // AUIPC rd, imm20 + {mask: 0x0000007f, value: 0x00000017, op: AUIPC, args: argTypeList{arg_rd, arg_imm20}}, + // BCLR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x48001033, op: BCLR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // BCLRI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x48001013, op: BCLRI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // BEQ rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00000063, op: BEQ, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BEXT rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x48005033, op: BEXT, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // 
BEXTI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x48005013, op: BEXTI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // BGE rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00005063, op: BGE, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BGEU rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00007063, op: BGEU, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BINV rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x68001033, op: BINV, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // BINVI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x68001013, op: BINVI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // BLT rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00004063, op: BLT, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BLTU rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00006063, op: BLTU, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BNE rs1, rs2, bimm12 + {mask: 0x0000707f, value: 0x00001063, op: BNE, args: argTypeList{arg_rs1, arg_rs2, arg_bimm12}}, + // BSET rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x28001033, op: BSET, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // BSETI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x28001013, op: BSETI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // CLZ rd, rs1 + {mask: 0xfff0707f, value: 0x60001013, op: CLZ, args: argTypeList{arg_rd, arg_rs1}}, + // CLZW rd, rs1 + {mask: 0xfff0707f, value: 0x6000101b, op: CLZW, args: argTypeList{arg_rd, arg_rs1}}, + // CPOP rd, rs1 + {mask: 0xfff0707f, value: 0x60201013, op: CPOP, args: argTypeList{arg_rd, arg_rs1}}, + // CPOPW rd, rs1 + {mask: 0xfff0707f, value: 0x6020101b, op: CPOPW, args: argTypeList{arg_rd, arg_rs1}}, + // CSRRC rd, csr, rs1 + {mask: 0x0000707f, value: 0x00003073, op: CSRRC, args: argTypeList{arg_rd, arg_csr, arg_rs1}}, + // CSRRCI rd, csr, zimm + {mask: 0x0000707f, value: 0x00007073, op: CSRRCI, args: argTypeList{arg_rd, arg_csr, arg_zimm}}, + // CSRRS rd, csr, rs1 + {mask: 0x0000707f, value: 0x00002073, op: CSRRS, args: 
argTypeList{arg_rd, arg_csr, arg_rs1}}, + // CSRRSI rd, csr, zimm + {mask: 0x0000707f, value: 0x00006073, op: CSRRSI, args: argTypeList{arg_rd, arg_csr, arg_zimm}}, + // CSRRW rd, csr, rs1 + {mask: 0x0000707f, value: 0x00001073, op: CSRRW, args: argTypeList{arg_rd, arg_csr, arg_rs1}}, + // CSRRWI rd, csr, zimm + {mask: 0x0000707f, value: 0x00005073, op: CSRRWI, args: argTypeList{arg_rd, arg_csr, arg_zimm}}, + // CTZ rd, rs1 + {mask: 0xfff0707f, value: 0x60101013, op: CTZ, args: argTypeList{arg_rd, arg_rs1}}, + // CTZW rd, rs1 + {mask: 0xfff0707f, value: 0x6010101b, op: CTZW, args: argTypeList{arg_rd, arg_rs1}}, + // C.ADD rd_rs1_n0, c_rs2_n0 + {mask: 0x0000f003, value: 0x00009002, op: C_ADD, args: argTypeList{arg_rd_rs1_n0, arg_c_rs2_n0}}, + // C.ADDI rd_rs1_n0, c_nzimm6 + {mask: 0x0000e003, value: 0x00000001, op: C_ADDI, args: argTypeList{arg_rd_rs1_n0, arg_c_nzimm6}}, + // C.ADDI16SP c_nzimm10 + {mask: 0x0000ef83, value: 0x00006101, op: C_ADDI16SP, args: argTypeList{arg_c_nzimm10}}, + // C.ADDI4SPN rd_p, c_nzuimm10 + {mask: 0x0000e003, value: 0x00000000, op: C_ADDI4SPN, args: argTypeList{arg_rd_p, arg_c_nzuimm10}}, + // C.ADDIW rd_rs1_n0, c_imm6 + {mask: 0x0000e003, value: 0x00002001, op: C_ADDIW, args: argTypeList{arg_rd_rs1_n0, arg_c_imm6}}, + // C.ADDW rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00009c21, op: C_ADDW, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.AND rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c61, op: C_AND, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.ANDI rd_rs1_p, c_imm6 + {mask: 0x0000ec03, value: 0x00008801, op: C_ANDI, args: argTypeList{arg_rd_rs1_p, arg_c_imm6}}, + // C.BEQZ rs1_p, c_bimm9 + {mask: 0x0000e003, value: 0x0000c001, op: C_BEQZ, args: argTypeList{arg_rs1_p, arg_c_bimm9}}, + // C.BNEZ rs1_p, c_bimm9 + {mask: 0x0000e003, value: 0x0000e001, op: C_BNEZ, args: argTypeList{arg_rs1_p, arg_c_bimm9}}, + // C.EBREAK + {mask: 0x0000ffff, value: 0x00009002, op: C_EBREAK, args: argTypeList{}}, + // C.FLD fd_p, 
rs1_p, c_uimm8 + {mask: 0x0000e003, value: 0x00002000, op: C_FLD, args: argTypeList{arg_fd_p, arg_rs1_p, arg_c_uimm8}}, + // C.FLDSP fd, c_uimm9sp + {mask: 0x0000e003, value: 0x00002002, op: C_FLDSP, args: argTypeList{arg_fd, arg_c_uimm9sp}}, + // C.FSD rs1_p, fs2_p, c_uimm8 + {mask: 0x0000e003, value: 0x0000a000, op: C_FSD, args: argTypeList{arg_rs1_p, arg_fs2_p, arg_c_uimm8}}, + // C.FSDSP c_fs2, c_uimm9sp_s + {mask: 0x0000e003, value: 0x0000a002, op: C_FSDSP, args: argTypeList{arg_c_fs2, arg_c_uimm9sp_s}}, + // C.J c_imm12 + {mask: 0x0000e003, value: 0x0000a001, op: C_J, args: argTypeList{arg_c_imm12}}, + // C.JALR c_rs1_n0 + {mask: 0x0000f07f, value: 0x00009002, op: C_JALR, args: argTypeList{arg_c_rs1_n0}}, + // C.JR rs1_n0 + {mask: 0x0000f07f, value: 0x00008002, op: C_JR, args: argTypeList{arg_rs1_n0}}, + // C.LD rd_p, rs1_p, c_uimm8 + {mask: 0x0000e003, value: 0x00006000, op: C_LD, args: argTypeList{arg_rd_p, arg_rs1_p, arg_c_uimm8}}, + // C.LDSP rd_n0, c_uimm9sp + {mask: 0x0000e003, value: 0x00006002, op: C_LDSP, args: argTypeList{arg_rd_n0, arg_c_uimm9sp}}, + // C.LI rd_n0, c_imm6 + {mask: 0x0000e003, value: 0x00004001, op: C_LI, args: argTypeList{arg_rd_n0, arg_c_imm6}}, + // C.LUI rd_n2, c_nzimm18 + {mask: 0x0000e003, value: 0x00006001, op: C_LUI, args: argTypeList{arg_rd_n2, arg_c_nzimm18}}, + // C.LW rd_p, rs1_p, c_uimm7 + {mask: 0x0000e003, value: 0x00004000, op: C_LW, args: argTypeList{arg_rd_p, arg_rs1_p, arg_c_uimm7}}, + // C.LWSP rd_n0, c_uimm8sp + {mask: 0x0000e003, value: 0x00004002, op: C_LWSP, args: argTypeList{arg_rd_n0, arg_c_uimm8sp}}, + // C.MV rd_n0, c_rs2_n0 + {mask: 0x0000f003, value: 0x00008002, op: C_MV, args: argTypeList{arg_rd_n0, arg_c_rs2_n0}}, + // C.NOP c_nzimm6 + {mask: 0x0000ef83, value: 0x00000001, op: C_NOP, args: argTypeList{arg_c_nzimm6}}, + // C.OR rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c41, op: C_OR, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.SD rs1_p, rs2_p, c_uimm8 + {mask: 0x0000e003, value: 
0x0000e000, op: C_SD, args: argTypeList{arg_rs1_p, arg_rs2_p, arg_c_uimm8}}, + // C.SDSP c_rs2, c_uimm9sp_s + {mask: 0x0000e003, value: 0x0000e002, op: C_SDSP, args: argTypeList{arg_c_rs2, arg_c_uimm9sp_s}}, + // C.SLLI rd_rs1_n0, c_nzuimm6 + {mask: 0x0000e003, value: 0x00000002, op: C_SLLI, args: argTypeList{arg_rd_rs1_n0, arg_c_nzuimm6}}, + // C.SRAI rd_rs1_p, c_nzuimm6 + {mask: 0x0000ec03, value: 0x00008401, op: C_SRAI, args: argTypeList{arg_rd_rs1_p, arg_c_nzuimm6}}, + // C.SRLI rd_rs1_p, c_nzuimm6 + {mask: 0x0000ec03, value: 0x00008001, op: C_SRLI, args: argTypeList{arg_rd_rs1_p, arg_c_nzuimm6}}, + // C.SUB rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c01, op: C_SUB, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.SUBW rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00009c01, op: C_SUBW, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // C.SW rs1_p, rs2_p, c_uimm7 + {mask: 0x0000e003, value: 0x0000c000, op: C_SW, args: argTypeList{arg_rs1_p, arg_rs2_p, arg_c_uimm7}}, + // C.SWSP c_rs2, c_uimm8sp_s + {mask: 0x0000e003, value: 0x0000c002, op: C_SWSP, args: argTypeList{arg_c_rs2, arg_c_uimm8sp_s}}, + // C.UNIMP + {mask: 0x0000ffff, value: 0x00000000, op: C_UNIMP, args: argTypeList{}}, + // C.XOR rd_rs1_p, rs2_p + {mask: 0x0000fc63, value: 0x00008c21, op: C_XOR, args: argTypeList{arg_rd_rs1_p, arg_rs2_p}}, + // DIV rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02004033, op: DIV, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // DIVU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02005033, op: DIVU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // DIVUW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200503b, op: DIVUW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // DIVW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200403b, op: DIVW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // EBREAK + {mask: 0xffffffff, value: 0x00100073, op: EBREAK, args: argTypeList{}}, + // ECALL + {mask: 0xffffffff, value: 0x00000073, op: ECALL, args: argTypeList{}}, + // FADD.D fd, 
fs1, fs2 + {mask: 0xfe00007f, value: 0x02000053, op: FADD_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FADD.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x04000053, op: FADD_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FADD.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x06000053, op: FADD_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FADD.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x00000053, op: FADD_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FCLASS.D rd, fs1 + {mask: 0xfff0707f, value: 0xe2001053, op: FCLASS_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCLASS.H rd, fs1 + {mask: 0xfff0707f, value: 0xe4001053, op: FCLASS_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCLASS.Q rd, fs1 + {mask: 0xfff0707f, value: 0xe6001053, op: FCLASS_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCLASS.S rd, fs1 + {mask: 0xfff0707f, value: 0xe0001053, op: FCLASS_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.D.L fd, rs1 + {mask: 0xfff0007f, value: 0xd2200053, op: FCVT_D_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.D.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd2300053, op: FCVT_D_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.D.Q fd, fs1 + {mask: 0xfff0007f, value: 0x42300053, op: FCVT_D_Q, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.D.S fd, fs1 + {mask: 0xfff0007f, value: 0x42000053, op: FCVT_D_S, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.D.W fd, rs1 + {mask: 0xfff0007f, value: 0xd2000053, op: FCVT_D_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.D.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd2100053, op: FCVT_D_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.L fd, rs1 + {mask: 0xfff0007f, value: 0xd4200053, op: FCVT_H_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd4300053, op: FCVT_H_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.S fd, fs1 + {mask: 0xfff0007f, value: 0x44000053, op: FCVT_H_S, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.H.W fd, rs1 + {mask: 0xfff0007f, 
value: 0xd4000053, op: FCVT_H_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.H.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd4100053, op: FCVT_H_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.LU.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2300053, op: FCVT_LU_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.LU.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4300053, op: FCVT_LU_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.LU.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6300053, op: FCVT_LU_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.LU.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0300053, op: FCVT_LU_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2200053, op: FCVT_L_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4200053, op: FCVT_L_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6200053, op: FCVT_L_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.L.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0200053, op: FCVT_L_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.Q.D fd, fs1 + {mask: 0xfff0007f, value: 0x46100053, op: FCVT_Q_D, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.Q.L fd, rs1 + {mask: 0xfff0007f, value: 0xd6200053, op: FCVT_Q_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.Q.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd6300053, op: FCVT_Q_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.Q.S fd, fs1 + {mask: 0xfff0007f, value: 0x46000053, op: FCVT_Q_S, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.Q.W fd, rs1 + {mask: 0xfff0007f, value: 0xd6000053, op: FCVT_Q_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.Q.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd6100053, op: FCVT_Q_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.D fd, fs1 + {mask: 0xfff0007f, value: 0x40100053, op: FCVT_S_D, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.S.H fd, fs1 + {mask: 0xfff0007f, value: 0x40200053, op: FCVT_S_H, args: argTypeList{arg_fd, 
arg_fs1}}, + // FCVT.S.L fd, rs1 + {mask: 0xfff0007f, value: 0xd0200053, op: FCVT_S_L, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.LU fd, rs1 + {mask: 0xfff0007f, value: 0xd0300053, op: FCVT_S_LU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.Q fd, fs1 + {mask: 0xfff0007f, value: 0x40300053, op: FCVT_S_Q, args: argTypeList{arg_fd, arg_fs1}}, + // FCVT.S.W fd, rs1 + {mask: 0xfff0007f, value: 0xd0000053, op: FCVT_S_W, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.S.WU fd, rs1 + {mask: 0xfff0007f, value: 0xd0100053, op: FCVT_S_WU, args: argTypeList{arg_fd, arg_rs1}}, + // FCVT.WU.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2100053, op: FCVT_WU_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.WU.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4100053, op: FCVT_WU_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.WU.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6100053, op: FCVT_WU_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.WU.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0100053, op: FCVT_WU_S, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.D rd, fs1 + {mask: 0xfff0007f, value: 0xc2000053, op: FCVT_W_D, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.H rd, fs1 + {mask: 0xfff0007f, value: 0xc4000053, op: FCVT_W_H, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.Q rd, fs1 + {mask: 0xfff0007f, value: 0xc6000053, op: FCVT_W_Q, args: argTypeList{arg_rd, arg_fs1}}, + // FCVT.W.S rd, fs1 + {mask: 0xfff0007f, value: 0xc0000053, op: FCVT_W_S, args: argTypeList{arg_rd, arg_fs1}}, + // FDIV.D fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x1a000053, op: FDIV_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FDIV.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x1c000053, op: FDIV_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FDIV.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x1e000053, op: FDIV_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FDIV.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x18000053, op: FDIV_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FENCE 
pred, succ + {mask: 0x0000707f, value: 0x0000000f, op: FENCE, args: argTypeList{arg_pred, arg_succ}}, + // FENCE.I + {mask: 0x0000707f, value: 0x0000100f, op: FENCE_I, args: argTypeList{}}, + // FEQ.D rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa2002053, op: FEQ_D, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FEQ.H rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa4002053, op: FEQ_H, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FEQ.Q rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa6002053, op: FEQ_Q, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FEQ.S rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa0002053, op: FEQ_S, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLD fd, rs1_mem + {mask: 0x0000707f, value: 0x00003007, op: FLD, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FLE.D rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa2000053, op: FLE_D, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLE.H rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa4000053, op: FLE_H, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLE.Q rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa6000053, op: FLE_Q, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLE.S rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa0000053, op: FLE_S, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLH fd, rs1_mem + {mask: 0x0000707f, value: 0x00001007, op: FLH, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FLQ fd, rs1_mem + {mask: 0x0000707f, value: 0x00004007, op: FLQ, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FLT.D rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa2001053, op: FLT_D, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLT.H rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa4001053, op: FLT_H, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLT.Q rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa6001053, op: FLT_Q, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLT.S rd, fs1, fs2 + {mask: 0xfe00707f, value: 0xa0001053, op: FLT_S, args: argTypeList{arg_rd, arg_fs1, arg_fs2}}, + // FLW fd, rs1_mem + 
{mask: 0x0000707f, value: 0x00002007, op: FLW, args: argTypeList{arg_fd, arg_rs1_mem}}, + // FMADD.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x02000043, op: FMADD_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMADD.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x04000043, op: FMADD_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMADD.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x06000043, op: FMADD_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMADD.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x00000043, op: FMADD_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMAX.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2a001053, op: FMAX_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMAX.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2c001053, op: FMAX_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMAX.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2e001053, op: FMAX_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMAX.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x28001053, op: FMAX_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2a000053, op: FMIN_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2c000053, op: FMIN_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x2e000053, op: FMIN_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMIN.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x28000053, op: FMIN_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMSUB.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x02000047, op: FMSUB_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMSUB.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x04000047, op: FMSUB_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMSUB.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x06000047, op: FMSUB_Q, args: 
argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMSUB.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x00000047, op: FMSUB_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FMUL.D fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x12000053, op: FMUL_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMUL.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x14000053, op: FMUL_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMUL.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x16000053, op: FMUL_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMUL.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x10000053, op: FMUL_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FMV.D.X fd, rs1 + {mask: 0xfff0707f, value: 0xf2000053, op: FMV_D_X, args: argTypeList{arg_fd, arg_rs1}}, + // FMV.H.X fd, rs1 + {mask: 0xfff0707f, value: 0xf4000053, op: FMV_H_X, args: argTypeList{arg_fd, arg_rs1}}, + // FMV.W.X fd, rs1 + {mask: 0xfff0707f, value: 0xf0000053, op: FMV_W_X, args: argTypeList{arg_fd, arg_rs1}}, + // FMV.X.D rd, fs1 + {mask: 0xfff0707f, value: 0xe2000053, op: FMV_X_D, args: argTypeList{arg_rd, arg_fs1}}, + // FMV.X.H rd, fs1 + {mask: 0xfff0707f, value: 0xe4000053, op: FMV_X_H, args: argTypeList{arg_rd, arg_fs1}}, + // FMV.X.W rd, fs1 + {mask: 0xfff0707f, value: 0xe0000053, op: FMV_X_W, args: argTypeList{arg_rd, arg_fs1}}, + // FNMADD.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0200004f, op: FNMADD_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMADD.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0400004f, op: FNMADD_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMADD.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0600004f, op: FNMADD_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMADD.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0000004f, op: FNMADD_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.D fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0200004b, op: FNMSUB_D, 
args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.H fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0400004b, op: FNMSUB_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.Q fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0600004b, op: FNMSUB_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FNMSUB.S fd, fs1, fs2, fs3 + {mask: 0x0600007f, value: 0x0000004b, op: FNMSUB_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2, arg_fs3}}, + // FSD fs2, rs1_store + {mask: 0x0000707f, value: 0x00003027, op: FSD, args: argTypeList{arg_fs2, arg_rs1_store}}, + // FSGNJN.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x22001053, op: FSGNJN_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJN.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x24001053, op: FSGNJN_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJN.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x26001053, op: FSGNJN_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJN.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x20001053, op: FSGNJN_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x22002053, op: FSGNJX_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x24002053, op: FSGNJX_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x26002053, op: FSGNJX_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJX.S fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x20002053, op: FSGNJX_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.D fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x22000053, op: FSGNJ_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.H fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x24000053, op: FSGNJ_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.Q fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x26000053, op: FSGNJ_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSGNJ.S 
fd, fs1, fs2 + {mask: 0xfe00707f, value: 0x20000053, op: FSGNJ_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSH fs2, rs1_store + {mask: 0x0000707f, value: 0x00001027, op: FSH, args: argTypeList{arg_fs2, arg_rs1_store}}, + // FSQ fs2, rs1_store + {mask: 0x0000707f, value: 0x00004027, op: FSQ, args: argTypeList{arg_fs2, arg_rs1_store}}, + // FSQRT.D fd, fs1 + {mask: 0xfff0007f, value: 0x5a000053, op: FSQRT_D, args: argTypeList{arg_fd, arg_fs1}}, + // FSQRT.H fd, fs1 + {mask: 0xfff0007f, value: 0x5c000053, op: FSQRT_H, args: argTypeList{arg_fd, arg_fs1}}, + // FSQRT.Q fd, fs1 + {mask: 0xfff0007f, value: 0x5e000053, op: FSQRT_Q, args: argTypeList{arg_fd, arg_fs1}}, + // FSQRT.S fd, fs1 + {mask: 0xfff0007f, value: 0x58000053, op: FSQRT_S, args: argTypeList{arg_fd, arg_fs1}}, + // FSUB.D fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x0a000053, op: FSUB_D, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSUB.H fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x0c000053, op: FSUB_H, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSUB.Q fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x0e000053, op: FSUB_Q, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSUB.S fd, fs1, fs2 + {mask: 0xfe00007f, value: 0x08000053, op: FSUB_S, args: argTypeList{arg_fd, arg_fs1, arg_fs2}}, + // FSW fs2, rs1_store + {mask: 0x0000707f, value: 0x00002027, op: FSW, args: argTypeList{arg_fs2, arg_rs1_store}}, + // JAL rd, jimm20 + {mask: 0x0000007f, value: 0x0000006f, op: JAL, args: argTypeList{arg_rd, arg_jimm20}}, + // JALR rd, rs1_mem + {mask: 0x0000707f, value: 0x00000067, op: JALR, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LB rd, rs1_mem + {mask: 0x0000707f, value: 0x00000003, op: LB, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LBU rd, rs1_mem + {mask: 0x0000707f, value: 0x00004003, op: LBU, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LD rd, rs1_mem + {mask: 0x0000707f, value: 0x00003003, op: LD, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LH rd, rs1_mem + {mask: 0x0000707f, value: 
0x00001003, op: LH, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LHU rd, rs1_mem + {mask: 0x0000707f, value: 0x00005003, op: LHU, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LR.D rd, rs1_amo + {mask: 0xfff0707f, value: 0x1000302f, op: LR_D, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.D.AQ rd, rs1_amo + {mask: 0xfff0707f, value: 0x1400302f, op: LR_D_AQ, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.D.AQRL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1600302f, op: LR_D_AQRL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.D.RL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1200302f, op: LR_D_RL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W rd, rs1_amo + {mask: 0xfff0707f, value: 0x1000202f, op: LR_W, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W.AQ rd, rs1_amo + {mask: 0xfff0707f, value: 0x1400202f, op: LR_W_AQ, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W.AQRL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1600202f, op: LR_W_AQRL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LR.W.RL rd, rs1_amo + {mask: 0xfff0707f, value: 0x1200202f, op: LR_W_RL, args: argTypeList{arg_rd, arg_rs1_amo}}, + // LUI rd, imm20 + {mask: 0x0000007f, value: 0x00000037, op: LUI, args: argTypeList{arg_rd, arg_imm20}}, + // LW rd, rs1_mem + {mask: 0x0000707f, value: 0x00002003, op: LW, args: argTypeList{arg_rd, arg_rs1_mem}}, + // LWU rd, rs1_mem + {mask: 0x0000707f, value: 0x00006003, op: LWU, args: argTypeList{arg_rd, arg_rs1_mem}}, + // MAX rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a006033, op: MAX, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MAXU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a007033, op: MAXU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MIN rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a004033, op: MIN, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MINU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0a005033, op: MINU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MUL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02000033, op: MUL, args: 
argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULH rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02001033, op: MULH, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULHSU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02002033, op: MULHSU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULHU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02003033, op: MULHU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // MULW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200003b, op: MULW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // OR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00006033, op: OR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ORC.B rd, rs1 + {mask: 0xfff0707f, value: 0x28705013, op: ORC_B, args: argTypeList{arg_rd, arg_rs1}}, + // ORI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00006013, op: ORI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ORN rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40006033, op: ORN, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REM rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02006033, op: REM, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REMU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x02007033, op: REMU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REMUW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200703b, op: REMUW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REMW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0200603b, op: REMW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // REV8 rd, rs1 + {mask: 0xfff0707f, value: 0x6b805013, op: REV8, args: argTypeList{arg_rd, arg_rs1}}, + // ROL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x60001033, op: ROL, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ROLW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x6000103b, op: ROLW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // ROR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x60005033, op: ROR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // RORI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x60005013, op: RORI, args: 
argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // RORIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x6000501b, op: RORIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // RORW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x6000503b, op: RORW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SB rs2, rs1_store + {mask: 0x0000707f, value: 0x00000023, op: SB, args: argTypeList{arg_rs2, arg_rs1_store}}, + // SC.D rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1800302f, op: SC_D, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.D.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1c00302f, op: SC_D_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.D.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1e00302f, op: SC_D_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.D.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1a00302f, op: SC_D_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1800202f, op: SC_W, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W.AQ rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1c00202f, op: SC_W_AQ, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W.AQRL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1e00202f, op: SC_W_AQRL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SC.W.RL rd, rs2, rs1_amo + {mask: 0xfe00707f, value: 0x1a00202f, op: SC_W_RL, args: argTypeList{arg_rd, arg_rs2, arg_rs1_amo}}, + // SD rs2, rs1_store + {mask: 0x0000707f, value: 0x00003023, op: SD, args: argTypeList{arg_rs2, arg_rs1_store}}, + // SEXT.B rd, rs1 + {mask: 0xfff0707f, value: 0x60401013, op: SEXT_B, args: argTypeList{arg_rd, arg_rs1}}, + // SEXT.H rd, rs1 + {mask: 0xfff0707f, value: 0x60501013, op: SEXT_H, args: argTypeList{arg_rd, arg_rs1}}, + // SH rs2, rs1_store + {mask: 0x0000707f, value: 0x00001023, op: SH, args: argTypeList{arg_rs2, arg_rs1_store}}, + // SH1ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x20002033, op: SH1ADD, args: 
argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH1ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x2000203b, op: SH1ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH2ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x20004033, op: SH2ADD, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH2ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x2000403b, op: SH2ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH3ADD rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x20006033, op: SH3ADD, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SH3ADD.UW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x2000603b, op: SH3ADD_UW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00001033, op: SLL, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLLI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x00001013, op: SLLI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // SLLIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x0000101b, op: SLLIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // SLLI.UW rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x0800101b, op: SLLI_UW, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // SLLW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0000103b, op: SLLW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLT rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00002033, op: SLT, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SLTI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00002013, op: SLTI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // SLTIU rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00003013, op: SLTIU, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // SLTU rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00003033, op: SLTU, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRA rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40005033, op: SRA, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRAI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x40005013, op: SRAI, args: argTypeList{arg_rd, 
arg_rs1, arg_shamt6}}, + // SRAIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x4000501b, op: SRAIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // SRAW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x4000503b, op: SRAW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRL rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00005033, op: SRL, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SRLI rd, rs1, shamt6 + {mask: 0xfc00707f, value: 0x00005013, op: SRLI, args: argTypeList{arg_rd, arg_rs1, arg_shamt6}}, + // SRLIW rd, rs1, shamt5 + {mask: 0xfe00707f, value: 0x0000501b, op: SRLIW, args: argTypeList{arg_rd, arg_rs1, arg_shamt5}}, + // SRLW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x0000503b, op: SRLW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SUB rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40000033, op: SUB, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SUBW rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x4000003b, op: SUBW, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // SW rs2, rs1_store + {mask: 0x0000707f, value: 0x00002023, op: SW, args: argTypeList{arg_rs2, arg_rs1_store}}, + // XNOR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x40004033, op: XNOR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // XOR rd, rs1, rs2 + {mask: 0xfe00707f, value: 0x00004033, op: XOR, args: argTypeList{arg_rd, arg_rs1, arg_rs2}}, + // XORI rd, rs1, imm12 + {mask: 0x0000707f, value: 0x00004013, op: XORI, args: argTypeList{arg_rd, arg_rs1, arg_imm12}}, + // ZEXT.H rd, rs1 + {mask: 0xfff0707f, value: 0x0800403b, op: ZEXT_H, args: argTypeList{arg_rd, arg_rs1}}, +} diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go index 53c0f1de..55c498a0 100644 --- a/riscv64/riscv64spec/spec.go +++ b/riscv64/riscv64spec/spec.go @@ -51,7 +51,7 @@ var extensions = []string{ } const ( - prologueSec = "// Generated by riscv64spec riscv-opcodes\n// DO NOT EDIT\n\n// Copyright 2024 The Go Authors. 
All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage riscv64asm\n\n" + prologueSec = "// Code generated by riscv64spec riscv-opcodes\n// DO NOT EDIT\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage riscv64asm\n\n" opSec = "const (\n\t_ Op = iota\n" opstrSec = "var opstr = [...]string{\n" instFormatsSec = "var instFormats = [...]instFormat{\n" @@ -175,7 +175,7 @@ func genInst(words []string) { var value uint32 var mask uint32 - var instArgs []string + var argTypeList []string for i := 1; i < len(words); i++ { if strings.Contains(words[i], "=") { @@ -188,13 +188,13 @@ func genInst(words []string) { value |= subval mask |= submsk } else if len(words[i]) > 0 { - instArgs = append(instArgs, words[i]) + argTypeList = append(argTypeList, words[i]) } } - instArgsStr := inferFormats(instArgs, op) + instArgsStr := inferFormats(argTypeList, op) instFormatComment := "// " + strings.Replace(op, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) - instFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", mask, value, op, instArgsStr) + instFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: argTypeList{%s}},", mask, value, op, instArgsStr) // Handle the suffix of atomic instruction. 
if isAtomic(op) { @@ -206,7 +206,7 @@ func genInst(words []string) { avalue := value | (uint32(i) << 25) amask := mask | 0x06000000 ainstFormatComment := "// " + strings.Replace(aop, "_", ".", -1) + " " + strings.Replace(instArgsStr, "arg_", "", -1) - ainstFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: instArgs{%s}},", amask, avalue, aop, instArgsStr) + ainstFormat := fmt.Sprintf("{mask: %#08x, value: %#08x, op: %s, args: argTypeList{%s}},", amask, avalue, aop, instArgsStr) ops = append(ops, aop) opstrs[aop] = aopstr instFormats[aop] = ainstFormat @@ -227,7 +227,7 @@ func genInst(words []string) { // U-Type (inst rd, imm), // SB-Type (inst rs1, rs2, offset) // S-Type (inst rs2, offset(rs1)) -func inferFormats(instArgs []string, op string) string { +func inferFormats(argTypeList []string, op string) string { switch { case strings.Contains(op, "AMO") || strings.Contains(op, "SC_"): return "arg_rd, arg_rs2, arg_rs1_amo" @@ -265,7 +265,7 @@ func inferFormats(instArgs []string, op string) string { default: var instStr []string - for _, arg := range instArgs { + for _, arg := range argTypeList { if decodeArgs(arg, op) != "" { instStr = append(instStr, decodeArgs(arg, op)) } From bc8e2b9ab6c676219fde0a71f3cdfc902daf0204 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 29 May 2024 16:57:21 -0500 Subject: [PATCH 032/200] ppc64/ppc64asm: speed up PPC64 instruction decoding It's really slow to iterate every instruction until a match is found. This turns decoding PPC64 binaries into a seemingly quick operation instead of a seconds long process for go toolchain sized binaries. Use the primary opcode to map each instruction into a list of viable masks, and group instructions with identical masks into a map to speed up decoding. 
Change-Id: Id0d0eefbb77244c379832d8a602662e551a7568a Reviewed-on: https://go-review.googlesource.com/c/arch/+/602717 Reviewed-by: Archana Ravindar Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov --- ppc64/ppc64asm/decode.go | 50 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/ppc64/ppc64asm/decode.go b/ppc64/ppc64asm/decode.go index b8d857c6..6c25c5c3 100644 --- a/ppc64/ppc64asm/decode.go +++ b/ppc64/ppc64asm/decode.go @@ -8,6 +8,8 @@ import ( "encoding/binary" "fmt" "log" + "sort" + "sync" ) const debugDecode = false @@ -111,6 +113,47 @@ const ( TypeLast // must be the last one ) +type InstMaskMap struct { + mask uint64 + insn map[uint64]*instFormat +} + +// Note, plxv/pstxv have a 5 bit opcode in the second instruction word. Only match the most significant 5 of 6 bits of the second primary opcode. +const lookupOpcodeMask = uint64(0xFC000000F8000000) + +// Three level lookup for any instruction: +// 1. Primary opcode map to a list of secondary opcode maps. +// 2. A list of opcodes with distinct masks, sorted by largest to smallest mask. +// 3. A map to a specific opcodes with a given mask. 
+var getLookupMap = sync.OnceValue(func() map[uint64][]InstMaskMap { + lMap := make(map[uint64][]InstMaskMap) + for idx, _ := range instFormats { + i := &instFormats[idx] + pop := i.Value & lookupOpcodeMask + var me *InstMaskMap + masks := lMap[pop] + for im, m := range masks { + if m.mask == i.Mask { + me = &masks[im] + break + } + } + if me == nil { + me = &InstMaskMap{i.Mask, map[uint64]*instFormat{}} + masks = append(masks, *me) + } + me.insn[i.Value] = i + lMap[pop] = masks + } + // Reverse sort masks to ensure extended mnemonics match before more generic forms of an opcode (e.x nop over ori 0,0,0) + for _, v := range lMap { + sort.Slice(v, func(i, j int) bool { + return v[i].mask > v[j].mask + }) + } + return lMap +}) + func (t ArgType) String() string { switch t { default: @@ -191,10 +234,13 @@ func Decode(src []byte, ord binary.ByteOrder) (inst Inst, err error) { ui |= uint64(ui_extn[1]) inst.SuffixEnc = ui_extn[1] } - for i, iform := range instFormats { - if ui&iform.Mask != iform.Value { + + fmts := getLookupMap()[ui&lookupOpcodeMask] + for i, masks := range fmts { + if _, fnd := masks.insn[masks.mask&ui]; !fnd { continue } + iform := masks.insn[masks.mask&ui] if ui&iform.DontCare != 0 { if debugDecode { log.Printf("Decode(%#x): unused bit is 1 for Op %s", ui, iform.Op) From 76fb3b0a5d488e2e7a5fbbc6fef71483b973c723 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 15 Oct 2024 11:12:59 +0200 Subject: [PATCH 033/200] s390x: fix decoding several gnu instructions Some of the GNU instructions decoding incorrectly due to incorrect Immediate field type and added testcases for the same. Also, done the code clean up and reformat/Indentation. 
Change-Id: I21df05c240d918f9de48b825123c7eaa2181c259 Reviewed-on: https://go-review.googlesource.com/c/arch/+/620335 Reviewed-by: Vishwanatha HD Reviewed-by: Michael Pratt Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- s390x/s390xasm/field.go | 51 - s390x/s390xasm/gnu.go | 28 +- s390x/s390xasm/inst.go | 62 +- s390x/s390xasm/tables.go | 75 +- s390x/s390xasm/testdata/decode_generated.txt | 1186 +++++++++--------- s390x/s390xmap/map.go | 8 +- 6 files changed, 697 insertions(+), 713 deletions(-) diff --git a/s390x/s390xasm/field.go b/s390x/s390xasm/field.go index e00415fc..29adc821 100644 --- a/s390x/s390xasm/field.go +++ b/s390x/s390xasm/field.go @@ -6,7 +6,6 @@ package s390xasm import ( "fmt" - "strings" ) // A BitField is a bit-field in a 64-bit double word. @@ -46,53 +45,3 @@ func (b BitField) ParseSigned(i uint64) int64 { u := int64(b.Parse(i)) return u << (64 - b.Bits) >> (64 - b.Bits) } - -// BitFields is a series of BitFields representing a single number. -type BitFields []BitField - -func (bs BitFields) String() string { - ss := make([]string, len(bs)) - for i, bf := range bs { - ss[i] = bf.String() - } - return fmt.Sprintf("<%s>", strings.Join(ss, "|")) -} - -func (bs *BitFields) Append(b BitField) { - *bs = append(*bs, b) -} - -// parse extracts the bitfields from i, concatenate them and return the result -// as an unsigned integer and the total length of all the bitfields. -// parse will panic if any bitfield in b is invalid, but it doesn't check if -// the sequence of bitfields is reasonable. -func (bs BitFields) parse(i uint64) (u uint64, Bits uint8) { - for _, b := range bs { - u = (u << b.Bits) | uint64(b.Parse(i)) - Bits += b.Bits - } - return u, Bits -} - -// Parse extracts the bitfields from i, concatenate them and return the result -// as an unsigned integer. Parse will panic if any bitfield in b is invalid. 
-func (bs BitFields) Parse(i uint64) uint64 { - u, _ := bs.parse(i) - return u -} - -// ParseSigned extracts the bitfields from i, concatenate them and return the result -// as a signed integer. Parse will panic if any bitfield in b is invalid. -func (bs BitFields) ParseSigned(i uint64) int64 { - u, l := bs.parse(i) - return int64(u) << (64 - l) >> (64 - l) -} - -// Count the number of bits in the aggregate BitFields -func (bs BitFields) NumBits() int { - num := 0 - for _, b := range bs { - num += int(b.Bits) - } - return num -} diff --git a/s390x/s390xasm/gnu.go b/s390x/s390xasm/gnu.go index 5755b354..71b9f138 100644 --- a/s390x/s390xasm/gnu.go +++ b/s390x/s390xasm/gnu.go @@ -280,6 +280,7 @@ func HandleExtndMnemonic(inst *Inst) string { typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesb"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcesbs"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 0, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcedb"}, + typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 0, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "vfcedbs"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesb"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 2, Value2: 8, Value3: 1, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcesbs"}, typ5ExtndMnics{BaseOpStr: "vfce", Value1: 3, Value2: 8, Value3: 0, Offset1: 3, Offset2: 4, Offset3: 5, ExtnOpStr: "wfcedb"}, @@ -453,8 +454,7 @@ func HandleExtndMnemonic(inst *Inst) string { case "vavg", "vavgl", "verllv", "veslv", "vesrav", "vesrlv", "vgfm", "vgm", "vmx", "vmxl", "vmrh", "vmrl", "vmn", "vmnl", "vrep", "vclz", "vctz", "vec", "vecl", "vlc", "vlp", "vpopct", "vrepi", "verim", "verll", "vesl", "vesra", "vesrl", "vgfma", "vlrep", "vlgv", "vlvg", "vlbrrep", "vler", "vlbr", 
"vstbr", "vster", "vpk", "vme", "vmh", "vmle", "vmlh", "vmlo", "vml", "vmo", "vmae", - "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao", "vmph", "vmplh", "vupl", "vupll", "vscbi", "vs", "vsum", "vsumg", "vsumq", - "va", "vacc": + "vmale", "vmalo", "vmal", "vmah", "vmalh", "vmao", "vmph", "vmplh", "vupl", "vupll", "vscbi", "vs", "vsum", "vsumg", "vsumq", "va", "vacc": switch opString { @@ -569,16 +569,18 @@ func HandleExtndMnemonic(inst *Inst) string { break } } - case "vsum", "vsumg": - for i := 1; i < len(vecInstrExtndMnics)-4; i++ { - if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { - newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr - removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) - break - } + case "vsum", "vsumg", "vsumq": + var off int + switch opString { + case "vsum": + off = 0 + case "vsumg": + off = 1 + case "vsumq": + off = 2 + } - case "vsumq": - for i := 2; i < len(vecInstrExtndMnics)-2; i++ { + for i := off; i < len(vecInstrExtndMnics)-4+off; i++ { if uint8(inst.Args[vecInstrExtndMnics[i].Offset].(Mask)) == vecInstrExtndMnics[i].Value { newOpStr = opString + vecInstrExtndMnics[i].ExtnOpStr removeArg(inst, int8(vecInstrExtndMnics[i].Offset)) @@ -668,8 +670,8 @@ func HandleExtndMnemonic(inst *Inst) string { case "vac", "vaccc": if uint8(inst.Args[4].(Mask)) == uint8(4) { - newOpStr = opString + vecInstrExtndMnics[3].ExtnOpStr - removeArg(inst, int8(3)) + newOpStr = opString + vecInstrExtndMnics[4].ExtnOpStr + removeArg(inst, int8(4)) } case "vceq", "vch", "vchl": diff --git a/s390x/s390xasm/inst.go b/s390x/s390xasm/inst.go index 19d70156..e1fde847 100644 --- a/s390x/s390xasm/inst.go +++ b/s390x/s390xasm/inst.go @@ -12,9 +12,9 @@ import ( type Inst struct { Op Op // Opcode mnemonic - Enc uint64 // Raw encoding bits (if Len == 8, this is the prefix word) + Enc uint64 // Raw encoding bits Len int // Length of encoding in bytes. - Args Args // Instruction arguments, in Power ISA manual order. 
+ Args Args // Instruction arguments, in s390x ISA manual order. } func (i Inst) String(pc uint64) string { @@ -26,19 +26,32 @@ func (i Inst) String(pc uint64) string { } mnemonic := HandleExtndMnemonic(&i) buf.WriteString(fmt.Sprintf("%s", mnemonic)) - for j, arg := range i.Args { - if arg == nil { + for j := 0; j < len(i.Args); j++ { + if i.Args[j] == nil { break } + str := i.Args[j].String(pc) if j == 0 { buf.WriteString(" ") } else { - switch arg.(type) { - case VReg, Reg: + switch i.Args[j].(type) { + case VReg: if _, ok := i.Args[j-1].(Disp12); ok { - buf.WriteString("") + buf.WriteString("(") } else if _, ok := i.Args[j-1].(Disp20); ok { - buf.WriteString("") + buf.WriteString("(") + } else { + buf.WriteString(",") + } + case Reg: + if _, ok := i.Args[j-1].(Disp12); ok { + if str != "" { + buf.WriteString("(") + } + } else if _, ok := i.Args[j-1].(Disp20); ok { + if str != "" { + buf.WriteString("(") + } } else { buf.WriteString(",") } @@ -47,13 +60,34 @@ func (i Inst) String(pc uint64) string { buf.WriteString(",") } else if _, ok := i.Args[j-1].(Reg); ok { buf.WriteString(",") + } else if _, ok := i.Args[j-1].(Disp12); ok { + if str != "" { + buf.WriteString("(") + } + } else if _, ok := i.Args[j-1].(Disp20); ok { + if str != "" { + buf.WriteString("(") + } + } else if _, ok := i.Args[j-1].(Len); ok { + buf.WriteString(",") + } else if _, ok := i.Args[j-1].(Index); ok { + if ((i.Args[j-1].String(pc)) != "") && str != "" { + str = "," + str + } else if str == "" { + str = ")" + } } case Index, Len: + if str != "" || (i.Args[j+1].String(pc)) != "" { + buf.WriteString("(") + } else { + j = j + 1 + } default: buf.WriteString(",") } } - buf.WriteString(arg.String(pc)) + buf.WriteString(str) if rxb_check && i.Args[j+2] == nil { break } @@ -145,7 +179,7 @@ func (r Index) String(pc uint64) string { switch { case X1 <= r && r <= X15: s := "%" - return fmt.Sprintf("%sr%d,", s, int(r-X0)) + return fmt.Sprintf("%sr%d", s, int(r-X0)) case X0 == r: return 
fmt.Sprintf("") default: @@ -159,9 +193,9 @@ type Disp20 uint32 func (Disp20) IsArg() {} func (r Disp20) String(pc uint64) string { if (r>>19)&0x01 == 1 { - return fmt.Sprintf("%d(", int32(r|0xfff<<20)) + return fmt.Sprintf("%d", int32(r|0xfff<<20)) } else { - return fmt.Sprintf("%d(", int32(r)) + return fmt.Sprintf("%d", int32(r)) } } @@ -170,7 +204,7 @@ type Disp12 uint16 func (Disp12) IsArg() {} func (r Disp12) String(pc uint64) string { - return fmt.Sprintf("%d(", r) + return fmt.Sprintf("%d", r) } // RegIm12 represents an 12-bit Register immediate number. @@ -395,5 +429,5 @@ type Len uint8 func (Len) IsArg() {} func (i Len) String(pc uint64) string { - return fmt.Sprintf("%d,", uint16(i)+1) + return fmt.Sprintf("%d", uint16(i)+1) } diff --git a/s390x/s390xasm/tables.go b/s390x/s390xasm/tables.go index f0db5e90..5a66c1fe 100644 --- a/s390x/s390xasm/tables.go +++ b/s390x/s390xasm/tables.go @@ -2518,8 +2518,6 @@ var ( ap_ImmUnsigned_16_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 32}} ap_FPReg_12_15 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{12, 4}} ap_Len_8_15 = &argField{Type: TypeLen, flags: 0x10, BitField: BitField{8, 8}} - ap_ImmUnsigned_8_15 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{8, 8}} - ap_ImmUnsigned_16_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 16}} ap_Mask_8_11 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{8, 4}} ap_RegImSigned16_32_47 = &argField{Type: TypeRegImSigned16, flags: 0x80, BitField: BitField{32, 16}} ap_RegImSigned12_12_23 = &argField{Type: TypeRegImSigned12, flags: 0x80, BitField: BitField{12, 12}} @@ -2531,8 +2529,10 @@ var ( ap_ImmSigned16_32_47 = &argField{Type: TypeImmSigned16, flags: 0x0, BitField: BitField{32, 16}} ap_ImmSigned8_32_39 = &argField{Type: TypeImmSigned8, flags: 0x0, BitField: BitField{32, 8}} ap_Mask_12_15 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{12, 4}} + ap_ImmUnsigned_8_15 = 
&argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{8, 8}} ap_ImmUnsigned_32_47 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 16}} ap_ImmUnsigned_32_39 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{32, 8}} + ap_ImmUnsigned_16_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 16}} ap_FPReg_32_35 = &argField{Type: TypeFPReg, flags: 0x2, BitField: BitField{32, 4}} ap_Mask_36_39 = &argField{Type: TypeMask, flags: 0x800, BitField: BitField{36, 4}} ap_ACReg_24_27 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{24, 4}} @@ -2546,6 +2546,7 @@ var ( ap_ACReg_12_15 = &argField{Type: TypeACReg, flags: 0x3, BitField: BitField{12, 4}} ap_CReg_8_11 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{8, 4}} ap_CReg_12_15 = &argField{Type: TypeCReg, flags: 0x4, BitField: BitField{12, 4}} + ap_ImmSigned32_16_31 = &argField{Type: TypeImmSigned32, flags: 0x0, BitField: BitField{16, 16}} ap_ImmUnsigned_24_27 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{24, 4}} ap_ImmUnsigned_28_31 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{28, 4}} ap_ImmUnsigned_16_23 = &argField{Type: TypeImmUnsigned, flags: 0x0, BitField: BitField{16, 8}} @@ -2706,21 +2707,21 @@ var instFormats = [...]instFormat{ {NC, 0xff00000000000000, 0xd400000000000000, 0x0, // AND (character) (NC D1(L1,B1),D2(B2)) [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, {NI, 0xff00000000000000, 0x9400000000000000, 0x0, // AND (immediate) (NI D1(B1),I2) - [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {NIY, 0xff00000000ff0000, 0xeb00000000540000, 0x0, // AND (immediate) (NIY D1(B1),I2) - [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, 
ap_ImmSigned8_8_15}}, {NIHH, 0xff0f000000000000, 0xa504000000000000, 0x0, // AND IMMEDIATE (high high) (NIHH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NIHL, 0xff0f000000000000, 0xa505000000000000, 0x0, // AND IMMEDIATE (high low) (NIHL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NIHF, 0xff0f000000000000, 0xc00a000000000000, 0x0, // AND IMMEDIATE (high) (NIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {NILH, 0xff0f000000000000, 0xa506000000000000, 0x0, // AND IMMEDIATE (low high) (NILH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NILL, 0xff0f000000000000, 0xa507000000000000, 0x0, // AND IMMEDIATE (low low) (NILL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {NILF, 0xff0f000000000000, 0xc00b000000000000, 0x0, // AND IMMEDIATE (low) (NILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {NCRK, 0xffff000000000000, 0xb9f5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(32) (NCRK R1,R2,R3) [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, {NCGRK, 0xffff000000000000, 0xb9e5000000000000, 0xf0000000000, // AND WITH COMPLEMENT(64) (NCGRK R1,R2,R3) @@ -3338,13 +3339,13 @@ var instFormats = [...]instFormat{ {XC, 0xff00000000000000, 0xd700000000000000, 0x0, // EXCLUSIVE OR (character) (XC D1(L1,B1),D2(B2)) [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, {XI, 0xff00000000000000, 0x9700000000000000, 0x0, // EXCLUSIVE OR (immediate) (XI D1(B1),I2) - [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {XIY, 
0xff00000000ff0000, 0xeb00000000570000, 0x0, // EXCLUSIVE OR (immediate) (XIY D1(B1),I2) - [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {XIHF, 0xff0f000000000000, 0xc006000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (high) (XIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {XILF, 0xff0f000000000000, 0xc007000000000000, 0x0, // EXCLUSIVE OR IMMEDIATE (low) (XILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {EX, 0xff00000000000000, 0x4400000000000000, 0x0, // EXECUTE (EX R1,D2(X2,B2)) [8]*argField{ap_Reg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {EXRL, 0xff0f000000000000, 0xc600000000000000, 0x0, // EXECUTE RELATIVE LONG (EXRL R1,RI2) @@ -3642,7 +3643,7 @@ var instFormats = [...]instFormat{ {LOCFHR, 0xffff000000000000, 0xb9e0000000000000, 0xf0000000000, // LOAD HIGH ON CONDITION (32) (LOCFHR R1,R2,M3) [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Mask_16_19}}, {LGFI, 0xff0f000000000000, 0xc001000000000000, 0x0, // LOAD IMMEDIATE (64→32) (LGFI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {LXDB, 0xff00000000ff0000, 0xed00000000050000, 0xff000000, // LOAD LENGTHENED (long to extended BFP) (LXDB R1,D2(X2,B2)) [8]*argField{ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {LXDBR, 0xffff000000000000, 0xb305000000000000, 0xff0000000000, // LOAD LENGTHENED (long to extended BFP) (LXDBR R1,R2) @@ -3706,17 +3707,17 @@ var instFormats = [...]instFormat{ {LLGHRL, 0xff0f000000000000, 0xc406000000000000, 0x0, // LOAD LOGICAL HALFWORD RELATIVE LONG(64→16) (LLGHRL R1,RI2) [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, {LLIHH, 0xff0f000000000000, 0xa50c000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high high) (LLIHH R1,I2) - 
[8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLIHL, 0xff0f000000000000, 0xa50d000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high low) (LLIHL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLIHF, 0xff0f000000000000, 0xc00e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (high) (LLIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {LLILH, 0xff0f000000000000, 0xa50e000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low high) (LLILH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLILL, 0xff0f000000000000, 0xa50f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low low) (LLILL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {LLILF, 0xff0f000000000000, 0xc00f000000000000, 0x0, // LOAD LOGICAL IMMEDIATE (low) (LLILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {LLGFRL, 0xff0f000000000000, 0xc40e000000000000, 0x0, // LOAD LOGICAL RELATIVE LONG (64→32) (LLGFRL R1,RI2) [8]*argField{ap_Reg_8_11, ap_RegImSigned32_16_47}}, {LLGT, 0xff00000000ff0000, 0xe300000000170000, 0x0, // LOAD LOGICAL THIRTY ONE BITS (64→31) (LLGT R1,D2(X2,B2)) @@ -4016,9 +4017,9 @@ var instFormats = [...]instFormat{ {MGH, 0xff00000000ff0000, 0xe3000000003c0000, 0x0, // MULTIPLY HALFWORD (64→16) (MGH R1,D2(X2,B2)) [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {MHI, 0xff0f000000000000, 0xa70c000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (32→16) (MHI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_31}}, {MGHI, 0xff0f000000000000, 0xa70d000000000000, 0x0, // MULTIPLY HALFWORD IMMEDIATE (64→16) (MGHI R1,I2) - [8]*argField{ap_Reg_8_11, 
ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_31}}, {MLG, 0xff00000000ff0000, 0xe300000000860000, 0x0, // MULTIPLY LOGICAL (128→64) (MLG R1,D2(X2,B2)) [8]*argField{ap_Reg_8_11, ap_DispSigned20_20_39, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {MLGR, 0xffff000000000000, 0xb986000000000000, 0xff0000000000, // MULTIPLY LOGICAL (128→64) (MLGR R1,R2) @@ -4050,9 +4051,9 @@ var instFormats = [...]instFormat{ {MSGFR, 0xffff000000000000, 0xb91c000000000000, 0xff0000000000, // MULTIPLY SINGLE (64←32) (MSGFR R1,R2) [8]*argField{ap_Reg_24_27, ap_Reg_28_31}}, {MSFI, 0xff0f000000000000, 0xc201000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (32) (MSFI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {MSGFI, 0xff0f000000000000, 0xc200000000000000, 0x0, // MULTIPLY SINGLE IMMEDIATE (64←32) (MSGFI R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {MYH, 0xff00000000ff0000, 0xed000000003d0000, 0xf000000, // MULTIPLY UNNORM. (long to ext. high HFP) (MYH R1,R3,D2(X2,B2)) [8]*argField{ap_FPReg_32_35, ap_FPReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19}}, {MYHR, 0xffff000000000000, 0xb33d000000000000, 0xf0000000000, // MULTIPLY UNNORM. (long to ext. 
high HFP) (MYHR R1,R3,R2) @@ -4100,21 +4101,21 @@ var instFormats = [...]instFormat{ {OC, 0xff00000000000000, 0xd600000000000000, 0x0, // OR (character) (OC D1(L1,B1),D2(B2)) [8]*argField{ap_DispUnsigned_20_31, ap_Len_8_15, ap_BaseReg_16_19, ap_DispUnsigned_36_47, ap_BaseReg_32_35}}, {OI, 0xff00000000000000, 0x9600000000000000, 0x0, // OR (immediate) (OI D1(B1),I2) - [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispUnsigned_20_31, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {OIY, 0xff00000000ff0000, 0xeb00000000560000, 0x0, // OR (immediate) (OIY D1(B1),I2) - [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmUnsigned_8_15}}, + [8]*argField{ap_DispSigned20_20_39, ap_BaseReg_16_19, ap_ImmSigned8_8_15}}, {OIHH, 0xff0f000000000000, 0xa508000000000000, 0x0, // OR IMMEDIATE (high high) (OIHH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OIHL, 0xff0f000000000000, 0xa509000000000000, 0x0, // OR IMMEDIATE (high low) (OIHL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OIHF, 0xff0f000000000000, 0xc00c000000000000, 0x0, // OR IMMEDIATE (high) (OIHF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {OILH, 0xff0f000000000000, 0xa50a000000000000, 0x0, // OR IMMEDIATE (low high) (OILH R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OILL, 0xff0f000000000000, 0xa50b000000000000, 0x0, // OR IMMEDIATE (low low) (OILL R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_31}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned16_16_31}}, {OILF, 0xff0f000000000000, 0xc00d000000000000, 0x0, // OR IMMEDIATE (low) (OILF R1,I2) - [8]*argField{ap_Reg_8_11, ap_ImmUnsigned_16_47}}, + [8]*argField{ap_Reg_8_11, ap_ImmSigned32_16_47}}, {OCRK, 0xffff000000000000, 0xb975000000000000, 0xf0000000000, // 
OR WITH COMPLEMENT (32) (OCRK R1,R2,R3) [8]*argField{ap_Reg_24_27, ap_Reg_28_31, ap_Reg_16_19}}, {OCGRK, 0xffff000000000000, 0xb965000000000000, 0xf0000000000, // OR WITH COMPLEMENT (64) (OCGRK R1,R2,R3) @@ -4830,13 +4831,13 @@ var instFormats = [...]instFormat{ {VLEB, 0xff00000000ff0000, 0xe700000000000000, 0x0, // VECTOR LOAD ELEMENT (8) (VLEB V1,D2(X2,B2),M3) [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIH, 0xff00000000ff0000, 0xe700000000410000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (16) (VLEIH V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIF, 0xff00000000ff0000, 0xe700000000430000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (32) (VLEIF V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIG, 0xff00000000ff0000, 0xe700000000420000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (64) (VLEIG V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLEIB, 0xff00000000ff0000, 0xe700000000400000, 0xf000000000000, // VECTOR LOAD ELEMENT IMMEDIATE (8) (VLEIB V1,I2,M3) - [8]*argField{ap_VecReg_8_11, ap_ImmUnsigned_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, + [8]*argField{ap_VecReg_8_11, ap_ImmSigned16_16_31, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VLER, 0xff00000000ff0000, 0xe600000000070000, 0x0, // VECTOR LOAD ELEMENTS REVERSED (VLER V1,D2(X2,B2),M3) [8]*argField{ap_VecReg_8_11, ap_DispUnsigned_20_31, ap_IndexReg_12_15, ap_BaseReg_16_19, ap_Mask_32_35, ap_ImmUnsigned_36_39}}, {VFI, 0xff00000000ff0000, 0xe700000000c70000, 
0xff0000000000, // VECTOR LOAD FP INTEGER (VFI V1,V2,M3,M4,M5) diff --git a/s390x/s390xasm/testdata/decode_generated.txt b/s390x/s390xasm/testdata/decode_generated.txt index b7b3f09f..738a76e0 100644 --- a/s390x/s390xasm/testdata/decode_generated.txt +++ b/s390x/s390xasm/testdata/decode_generated.txt @@ -1,170 +1,170 @@ - 5a82100b| gnu a %r8,11(%r2,%r1) - 1a80| gnu ar %r8,%r0 - b9f80080| gnu ark %r8,%r0,%r0 + 5a82100b| gnu a %r8,11(%r2,%r1) + 1a80| gnu ar %r8,%r0 + b9f80080| gnu ark %r8,%r0,%r0 e382100b005a| gnu ay %r8,11(%r2,%r1) e382100b0008| gnu ag %r8,11(%r2,%r1) - b9080080| gnu agr %r8,%r0 - b9e80080| gnu agrk %r8,%r0,%r0 + b9080080| gnu agr %r8,%r0 + b9e80080| gnu agrk %r8,%r0,%r0 e382100b0018| gnu agf %r8,11(%r2,%r1) - b9180080| gnu agfr %r8,%r0 - b34a0080| gnu axbr %f8,%f0 - b3da0080| gnu axtr %f8,%f0,%f0 + b9180080| gnu agfr %r8,%r0 + b34a0080| gnu axbr %f8,%f0 + b3da0080| gnu axtr %f8,%f0,%f0 b3da0180| gnu axtra %f8,%f0,%f0,1 ed82100b001a| gnu adb %f8,11(%r2,%r1) - b31a0080| gnu adbr %f8,%f0 - b3d20080| gnu adtr %f8,%f0,%f0 + b31a0080| gnu adbr %f8,%f0 + b3d20080| gnu adtr %f8,%f0,%f0 b3d20180| gnu adtra %f8,%f0,%f0,1 ed82100b000a| gnu aeb %f8,11(%r2,%r1) - b30a0080| gnu aebr %f8,%f0 + b30a0080| gnu aebr %f8,%f0 fa332006100b| gnu ap 6(4,%r2),11(4,%r1) 4a82100b| gnu ah %r8,11(%r2,%r1) e382100b007a| gnu ahy %r8,11(%r2,%r1) e382100b0038| gnu agh %r8,11(%r2,%r1) - a78a0008| gnu ahi %r8,8 - a78b0008| gnu aghi %r8,8 - b9c80080| gnu ahhhr %r8,%r0,%r0 - b9d80080| gnu ahhlr %r8,%r0,%r0 -c28900000008| gnu afi %r8,8 -ec80000800d8| gnu ahik %r8,%r0,8 -eb082006006a| gnu asi 6(%r2),8 -ec80000800d9| gnu aghik %r8,%r0,8 -c28800000008| gnu agfi %r8,8 -eb082006007a| gnu agsi 6(%r2),8 -cc8800000008| gnu aih %r8,8 + a78a0008| gnu ahi %r8,8 + a78b0008| gnu aghi %r8,8 + b9c80080| gnu ahhhr %r8,%r0,%r0 + b9d80080| gnu ahhlr %r8,%r0,%r0 +c28900000008| gnu afi %r8,8 +ec80000800d8| gnu ahik %r8,%r0,8 +eb082006006a| gnu asi 6(%r2),8 +ec80000800d9| gnu aghik %r8,%r0,8 +c28800000008| 
gnu agfi %r8,8 +eb082006007a| gnu agsi 6(%r2),8 +cc8800000008| gnu aih %r8,8 5e82100b| gnu al %r8,11(%r2,%r1) - 1e80| gnu alr %r8,%r0 - b9fa0080| gnu alrk %r8,%r0,%r0 + 1e80| gnu alr %r8,%r0 + b9fa0080| gnu alrk %r8,%r0,%r0 e382100b005e| gnu aly %r8,11(%r2,%r1) e382100b000a| gnu alg %r8,11(%r2,%r1) - b90a0080| gnu algr %r8,%r0 - b9ea0080| gnu algrk %r8,%r0,%r0 + b90a0080| gnu algr %r8,%r0 + b9ea0080| gnu algrk %r8,%r0,%r0 e382100b001a| gnu algf %r8,11(%r2,%r1) - b91a0080| gnu algfr %r8,%r0 + b91a0080| gnu algfr %r8,%r0 b9ca0080| gnu alhhhr %r8,%r0,%r0 b9da0080| gnu alhhlr %r8,%r0,%r0 -c28b00000008| gnu alfi %r8,8 -c28a00000008| gnu algfi %r8,8 +c28b00000008| gnu alfi %r8,8 +c28a00000008| gnu algfi %r8,8 e382100b0098| gnu alc %r8,11(%r2,%r1) - b9980080| gnu alcr %r8,%r0 + b9980080| gnu alcr %r8,%r0 e382100b0088| gnu alcg %r8,11(%r2,%r1) - b9880080| gnu alcgr %r8,%r0 -ec80000800da| gnu alhsik %r8,%r0,8 -eb082006006e| gnu alsi 6(%r2),8 -ec80000800db| gnu alghsik %r8,%r0,8 -eb082006007e| gnu algsi 6(%r2),8 -cc8a00000008| gnu alsih %r8,8 -cc8b00000008| gnu alsihn %r8,8 - 3680| gnu axr %f8,%f0 + b9880080| gnu alcgr %r8,%r0 +ec80000800da| gnu alhsik %r8,%r0,8 +eb082006006e| gnu alsi 6(%r2),8 +ec80000800db| gnu alghsik %r8,%r0,8 +eb082006007e| gnu algsi 6(%r2),8 +cc8a00000008| gnu alsih %r8,8 +cc8b00000008| gnu alsihn %r8,8 + 3680| gnu axr %f8,%f0 6a82100b| gnu ad %f8,11(%r2,%r1) - 2a80| gnu adr %f8,%f0 + 2a80| gnu adr %f8,%f0 7a82100b| gnu ae %f8,11(%r2,%r1) - 3a80| gnu aer %f8,%f0 + 3a80| gnu aer %f8,%f0 6e82100b| gnu aw %f8,11(%r2,%r1) - 2e80| gnu awr %f8,%f0 + 2e80| gnu awr %f8,%f0 7e82100b| gnu au %f8,11(%r2,%r1) - 3e80| gnu aur %f8,%f0 - 5482100b| gnu n %r8,11(%r2,%r1) - 1480| gnu nr %r8,%r0 - b9f40080| gnu nrk %r8,%r0,%r0 + 3e80| gnu aur %f8,%f0 + 5482100b| gnu n %r8,11(%r2,%r1) + 1480| gnu nr %r8,%r0 + b9f40080| gnu nrk %r8,%r0,%r0 e382100b0054| gnu ny %r8,11(%r2,%r1) e382100b0080| gnu ng %r8,11(%r2,%r1) - b9800080| gnu ngr %r8,%r0 - b9e40080| gnu ngrk %r8,%r0,%r0 
+ b9800080| gnu ngr %r8,%r0 + b9e40080| gnu ngrk %r8,%r0,%r0 d4032006100b| gnu nc 6(4,%r2),11(%r1) - 94082006| gnu ni 6(%r2),8 -eb0820060054| gnu niy 6(%r2),8 - a5840008| gnu nihh %r8,8 - a5850008| gnu nihl %r8,8 -c08a00000008| gnu nihf %r8,8 - a5860008| gnu nilh %r8,8 - a5870008| gnu nill %r8,8 -c08b00000008| gnu nilf %r8,8 - b9f50080| gnu ncrk %r8,%r0,%r0 - b9e50080| gnu ncgrk %r8,%r0,%r0 + 94ff2006| gnu ni 6(%r2),-1 +ebff20060054| gnu niy 6(%r2),-1 + a584fffe| gnu nihh %r8,-2 + a585fffe| gnu nihl %r8,-2 +c08afffffffe| gnu nihf %r8,-2 + a586fffe| gnu nilh %r8,-2 + a587fffe| gnu nill %r8,-2 +c08bfffffffe| gnu nilf %r8,-2 + b9f50080| gnu ncrk %r8,%r0,%r0 + b9e50080| gnu ncgrk %r8,%r0,%r0 4582100b| gnu bal %r8,11(%r2,%r1) - 0580| gnu balr %r8,%r0 + 0580| gnu balr %r8,%r0 4d82100b| gnu bas %r8,11(%r2,%r1) - 0d80| gnu basr %r8,%r0 - 0c80| gnu bassm %r8,%r0 - b25a0080| gnu bsa %r8,%r0 - 0b80| gnu bsm %r8,%r0 - b2400080| gnu bakr %r8,%r0 - b2580080| gnu bsg %r8,%r0 -e372100b0047| gnu bine 11(%r2,%r1) - 4772100b| gnu bne 11(%r2,%r1) - 0770| gnu bner %r0 + 0d80| gnu basr %r8,%r0 + 0c80| gnu bassm %r8,%r0 + b25a0080| gnu bsa %r8,%r0 + 0b80| gnu bsm %r8,%r0 + b2400080| gnu bakr %r8,%r0 + b2580080| gnu bsg %r8,%r0 +e372100b0047| gnu bine 11(%r2,%r1) + 4772100b| gnu bne 11(%r2,%r1) + 0770| gnu bner %r0 4682100b| gnu bct %r8,11(%r2,%r1) - 0680| gnu bctr %r8,%r0 + 0680| gnu bctr %r8,%r0 e382100b0046| gnu bctg %r8,11(%r2,%r1) - b9460080| gnu bctgr %r8,%r0 + b9460080| gnu bctgr %r8,%r0 8680100b| gnu bxh %r8,%r0,11(%r1) eb80100b0044| gnu bxhg %r8,%r0,11(%r1) 8780100b| gnu bxle %r8,%r0,11(%r1) eb80100b0045| gnu bxleg %r8,%r0,11(%r1) c77060b60000| gnu bpp 7,0x1cc,182(%r6) c57000000093| gnu bprp 7,0x1d2,0x2f8 - a7850000| gnu bras %r8,0x1d8 -c08500000000| gnu brasl %r8,0x1dc - a7740000| gnu jne 0x1e2 -c07400000000| gnu jgne 0x1e6 - a7860000| gnu brct %r8,0x1ec - a7870000| gnu brctg %r8,0x1f0 -cc8600000000| gnu brcth %r8,0x1f4 + a7850000| gnu bras %r8,0x1d8 +c08500000000| gnu brasl 
%r8,0x1dc + a7740000| gnu jne 0x1e2 +c07400000000| gnu jgne 0x1e6 + a7860000| gnu brct %r8,0x1ec + a7870000| gnu brctg %r8,0x1f0 +cc8600000000| gnu brcth %r8,0x1f4 84800000| gnu brxh %r8,%r0,0x1fa ec8000000044| gnu brxhg %r8,%r0,0x1fe 85800000| gnu brxle %r8,%r0,0x204 ec8000000045| gnu brxlg %r8,%r0,0x208 - b2760000| gnu xsch - b2410080| gnu cksm %r8,%r0 - b92e0080| gnu km %r8,%r0 - b9290080| gnu kma %r8,%r0,%r0 - b92f0080| gnu kmc %r8,%r0 - b92a0080| gnu kmf %r8,%r0 - b92d0080| gnu kmctr %r8,%r0,%r0 - b92b0080| gnu kmo %r8,%r0 - b2300000| gnu csch - 5982100b| gnu c %r8,11(%r2,%r1) - 1980| gnu cr %r8,%r0 + b2760000| gnu xsch + b2410080| gnu cksm %r8,%r0 + b92e0080| gnu km %r8,%r0 + b9290080| gnu kma %r8,%r0,%r0 + b92f0080| gnu kmc %r8,%r0 + b92a0080| gnu kmf %r8,%r0 + b92d0080| gnu kmctr %r8,%r0,%r0 + b92b0080| gnu kmo %r8,%r0 + b2300000| gnu csch + 5982100b| gnu c %r8,11(%r2,%r1) + 1980| gnu cr %r8,%r0 e382100b0059| gnu cy %r8,11(%r2,%r1) e382100b0020| gnu cg %r8,11(%r2,%r1) - b9200080| gnu cgr %r8,%r0 + b9200080| gnu cgr %r8,%r0 e382100b0030| gnu cgf %r8,11(%r2,%r1) - b9300080| gnu cgfr %r8,%r0 - b3490080| gnu cxbr %f8,%f0 - b3ec0080| gnu cxtr %f8,%f0 - b3690080| gnu cxr %f8,%f0 + b9300080| gnu cgfr %r8,%r0 + b3490080| gnu cxbr %f8,%f0 + b3ec0080| gnu cxtr %f8,%f0 + b3690080| gnu cxr %f8,%f0 ed82100b0019| gnu cdb %f8,11(%r2,%r1) - b3190080| gnu cdbr %f8,%f0 - b3e40080| gnu cdtr %f8,%f0 + b3190080| gnu cdbr %f8,%f0 + b3e40080| gnu cdtr %f8,%f0 6982100b| gnu cd %f8,11(%r2,%r1) - 2980| gnu cdr %f8,%f0 + 2980| gnu cdr %f8,%f0 ed82100b0009| gnu ceb %f8,11(%r2,%r1) - b3090080| gnu cebr %f8,%f0 + b3090080| gnu cebr %f8,%f0 7982100b| gnu ce %f8,11(%r2,%r1) - 3980| gnu cer %f8,%f0 + 3980| gnu cer %f8,%f0 ec8080cd30f6| gnu crb %r8,%r0,3,205(%r8) ec8080cd30e4| gnu cgrb %r8,%r0,3,205(%r8) ec80ffac3076| gnu crj %r8,%r0,3,0x1e6 ec80ffac3064| gnu cgrj %r8,%r0,3,0x1ec - b21a100b| gnu cfc 11(%r1) + b21a100b| gnu cfc 11(%r1) b98f0180| gnu crdte %r8,%r0,%r0,1 - b3480080| gnu kxbr 
%f8,%f0 - b3e80080| gnu kxtr %f8,%f0 + b3480080| gnu kxbr %f8,%f0 + b3e80080| gnu kxtr %f8,%f0 ed82100b0018| gnu kdb %f8,11(%r2,%r1) - b3180080| gnu kdbr %f8,%f0 - b3e00080| gnu kdtr %f8,%f0 + b3180080| gnu kdbr %f8,%f0 + b3e00080| gnu kdtr %f8,%f0 ed82100b0008| gnu keb %f8,11(%r2,%r1) - b3080080| gnu kebr %f8,%f0 + b3080080| gnu kebr %f8,%f0 ba80100b| gnu cs %r8,%r0,11(%r1) eb80100b0014| gnu csy %r8,%r0,11(%r1) eb80100b0030| gnu csg %r8,%r0,11(%r1) - b2500080| gnu csp %r8,%r0 - b98a0080| gnu cspg %r8,%r0 + b2500080| gnu csp %r8,%r0 + b98a0080| gnu cspg %r8,%r0 c8022006100b| gnu csst 6(%r2),11(%r1),%r0 - b9723080| gnu crt %r8,%r0,3 - b9603080| gnu cgrt %r8,%r0,3 - b3fc0080| gnu cextr %f8,%f0 - b3f40080| gnu cedtr %f8,%f0 + b9723080| gnu crt %r8,%r0,3 + b9603080| gnu cgrt %r8,%r0,3 + b3fc0080| gnu cextr %f8,%f0 + b3f40080| gnu cedtr %f8,%f0 f9332006100b| gnu cp 6(4,%r2),11(4,%r1) bb80100b| gnu cds %r8,%r0,11(%r1) eb80100b0031| gnu cdsy %r8,%r0,11(%r1) @@ -172,105 +172,105 @@ eb80100b003e| gnu cdsg %r8,%r0,11(%r1) 4982100b| gnu ch %r8,11(%r2,%r1) e382100b0079| gnu chy %r8,11(%r2,%r1) e382100b0034| gnu cgh %r8,11(%r2,%r1) -e55420060008| gnu chhsi 6(%r2),8 - a78e0008| gnu chi %r8,8 -e55c20060008| gnu chsi 6(%r2),8 - a78f0008| gnu cghi %r8,8 -e55820060008| gnu cghsi 6(%r2),8 -c68500000000| gnu chrl %r8,0x330 -c68400000000| gnu cghrl %r8,0x336 +e55420060008| gnu chhsi 6(%r2),8 + a78e0008| gnu chi %r8,8 +e55c20060008| gnu chsi 6(%r2),8 + a78f0008| gnu cghi %r8,8 +e55820060008| gnu cghsi 6(%r2),8 +c68500000000| gnu chrl %r8,0x330 +c68400000000| gnu cghrl %r8,0x336 e382100b00cd| gnu chf %r8,11(%r2,%r1) - b9cd0080| gnu chhr %r8,%r0 - b9dd0080| gnu chlr %r8,%r0 -c28d00000008| gnu cfi %r8,8 -c28c00000008| gnu cgfi %r8,8 + b9cd0080| gnu chhr %r8,%r0 + b9dd0080| gnu chlr %r8,%r0 +c28d00000008| gnu cfi %r8,8 +c28c00000008| gnu cgfi %r8,8 ec8380cd08fe| gnu cib %r8,8,3,205(%r8) ec8380cd08fc| gnu cgib %r8,8,3,205(%r8) -ec83ffac087e| gnu cij %r8,8,3,0x2ba +ec83ffac087e| gnu cij 
%r8,8,3,0x2ba ec83ffac087c| gnu cgij %r8,8,3,0x2c0 -ec8000083072| gnu cit %r8,8,3 -ec8000083070| gnu cgit %r8,8,3 -cc8d00000008| gnu cih %r8,8 +ec8000083072| gnu cit %r8,8,3 +ec8000083070| gnu cgit %r8,8,3 +cc8d00000008| gnu cih %r8,8 5582100b| gnu cl %r8,11(%r2,%r1) - 1580| gnu clr %r8,%r0 + 1580| gnu clr %r8,%r0 e382100b0055| gnu cly %r8,11(%r2,%r1) e382100b0021| gnu clg %r8,11(%r2,%r1) - b9210080| gnu clgr %r8,%r0 + b9210080| gnu clgr %r8,%r0 e382100b0031| gnu clgf %r8,11(%r2,%r1) - b9310080| gnu clgfr %r8,%r0 + b9310080| gnu clgfr %r8,%r0 d5032006100b| gnu clc 6(4,%r2),11(%r1) - 95082006| gnu cli 6(%r2),8 -eb0820060055| gnu cliy 6(%r2),8 + 95082006| gnu cli 6(%r2),8 +eb0820060055| gnu cliy 6(%r2),8 ec8080cd30f7| gnu clrb %r8,%r0,3,205(%r8) ec8080cd30e5| gnu clgrb %r8,%r0,3,205(%r8) ec80ffac3077| gnu clrj %r8,%r0,3,0x314 ec80ffac3065| gnu clgrj %r8,%r0,3,0x31a - b9733080| gnu clrt %r8,%r0,3 -eb83100b0023| gnu clt %r8,3,11(%r1) - b9613080| gnu clgrt %r8,%r0,3 + b9733080| gnu clrt %r8,%r0,3 +eb83100b0023| gnu clt %r8,3,11(%r1) + b9613080| gnu clgrt %r8,%r0,3 eb83100b002b| gnu clgt %r8,3,11(%r1) eb83100b0020| gnu clmh %r8,3,11(%r1) - bd83100b| gnu clm %r8,3,11(%r1) + bd83100b| gnu clm %r8,3,11(%r1) eb83100b0021| gnu clmy %r8,3,11(%r1) e382100b00cf| gnu clhf %r8,11(%r2,%r1) - b9cf0080| gnu clhhr %r8,%r0 - b9df0080| gnu clhlr %r8,%r0 -e55520060008| gnu clhhsi 6(%r2),8 -c28f00000008| gnu clfi %r8,8 -e55d20060008| gnu clfhsi 6(%r2),8 -e55920060008| gnu clghsi 6(%r2),8 -c28e00000008| gnu clgfi %r8,8 + b9cf0080| gnu clhhr %r8,%r0 + b9df0080| gnu clhlr %r8,%r0 +e55520060008| gnu clhhsi 6(%r2),8 +c28f00000008| gnu clfi %r8,8 +e55d20060008| gnu clfhsi 6(%r2),8 +e55920060008| gnu clghsi 6(%r2),8 +c28e00000008| gnu clgfi %r8,8 ec8380cd08ff| gnu clib %r8,8,3,205(%r8) ec8380cd08fd| gnu clgib %r8,8,3,205(%r8) ec83ffac087f| gnu clij %r8,8,3,0x37c ec83ffac087d| gnu clgij %r8,8,3,0x382 -ec8000083073| gnu clfit %r8,8,3 -ec8000083071| gnu clgit %r8,8,3 -cc8f00000008| gnu clih %r8,8 - 
0f80| gnu clcl %r8,%r0 +ec8000083073| gnu clfit %r8,8,3 +ec8000083071| gnu clgit %r8,8,3 +cc8f00000008| gnu clih %r8,8 + 0f80| gnu clcl %r8,%r0 a980100b| gnu clcle %r8,%r0,11(%r1) eb80100b008f| gnu clclu %r8,%r0,11(%r1) -c68f00000000| gnu clrl %r8,0x44e -c68700000000| gnu clhrl %r8,0x454 -c68a00000000| gnu clgrl %r8,0x45a -c68600000000| gnu clghrl %r8,0x460 -c68e00000000| gnu clgfrl %r8,0x466 - b25d0080| gnu clst %r8,%r0 -c68d00000000| gnu crl %r8,0x470 -c68800000000| gnu cgrl %r8,0x476 -c68c00000000| gnu cgfrl %r8,0x47c - b2570080| gnu cuse %r8,%r0 - b2630080| gnu cmpsc %r8,%r0 - b93a0080| gnu kdsa %r8,%r0 - b93e0080| gnu kimd %r8,%r0 - b93f0080| gnu klmd %r8,%r0 - b91e0080| gnu kmac %r8,%r0 - b3590080| gnu thdr %f8,%f0 - b3580080| gnu thder %f8,%f0 - b3960080| gnu cxfbr %f8,%r0 +c68f00000000| gnu clrl %r8,0x44e +c68700000000| gnu clhrl %r8,0x454 +c68a00000000| gnu clgrl %r8,0x45a +c68600000000| gnu clghrl %r8,0x460 +c68e00000000| gnu clgfrl %r8,0x466 + b25d0080| gnu clst %r8,%r0 +c68d00000000| gnu crl %r8,0x470 +c68800000000| gnu cgrl %r8,0x476 +c68c00000000| gnu cgfrl %r8,0x47c + b2570080| gnu cuse %r8,%r0 + b2630080| gnu cmpsc %r8,%r0 + b93a0080| gnu kdsa %r8,%r0 + b93e0080| gnu kimd %r8,%r0 + b93f0080| gnu klmd %r8,%r0 + b91e0080| gnu kmac %r8,%r0 + b3590080| gnu thdr %f8,%f0 + b3580080| gnu thder %f8,%f0 + b3960080| gnu cxfbr %f8,%r0 b3963180| gnu cxfbra %f8,3,%r0,1 - b9593180| gnu cxftr %f8,3,%r0,1 - b3b60080| gnu cxfr %f8,%r0 - b3950080| gnu cdfbr %f8,%r0 + b9593180| gnu cxftr %f8,3,%r0,1 + b3b60080| gnu cxfr %f8,%r0 + b3950080| gnu cdfbr %f8,%r0 b3953180| gnu cdfbra %f8,3,%r0,1 - b9513180| gnu cdftr %f8,3,%r0,1 - b3b50080| gnu cdfr %f8,%r0 - b3940080| gnu cefbr %f8,%r0 + b9513180| gnu cdftr %f8,3,%r0,1 + b3b50080| gnu cdfr %f8,%r0 + b3940080| gnu cefbr %f8,%r0 b3943180| gnu cefbra %f8,3,%r0,1 - b3b40080| gnu cefr %f8,%r0 - b3a60080| gnu cxgbr %f8,%r0 + b3b40080| gnu cefr %f8,%r0 + b3a60080| gnu cxgbr %f8,%r0 b3a63180| gnu cxgbra %f8,3,%r0,1 - b3f90080| gnu 
cxgtr %f8,%r0 + b3f90080| gnu cxgtr %f8,%r0 b3f93180| gnu cxgtra %f8,3,%r0,1 - b3c60080| gnu cxgr %f8,%r0 - b3a50080| gnu cdgbr %f8,%r0 + b3c60080| gnu cxgr %f8,%r0 + b3a50080| gnu cdgbr %f8,%r0 b3a53180| gnu cdgbra %f8,3,%r0,1 - b3f10080| gnu cdgtr %f8,%r0 + b3f10080| gnu cdgtr %f8,%r0 b3f13180| gnu cdgtra %f8,3,%r0,1 - b3c50080| gnu cdgr %f8,%r0 - b3a40080| gnu cegbr %f8,%r0 + b3c50080| gnu cdgr %f8,%r0 + b3a40080| gnu cegbr %f8,%r0 b3a43180| gnu cegbra %f8,3,%r0,1 - b3c40080| gnu cegr %f8,%r0 + b3c40080| gnu cegr %f8,%r0 b3923180| gnu cxlfbr %f8,3,%r0,1 b95b3180| gnu cxlftr %f8,3,%r0,1 b3913180| gnu cdlfbr %f8,3,%r0,1 @@ -283,44 +283,44 @@ c68c00000000| gnu cgfrl %r8,0x47c b3a03180| gnu celgbr %f8,3,%r0,1 ed03100b83af| gnu cxpt %f8,11(4,%r1),3 ed03100b83ae| gnu cdpt %f8,11(4,%r1),3 - b3fb0080| gnu cxstr %f8,%r0 - b3f30080| gnu cdstr %f8,%r0 - b3fa0080| gnu cxutr %f8,%r0 - b3f20080| gnu cdutr %f8,%r0 + b3fb0080| gnu cxstr %f8,%r0 + b3f30080| gnu cdstr %f8,%r0 + b3fa0080| gnu cxutr %f8,%r0 + b3f20080| gnu cdutr %f8,%r0 ed03100b83ab| gnu cxzt %f8,11(4,%r1),3 ed03100b83aa| gnu cdzt %f8,11(4,%r1),3 - b3503080| gnu tbedr %f8,3,%f0 - b3513080| gnu tbdr %f8,3,%f0 + b3503080| gnu tbedr %f8,3,%f0 + b3513080| gnu tbdr %f8,3,%f0 4f82100b| gnu cvb %r8,11(%r2,%r1) e382100b0006| gnu cvby %r8,11(%r2,%r1) e382100b000e| gnu cvbg %r8,11(%r2,%r1) 4e82100b| gnu cvd %r8,11(%r2,%r1) e382100b0026| gnu cvdy %r8,11(%r2,%r1) e382100b002e| gnu cvdg %r8,11(%r2,%r1) - b39a3080| gnu cfxbr %r8,3,%f0 + b39a3080| gnu cfxbr %r8,3,%f0 b39a3180| gnu cfxbra %r8,3,%f0,1 - b3aa3080| gnu cgxbr %r8,3,%f0 + b3aa3080| gnu cgxbr %r8,3,%f0 b3aa3180| gnu cgxbra %r8,3,%f0,1 - b9493180| gnu cfxtr %r8,3,%f0,1 - b3e93080| gnu cgxtr %r8,3,%f0 + b9493180| gnu cfxtr %r8,3,%f0,1 + b3e93080| gnu cgxtr %r8,3,%f0 b3e93180| gnu cgxtra %r8,3,%f0,1 - b3ba3080| gnu cfxr %r8,3,%f0 - b3ca3080| gnu cgxr %r8,3,%f0 - b3993080| gnu cfdbr %r8,3,%f0 + b3ba3080| gnu cfxr %r8,3,%f0 + b3ca3080| gnu cgxr %r8,3,%f0 + b3993080| gnu 
cfdbr %r8,3,%f0 b3993180| gnu cfdbra %r8,3,%f0,1 - b3a93080| gnu cgdbr %r8,3,%f0 + b3a93080| gnu cgdbr %r8,3,%f0 b3a93180| gnu cgdbra %r8,3,%f0,1 - b9413180| gnu cfdtr %r8,3,%f0,1 - b3e13080| gnu cgdtr %r8,3,%f0 + b9413180| gnu cfdtr %r8,3,%f0,1 + b3e13080| gnu cgdtr %r8,3,%f0 b3e13180| gnu cgdtra %r8,3,%f0,1 - b3b93080| gnu cfdr %r8,3,%f0 - b3c93080| gnu cgdr %r8,3,%f0 - b3983080| gnu cfebr %r8,3,%f0 + b3b93080| gnu cfdr %r8,3,%f0 + b3c93080| gnu cgdr %r8,3,%f0 + b3983080| gnu cfebr %r8,3,%f0 b3983180| gnu cfebra %r8,3,%f0,1 - b3a83080| gnu cgebr %r8,3,%f0 + b3a83080| gnu cgebr %r8,3,%f0 b3a83180| gnu cgebra %r8,3,%f0,1 - b3b83080| gnu cfer %r8,3,%f0 - b3c83080| gnu cger %r8,3,%f0 + b3b83080| gnu cfer %r8,3,%f0 + b3c83080| gnu cger %r8,3,%f0 b39e3180| gnu clfxbr %r8,3,%f0,1 b3ae3180| gnu clgxbr %r8,3,%f0,1 b94b3180| gnu clfxtr %r8,3,%f0,1 @@ -333,124 +333,124 @@ e382100b002e| gnu cvdg %r8,11(%r2,%r1) b3ac3180| gnu clgebr %r8,3,%f0,1 ed03100b83ad| gnu cpxt %f8,11(4,%r1),3 ed03100b83ac| gnu cpdt %f8,11(4,%r1),3 - b3eb0180| gnu csxtr %r8,%f0,1 - b3e30180| gnu csdtr %r8,%f0,1 - b3ea0080| gnu cuxtr %r8,%f0 - b3e20080| gnu cudtr %r8,%f0 + b3eb0180| gnu csxtr %r8,%f0,1 + b3e30180| gnu csdtr %r8,%f0,1 + b3ea0080| gnu cuxtr %r8,%f0 + b3e20080| gnu cudtr %r8,%f0 ed03100b83a9| gnu czxt %f8,11(4,%r1),3 ed03100b83a8| gnu czdt %f8,11(4,%r1),3 - b2a63080| gnu cu21 %r8,%r0,3 - b9b13080| gnu cu24 %r8,%r0,3 - b2a63080| gnu cu21 %r8,%r0,3 - b2a73080| gnu cu12 %r8,%r0,3 - b2a73080| gnu cu12 %r8,%r0,3 - b9b03080| gnu cu14 %r8,%r0,3 - b9b30080| gnu cu42 %r8,%r0 - b9b20080| gnu cu41 %r8,%r0 - b24d0080| gnu cpya %a8,%a0 - b3720080| gnu cpsdr %f8,%f0,%f0 + b2a63080| gnu cu21 %r8,%r0,3 + b9b13080| gnu cu24 %r8,%r0,3 + b2a63080| gnu cu21 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b2a73080| gnu cu12 %r8,%r0,3 + b9b03080| gnu cu14 %r8,%r0,3 + b9b30080| gnu cu42 %r8,%r0 + b9b20080| gnu cu41 %r8,%r0 + b24d0080| gnu cpya %a8,%a0 + b3720080| gnu cpsdr %f8,%f0,%f0 e6235000087c| gnu vscshp 
%v18,%v3,%v5 e62350901874| gnu vschp %v18,%v3,%v5,1,9 b9390080| gnu dfltcc %r8,%r0,%r0 - 5d82100b| gnu d %r8,11(%r2,%r1) - 1d80| gnu dr %r8,%r0 - b34d0080| gnu dxbr %f8,%f0 - b3d90080| gnu dxtr %f8,%f0,%f0 + 5d82100b| gnu d %r8,11(%r2,%r1) + 1d80| gnu dr %r8,%r0 + b34d0080| gnu dxbr %f8,%f0 + b3d90080| gnu dxtr %f8,%f0,%f0 b3d90180| gnu dxtra %f8,%f0,%f0,1 - b22d0080| gnu dxr %f8,%f0 + b22d0080| gnu dxr %f8,%f0 ed82100b001d| gnu ddb %f8,11(%r2,%r1) - b31d0080| gnu ddbr %f8,%f0 - b3d10080| gnu ddtr %f8,%f0,%f0 + b31d0080| gnu ddbr %f8,%f0 + b3d10080| gnu ddtr %f8,%f0,%f0 b3d10180| gnu ddtra %f8,%f0,%f0,1 6d82100b| gnu dd %f8,11(%r2,%r1) - 2d80| gnu ddr %f8,%f0 + 2d80| gnu ddr %f8,%f0 ed82100b000d| gnu deb %f8,11(%r2,%r1) - b30d0080| gnu debr %f8,%f0 + b30d0080| gnu debr %f8,%f0 7d82100b| gnu de %f8,11(%r2,%r1) - 3d80| gnu der %f8,%f0 + 3d80| gnu der %f8,%f0 fd332006100b| gnu dp 6(4,%r2),11(4,%r1) e382100b0097| gnu dl %r8,11(%r2,%r1) - b9970080| gnu dlr %r8,%r0 + b9970080| gnu dlr %r8,%r0 e382100b0087| gnu dlg %r8,11(%r2,%r1) - b9870080| gnu dlgr %r8,%r0 + b9870080| gnu dlgr %r8,%r0 e382100b000d| gnu dsg %r8,11(%r2,%r1) - b90d0080| gnu dsgr %r8,%r0 + b90d0080| gnu dsgr %r8,%r0 e382100b001d| gnu dsgf %r8,11(%r2,%r1) - b91d0080| gnu dsgfr %r8,%r0 + b91d0080| gnu dsgfr %r8,%r0 b35b0180| gnu didbr %f8,%f0,%f0,1 b3530180| gnu diebr %f8,%f0,%f0,1 de032006100b| gnu ed 6(4,%r2),11(%r1) df032006100b| gnu edmk 6(4,%r2),11(%r1) - 5782100b| gnu x %r8,11(%r2,%r1) - 1780| gnu xr %r8,%r0 - b9f70080| gnu xrk %r8,%r0,%r0 + 5782100b| gnu x %r8,11(%r2,%r1) + 1780| gnu xr %r8,%r0 + b9f70080| gnu xrk %r8,%r0,%r0 e382100b0057| gnu xy %r8,11(%r2,%r1) e382100b0082| gnu xg %r8,11(%r2,%r1) - b9820080| gnu xgr %r8,%r0 - b9e70080| gnu xgrk %r8,%r0,%r0 + b9820080| gnu xgr %r8,%r0 + b9e70080| gnu xgrk %r8,%r0,%r0 d7032006100b| gnu xc 6(4,%r2),11(%r1) - 97082006| gnu xi 6(%r2),8 -eb0820060057| gnu xiy 6(%r2),8 -c08600000008| gnu xihf %r8,8 -c08700000008| gnu xilf %r8,8 + 97ff2006| gnu xi 6(%r2),-1 
+ebff20060057| gnu xiy 6(%r2),-1 +c086ffffffff| gnu xihf %r8,-1 +c087ffffffff| gnu xilf %r8,-1 4482100b| gnu ex %r8,11(%r2,%r1) -c68000000000| gnu exrl %r8,0x720 - b24f0080| gnu ear %r8,%a0 - b99d0080| gnu esea %r8 - b3ed0080| gnu eextr %r8,%f0 - b3e50080| gnu eedtr %r8,%f0 +c68000000000| gnu exrl %r8,0x720 + b24f0080| gnu ear %r8,%a0 + b99d0080| gnu esea %r8 + b3ed0080| gnu eextr %r8,%f0 + b3e50080| gnu eedtr %r8,%f0 eb80100b004c| gnu ecag %r8,%r0,11(%r1) c8012006100b| gnu ectg 6(%r2),11(%r1),%r0 - b38c0080| gnu efpc %r8 - b2260080| gnu epar %r8 - b99a0080| gnu epair %r8 - b98d0080| gnu epsw %r8,%r0 - b2270080| gnu esar %r8 - b99b0080| gnu esair %r8 - b3ef0080| gnu esxtr %r8,%f0 - b3e70080| gnu esdtr %r8,%f0 - b2490080| gnu ereg %r8,%r0 - b90e0080| gnu eregg %r8,%r0 - b24a0080| gnu esta %r8,%r0 - b2ec0080| gnu etnd %r8 - b9830080| gnu flogr %r8,%r0 - b2310000| gnu hsch - 2480| gnu hdr %f8,%f0 - 3480| gnu her %f8,%f0 - b2240080| gnu iac %r8 - b3fe0080| gnu iextr %f8,%f0,%r0 - b3f60080| gnu iedtr %f8,%f0,%r0 + b38c0080| gnu efpc %r8 + b2260080| gnu epar %r8 + b99a0080| gnu epair %r8 + b98d0080| gnu epsw %r8,%r0 + b2270080| gnu esar %r8 + b99b0080| gnu esair %r8 + b3ef0080| gnu esxtr %r8,%f0 + b3e70080| gnu esdtr %r8,%f0 + b2490080| gnu ereg %r8,%r0 + b90e0080| gnu eregg %r8,%r0 + b24a0080| gnu esta %r8,%r0 + b2ec0080| gnu etnd %r8 + b9830080| gnu flogr %r8,%r0 + b2310000| gnu hsch + 2480| gnu hdr %f8,%f0 + 3480| gnu her %f8,%f0 + b2240080| gnu iac %r8 + b3fe0080| gnu iextr %f8,%f0,%r0 + b3f60080| gnu iedtr %f8,%f0,%r0 4382100b| gnu ic %r8,11(%r2,%r1) e382100b0073| gnu icy %r8,11(%r2,%r1) eb83100b0080| gnu icmh %r8,3,11(%r1) - bf83100b| gnu icm %r8,3,11(%r1) + bf83100b| gnu icm %r8,3,11(%r1) eb83100b0081| gnu icmy %r8,3,11(%r1) - a5800008| gnu iihh %r8,8 - a5810008| gnu iihl %r8,8 -c08800000008| gnu iihf %r8,8 - a5820008| gnu iilh %r8,8 - a5830008| gnu iill %r8,8 -c08900000008| gnu iilf %r8,8 - b2220080| gnu ipm %r8 - b20b0000| gnu ipk - b9ac0080| gnu irbm %r8,%r0 - 
b2290080| gnu iske %r8,%r0 - b2230080| gnu ivsk %r8,%r0 + a5800008| gnu iihh %r8,8 + a5810008| gnu iihl %r8,8 +c08800000008| gnu iihf %r8,8 + a5820008| gnu iilh %r8,8 + a5830008| gnu iill %r8,8 +c08900000008| gnu iilf %r8,8 + b2220080| gnu ipm %r8 + b20b0000| gnu ipk + b9ac0080| gnu irbm %r8,%r0 + b2290080| gnu iske %r8,%r0 + b2230080| gnu ivsk %r8,%r0 b98e0180| gnu idte %r8,%r0,%r0,1 b2210180| gnu ipte %r8,%r0,%r0,1 - 5882100b| gnu l %r8,11(%r2,%r1) - 1880| gnu lr %r8,%r0 + 5882100b| gnu l %r8,11(%r2,%r1) + 1880| gnu lr %r8,%r0 e382100b0058| gnu ly %r8,11(%r2,%r1) e382100b0004| gnu lg %r8,11(%r2,%r1) - b9040080| gnu lgr %r8,%r0 + b9040080| gnu lgr %r8,%r0 e382100b0014| gnu lgf %r8,11(%r2,%r1) - b9140080| gnu lgfr %r8,%r0 - b3650080| gnu lxr %f8,%f0 + b9140080| gnu lgfr %r8,%r0 + b3650080| gnu lxr %f8,%f0 6882100b| gnu ld %f8,11(%r2,%r1) - 2880| gnu ldr %f8,%f0 + 2880| gnu ldr %f8,%f0 ed82100b0065| gnu ldy %f8,11(%r2,%r1) 7882100b| gnu le %f8,11(%r2,%r1) - 3880| gnu ler %f8,%f0 + 3880| gnu ler %f8,%f0 ed82100b0064| gnu ley %f8,11(%r2,%r1) 9a80100b| gnu lam %a8,%a0,11(%r1) eb80100b009a| gnu lamy %a8,%a0,11(%r1) @@ -458,7 +458,7 @@ eb80100b009a| gnu lamy %a8,%a0,11(%r1) e382100b0071| gnu lay %r8,11(%r2,%r1) 5182100b| gnu lae %r8,11(%r2,%r1) e382100b0075| gnu laey %r8,11(%r2,%r1) -c08000000000| gnu larl %r8,0x836 +c08000000000| gnu larl %r8,0x836 e5002006100b| gnu lasp 6(%r2),11(%r1) eb80100b00f8| gnu laa %r8,%r0,11(%r1) eb80100b00e8| gnu laag %r8,%r0,11(%r1) @@ -471,200 +471,200 @@ eb80100b00e7| gnu laxg %r8,%r0,11(%r1) eb80100b00f6| gnu lao %r8,%r0,11(%r1) eb80100b00e6| gnu laog %r8,%r0,11(%r1) e382100b0012| gnu lt %r8,11(%r2,%r1) - 1280| gnu ltr %r8,%r0 + 1280| gnu ltr %r8,%r0 e382100b0002| gnu ltg %r8,11(%r2,%r1) - b9020080| gnu ltgr %r8,%r0 + b9020080| gnu ltgr %r8,%r0 e382100b0032| gnu ltgf %r8,11(%r2,%r1) - b9120080| gnu ltgfr %r8,%r0 - b3420080| gnu ltxbr %f8,%f0 - b3de0080| gnu ltxtr %f8,%f0 - b3620080| gnu ltxr %f8,%f0 - b3120080| gnu ltdbr %f8,%f0 - 
b3d60080| gnu ltdtr %f8,%f0 - 2280| gnu ltdr %f8,%f0 - b3020080| gnu ltebr %f8,%f0 - 3280| gnu lter %f8,%f0 + b9120080| gnu ltgfr %r8,%r0 + b3420080| gnu ltxbr %f8,%f0 + b3de0080| gnu ltxtr %f8,%f0 + b3620080| gnu ltxr %f8,%f0 + b3120080| gnu ltdbr %f8,%f0 + b3d60080| gnu ltdtr %f8,%f0 + 2280| gnu ltdr %f8,%f0 + b3020080| gnu ltebr %f8,%f0 + 3280| gnu lter %f8,%f0 e382100b009f| gnu lat %r8,11(%r2,%r1) e382100b0085| gnu lgat %r8,11(%r2,%r1) e382100b003b| gnu lzrf %r8,11(%r2,%r1) e382100b002a| gnu lzrg %r8,11(%r2,%r1) - b200100b| gnu lbear 11(%r1) + b200100b| gnu lbear 11(%r1) e382100b0076| gnu lb %r8,11(%r2,%r1) - b9260080| gnu lbr %r8,%r0 + b9260080| gnu lbr %r8,%r0 e382100b0077| gnu lgb %r8,11(%r2,%r1) - b9060080| gnu lgbr %r8,%r0 + b9060080| gnu lgbr %r8,%r0 e382100b00c0| gnu lbh %r8,11(%r2,%r1) - 1380| gnu lcr %r8,%r0 - b9030080| gnu lcgr %r8,%r0 - b9130080| gnu lcgfr %r8,%r0 - b3430080| gnu lcxbr %f8,%f0 - b3630080| gnu lcxr %f8,%f0 - b3130080| gnu lcdbr %f8,%f0 - 2380| gnu lcdr %f8,%f0 - b3730080| gnu lcdfr %f8,%f0 - b3030080| gnu lcebr %f8,%f0 - 3380| gnu lcer %f8,%f0 + 1380| gnu lcr %r8,%r0 + b9030080| gnu lcgr %r8,%r0 + b9130080| gnu lcgfr %r8,%r0 + b3430080| gnu lcxbr %f8,%f0 + b3630080| gnu lcxr %f8,%f0 + b3130080| gnu lcdbr %f8,%f0 + 2380| gnu lcdr %f8,%f0 + b3730080| gnu lcdfr %f8,%f0 + b3030080| gnu lcebr %f8,%f0 + 3380| gnu lcer %f8,%f0 b780100b| gnu lctl %c8,%c0,11(%r1) eb80100b002f| gnu lctlg %c8,%c0,11(%r1) e782100b3027| gnu lcbb %r8,11(%r2,%r1),3 - b3473080| gnu fixbr %f8,3,%f0 + b3473080| gnu fixbr %f8,3,%f0 b3473180| gnu fixbra %f8,3,%f0,1 - b3df3180| gnu fixtr %f8,3,%f0,1 - b3670080| gnu fixr %f8,%f0 - b35f3080| gnu fidbr %f8,3,%f0 + b3df3180| gnu fixtr %f8,3,%f0,1 + b3670080| gnu fixr %f8,%f0 + b35f3080| gnu fidbr %f8,3,%f0 b35f3180| gnu fidbra %f8,3,%f0,1 - b3d73180| gnu fidtr %f8,3,%f0,1 - b37f0080| gnu fidr %f8,%f0 - b3573080| gnu fiebr %f8,3,%f0 + b3d73180| gnu fidtr %f8,3,%f0,1 + b37f0080| gnu fidr %f8,%f0 + b3573080| gnu fiebr %f8,3,%f0 
b3573180| gnu fiebra %f8,3,%f0,1 - b3770080| gnu fier %f8,%f0 - b29d100b| gnu lfpc 11(%r1) - b2bd100b| gnu lfas 11(%r1) - b3c10080| gnu ldgr %f8,%r0 - b3cd0080| gnu lgdr %r8,%f0 + b3770080| gnu fier %f8,%f0 + b29d100b| gnu lfpc 11(%r1) + b2bd100b| gnu lfas 11(%r1) + b3c10080| gnu ldgr %f8,%r0 + b3cd0080| gnu lgdr %r8,%f0 e382100b004c| gnu lgg %r8,11(%r2,%r1) e382100b004d| gnu lgsc %r8,11(%r2,%r1) 4882100b| gnu lh %r8,11(%r2,%r1) - b9270080| gnu lhr %r8,%r0 + b9270080| gnu lhr %r8,%r0 e382100b0078| gnu lhy %r8,11(%r2,%r1) e382100b0015| gnu lgh %r8,11(%r2,%r1) - b9070080| gnu lghr %r8,%r0 + b9070080| gnu lghr %r8,%r0 e382100b00c4| gnu lhh %r8,11(%r2,%r1) -ec830008004e| gnu lochhinle %r8,8 - a7880008| gnu lhi %r8,8 - a7890008| gnu lghi %r8,8 -ec8300080042| gnu lochinle %r8,8 -ec8300080046| gnu locghinle %r8,8 -c48500000000| gnu lhrl %r8,0x99e -c48400000000| gnu lghrl %r8,0x9a4 +ec830008004e| gnu lochhinle %r8,8 + a7880008| gnu lhi %r8,8 + a7890008| gnu lghi %r8,8 +ec8300080042| gnu lochinle %r8,8 +ec8300080046| gnu locghinle %r8,8 +c48500000000| gnu lhrl %r8,0x99e +c48400000000| gnu lghrl %r8,0x9a4 e382100b00ca| gnu lfh %r8,11(%r2,%r1) e382100b00c8| gnu lfhat %r8,11(%r2,%r1) eb83100b00e0| gnu locfhnle %r8,11(%r1) - b9e03080| gnu locfhrnle %r8,%r0 -c08100000008| gnu lgfi %r8,8 + b9e03080| gnu locfhrnle %r8,%r0 +c081fffffffe| gnu lgfi %r8,-2 ed82100b0005| gnu lxdb %f8,11(%r2,%r1) - b3050080| gnu lxdbr %f8,%f0 - b3dc0180| gnu lxdtr %f8,%f0,1 + b3050080| gnu lxdbr %f8,%f0 + b3dc0180| gnu lxdtr %f8,%f0,1 ed82100b0025| gnu lxd %f8,11(%r2,%r1) - b3250080| gnu lxdr %f8,%f0 + b3250080| gnu lxdr %f8,%f0 ed82100b0006| gnu lxeb %f8,11(%r2,%r1) - b3060080| gnu lxebr %f8,%f0 + b3060080| gnu lxebr %f8,%f0 ed82100b0026| gnu lxe %f8,11(%r2,%r1) - b3260080| gnu lxer %f8,%f0 + b3260080| gnu lxer %f8,%f0 ed82100b0004| gnu ldeb %f8,11(%r2,%r1) - b3040080| gnu ldebr %f8,%f0 - b3d40180| gnu ldetr %f8,%f0,1 + b3040080| gnu ldebr %f8,%f0 + b3d40180| gnu ldetr %f8,%f0,1 ed82100b0024| gnu lde 
%f8,11(%r2,%r1) - b3240080| gnu lder %f8,%f0 + b3240080| gnu lder %f8,%f0 e382100b0016| gnu llgf %r8,11(%r2,%r1) - b9160080| gnu llgfr %r8,%r0 + b9160080| gnu llgfr %r8,%r0 e382100b0048| gnu llgfsg %r8,11(%r2,%r1) e382100b009d| gnu llgfat %r8,11(%r2,%r1) e382100b003a| gnu llzrgf %r8,11(%r2,%r1) e382100b0094| gnu llc %r8,11(%r2,%r1) - b9940080| gnu llcr %r8,%r0 + b9940080| gnu llcr %r8,%r0 e382100b0090| gnu llgc %r8,11(%r2,%r1) - b9840080| gnu llgcr %r8,%r0 + b9840080| gnu llgcr %r8,%r0 e382100b00c2| gnu llch %r8,11(%r2,%r1) e382100b0095| gnu llh %r8,11(%r2,%r1) - b9950080| gnu llhr %r8,%r0 + b9950080| gnu llhr %r8,%r0 e382100b0091| gnu llgh %r8,11(%r2,%r1) - b9850080| gnu llghr %r8,%r0 + b9850080| gnu llghr %r8,%r0 e382100b00c6| gnu llhh %r8,11(%r2,%r1) -c48200000000| gnu llhrl %r8,0xa5a -c48600000000| gnu llghrl %r8,0xa60 - a58c0008| gnu llihh %r8,8 - a58d0008| gnu llihl %r8,8 -c08e00000008| gnu llihf %r8,8 - a58e0008| gnu llilh %r8,8 - a58f0008| gnu llill %r8,8 -c08f00000008| gnu llilf %r8,8 -c48e00000000| gnu llgfrl %r8,0xa82 +c48200000000| gnu llhrl %r8,0xa5a +c48600000000| gnu llghrl %r8,0xa60 + a58cffff| gnu llihh %r8,-1 + a58dffff| gnu llihl %r8,-1 +c08efffffffe| gnu llihf %r8,-2 + a58effff| gnu llilh %r8,-1 + a58fffff| gnu llill %r8,-1 +c08ffffffffe| gnu llilf %r8,-2 +c48e00000000| gnu llgfrl %r8,0xa82 e382100b0017| gnu llgt %r8,11(%r2,%r1) - b9170080| gnu llgtr %r8,%r0 + b9170080| gnu llgtr %r8,%r0 e382100b009c| gnu llgtat %r8,11(%r2,%r1) 9880100b| gnu lm %r8,%r0,11(%r1) eb80100b0098| gnu lmy %r8,%r0,11(%r1) eb80100b0004| gnu lmg %r8,%r0,11(%r1) ef80100b80cd| gnu lmd %r8,%r0,11(%r1),205(%r8) eb80100b0096| gnu lmh %r8,%r0,11(%r1) - 1180| gnu lnr %r8,%r0 - b9010080| gnu lngr %r8,%r0 - b9110080| gnu lngfr %r8,%r0 - b3410080| gnu lnxbr %f8,%f0 - b3610080| gnu lnxr %f8,%f0 - b3110080| gnu lndbr %f8,%f0 - 2180| gnu lndr %f8,%f0 - b3710080| gnu lndfr %f8,%f0 - b3010080| gnu lnebr %f8,%f0 - 3180| gnu lner %f8,%f0 + 1180| gnu lnr %r8,%r0 + b9010080| gnu lngr 
%r8,%r0 + b9110080| gnu lngfr %r8,%r0 + b3410080| gnu lnxbr %f8,%f0 + b3610080| gnu lnxr %f8,%f0 + b3110080| gnu lndbr %f8,%f0 + 2180| gnu lndr %f8,%f0 + b3710080| gnu lndfr %f8,%f0 + b3010080| gnu lnebr %f8,%f0 + 3180| gnu lner %f8,%f0 eb83100b00f2| gnu locnle %r8,11(%r1) - b9f23080| gnu locrnle %r8,%r0 + b9f23080| gnu locrnle %r8,%r0 eb83100b00e2| gnu locgnle %r8,11(%r1) - b9e23080| gnu locgrnle %r8,%r0 + b9e23080| gnu locgrnle %r8,%r0 b9aa0180| gnu lptea %r8,%r0,%r0,1 c8042006100b| gnu lpd %r0,6(%r2),11(%r1) c8052006100b| gnu lpdg %r0,6(%r2),11(%r1) e382100b008f| gnu lpq %r8,11(%r2,%r1) - 1080| gnu lpr %r8,%r0 - b9000080| gnu lpgr %r8,%r0 - b9100080| gnu lpgfr %r8,%r0 - b3400080| gnu lpxbr %f8,%f0 - b3600080| gnu lpxr %f8,%f0 - b3100080| gnu lpdbr %f8,%f0 - 2080| gnu lpdr %f8,%f0 - b3700080| gnu lpdfr %f8,%f0 - b3000080| gnu lpebr %f8,%f0 - 3080| gnu lper %f8,%f0 - 82002006| gnu lpsw 6(%r2) - b2b2100b| gnu lpswe 11(%r1) -eb0020060071| gnu lpswey 6(%r2) + 1080| gnu lpr %r8,%r0 + b9000080| gnu lpgr %r8,%r0 + b9100080| gnu lpgfr %r8,%r0 + b3400080| gnu lpxbr %f8,%f0 + b3600080| gnu lpxr %f8,%f0 + b3100080| gnu lpdbr %f8,%f0 + 2080| gnu lpdr %f8,%f0 + b3700080| gnu lpdfr %f8,%f0 + b3000080| gnu lpebr %f8,%f0 + 3080| gnu lper %f8,%f0 + 82002006| gnu lpsw 6(%r2) + b2b2100b| gnu lpswe 11(%r1) +eb0020060071| gnu lpswey 6(%r2) b182100b| gnu lra %r8,11(%r2,%r1) e382100b0013| gnu lray %r8,11(%r2,%r1) e382100b0003| gnu lrag %r8,11(%r2,%r1) -c48d00000000| gnu lrl %r8,0xb40 -c48800000000| gnu lgrl %r8,0xb46 -c48c00000000| gnu lgfrl %r8,0xb4c +c48d00000000| gnu lrl %r8,0xb40 +c48800000000| gnu lgrl %r8,0xb46 +c48c00000000| gnu lgfrl %r8,0xb4c e382100b001f| gnu lrvh %r8,11(%r2,%r1) e382100b001e| gnu lrv %r8,11(%r2,%r1) - b91f0080| gnu lrvr %r8,%r0 + b91f0080| gnu lrvr %r8,%r0 e382100b000f| gnu lrvg %r8,11(%r2,%r1) - b90f0080| gnu lrvgr %r8,%r0 - b3450080| gnu ldxbr %f8,%f0 + b90f0080| gnu lrvgr %r8,%r0 + b3450080| gnu ldxbr %f8,%f0 b3453180| gnu ldxbra %f8,3,%f0,1 - b3dd3180| 
gnu ldxtr %f8,3,%f0,1 - 2580| gnu ldxr %f8,%f0 - 2580| gnu ldxr %f8,%f0 - b3460080| gnu lexbr %f8,%f0 + b3dd3180| gnu ldxtr %f8,3,%f0,1 + 2580| gnu ldxr %f8,%f0 + 2580| gnu ldxr %f8,%f0 + b3460080| gnu lexbr %f8,%f0 b3463180| gnu lexbra %f8,3,%f0,1 - b3660080| gnu lexr %f8,%f0 - b3440080| gnu ledbr %f8,%f0 + b3660080| gnu lexr %f8,%f0 + b3440080| gnu ledbr %f8,%f0 b3443180| gnu ledbra %f8,3,%f0,1 - b3d53180| gnu ledtr %f8,3,%f0,1 - 3580| gnu ledr %f8,%f0 - 3580| gnu ledr %f8,%f0 - b24b0080| gnu lura %r8,%r0 - b9050080| gnu lurag %r8,%r0 - b3760080| gnu lzxr %f8 - b3750080| gnu lzdr %f8 - b3740080| gnu lzer %f8 - b2470080| gnu msta %r8 - b232100b| gnu msch 11(%r1) - af082006| gnu mc 6(%r2),8 -e54420060008| gnu mvhhi 6(%r2),8 -e54c20060008| gnu mvhi 6(%r2),8 -e54820060008| gnu mvghi 6(%r2),8 + b3d53180| gnu ledtr %f8,3,%f0,1 + 3580| gnu ledr %f8,%f0 + 3580| gnu ledr %f8,%f0 + b24b0080| gnu lura %r8,%r0 + b9050080| gnu lurag %r8,%r0 + b3760080| gnu lzxr %f8 + b3750080| gnu lzdr %f8 + b3740080| gnu lzer %f8 + b2470080| gnu msta %r8 + b232100b| gnu msch 11(%r1) + af082006| gnu mc 6(%r2),8 +e54420060008| gnu mvhhi 6(%r2),8 +e54c20060008| gnu mvhi 6(%r2),8 +e54820060008| gnu mvghi 6(%r2),8 d2032006100b| gnu mvc 6(4,%r2),11(%r1) - 92082006| gnu mvi 6(%r2),8 -eb0820060052| gnu mviy 6(%r2),8 + 92082006| gnu mvi 6(%r2),8 +eb0820060052| gnu mviy 6(%r2),8 e8032006100b| gnu mvcin 6(4,%r2),11(%r1) - 0e80| gnu mvcl %r8,%r0 + 0e80| gnu mvcl %r8,%r0 a880100b| gnu mvcle %r8,%r0,11(%r1) eb80100b008e| gnu mvclu %r8,%r0,11(%r1) d1032006100b| gnu mvn 6(4,%r2),11(%r1) - b2540080| gnu mvpg %r8,%r0 + b2540080| gnu mvpg %r8,%r0 e50a2006100b| gnu mvcrl 6(%r2),11(%r1) - b2550080| gnu mvst %r8,%r0 + b2550080| gnu mvst %r8,%r0 da802006100b| gnu mvcp 6(%r8,%r2),11(%r1),%r0 db802006100b| gnu mvcs 6(%r8,%r2),11(%r1),%r0 e50f2006100b| gnu mvcdk 6(%r2),11(%r1) @@ -674,147 +674,147 @@ c8002006100b| gnu mvcos 6(%r2),11(%r1),%r0 e50e2006100b| gnu mvcsk 6(%r2),11(%r1) d3032006100b| gnu mvz 
6(4,%r2),11(%r1) e382100b0084| gnu mg %r8,11(%r2,%r1) - b9ec0080| gnu mgrk %r8,%r0,%r0 - 5c82100b| gnu m %r8,11(%r2,%r1) + b9ec0080| gnu mgrk %r8,%r0,%r0 + 5c82100b| gnu m %r8,11(%r2,%r1) e382100b005c| gnu mfy %r8,11(%r2,%r1) - 1c80| gnu mr %r8,%r0 - b34c0080| gnu mxbr %f8,%f0 - b3d80080| gnu mxtr %f8,%f0,%f0 + 1c80| gnu mr %r8,%r0 + b34c0080| gnu mxbr %f8,%f0 + b3d80080| gnu mxtr %f8,%f0,%f0 b3d80180| gnu mxtra %f8,%f0,%f0,1 - 2680| gnu mxr %f8,%f0 + 2680| gnu mxr %f8,%f0 ed82100b001c| gnu mdb %f8,11(%r2,%r1) - b31c0080| gnu mdbr %f8,%f0 - b3d00080| gnu mdtr %f8,%f0,%f0 + b31c0080| gnu mdbr %f8,%f0 + b3d00080| gnu mdtr %f8,%f0,%f0 b3d00180| gnu mdtra %f8,%f0,%f0,1 6c82100b| gnu md %f8,11(%r2,%r1) - 2c80| gnu mdr %f8,%f0 + 2c80| gnu mdr %f8,%f0 ed82100b0007| gnu mxdb %f8,11(%r2,%r1) - b3070080| gnu mxdbr %f8,%f0 + b3070080| gnu mxdbr %f8,%f0 6782100b| gnu mxd %f8,11(%r2,%r1) - 2780| gnu mxdr %f8,%f0 + 2780| gnu mxdr %f8,%f0 ed82100b0017| gnu meeb %f8,11(%r2,%r1) - b3170080| gnu meebr %f8,%f0 + b3170080| gnu meebr %f8,%f0 ed82100b0037| gnu mee %f8,11(%r2,%r1) - b3370080| gnu meer %f8,%f0 + b3370080| gnu meer %f8,%f0 ed82100b000c| gnu mdeb %f8,11(%r2,%r1) - b30c0080| gnu mdebr %f8,%f0 + b30c0080| gnu mdebr %f8,%f0 7c82100b| gnu mde %f8,11(%r2,%r1) - 3c80| gnu mder %f8,%f0 + 3c80| gnu mder %f8,%f0 7c82100b| gnu mde %f8,11(%r2,%r1) - 3c80| gnu mder %f8,%f0 + 3c80| gnu mder %f8,%f0 ed02100b803a| gnu may %f8,%f0,11(%r2,%r1) - b33a8000| gnu mayr %f8,%f0,%f0 + b33a8000| gnu mayr %f8,%f0,%f0 ed02100b801e| gnu madb %f8,%f0,11(%r2,%r1) - b31e8000| gnu madbr %f8,%f0,%f0 + b31e8000| gnu madbr %f8,%f0,%f0 ed02100b803e| gnu mad %f8,%f0,11(%r2,%r1) - b33e8000| gnu madr %f8,%f0,%f0 + b33e8000| gnu madr %f8,%f0,%f0 ed02100b800e| gnu maeb %f8,%f0,11(%r2,%r1) - b30e8000| gnu maebr %f8,%f0,%f0 + b30e8000| gnu maebr %f8,%f0,%f0 ed02100b802e| gnu mae %f8,%f0,11(%r2,%r1) - b32e8000| gnu maer %f8,%f0,%f0 + b32e8000| gnu maer %f8,%f0,%f0 ed02100b803c| gnu mayh %f8,%f0,11(%r2,%r1) - 
b33c8000| gnu mayhr %f8,%f0,%f0 + b33c8000| gnu mayhr %f8,%f0,%f0 ed02100b8038| gnu mayl %f8,%f0,11(%r2,%r1) - b3388000| gnu maylr %f8,%f0,%f0 + b3388000| gnu maylr %f8,%f0,%f0 ed02100b801f| gnu msdb %f8,%f0,11(%r2,%r1) - b31f8000| gnu msdbr %f8,%f0,%f0 + b31f8000| gnu msdbr %f8,%f0,%f0 ed02100b803f| gnu msd %f8,%f0,11(%r2,%r1) - b33f8000| gnu msdr %f8,%f0,%f0 + b33f8000| gnu msdr %f8,%f0,%f0 ed02100b800f| gnu mseb %f8,%f0,11(%r2,%r1) - b30f8000| gnu msebr %f8,%f0,%f0 + b30f8000| gnu msebr %f8,%f0,%f0 ed02100b802f| gnu mse %f8,%f0,11(%r2,%r1) - b32f8000| gnu mser %f8,%f0,%f0 + b32f8000| gnu mser %f8,%f0,%f0 fc332006100b| gnu mp 6(4,%r2),11(4,%r1) 4c82100b| gnu mh %r8,11(%r2,%r1) e382100b007c| gnu mhy %r8,11(%r2,%r1) e382100b003c| gnu mgh %r8,11(%r2,%r1) - a78c0008| gnu mhi %r8,8 - a78d0008| gnu mghi %r8,8 + a78cfffd| gnu mhi %r8,-3 + a78dfffd| gnu mghi %r8,-3 e382100b0086| gnu mlg %r8,11(%r2,%r1) - b9860080| gnu mlgr %r8,%r0 + b9860080| gnu mlgr %r8,%r0 e382100b0096| gnu ml %r8,11(%r2,%r1) - b9960080| gnu mlr %r8,%r0 + b9960080| gnu mlr %r8,%r0 7182100b| gnu ms %r8,11(%r2,%r1) e382100b0053| gnu msc %r8,11(%r2,%r1) - b2520080| gnu msr %r8,%r0 - b9fd0080| gnu msrkc %r8,%r0,%r0 + b2520080| gnu msr %r8,%r0 + b9fd0080| gnu msrkc %r8,%r0,%r0 e382100b0051| gnu msy %r8,11(%r2,%r1) e382100b000c| gnu msg %r8,11(%r2,%r1) e382100b0083| gnu msgc %r8,11(%r2,%r1) - b90c0080| gnu msgr %r8,%r0 + b90c0080| gnu msgr %r8,%r0 b9ed0080| gnu msgrkc %r8,%r0,%r0 e382100b001c| gnu msgf %r8,11(%r2,%r1) - b91c0080| gnu msgfr %r8,%r0 -c28100000008| gnu msfi %r8,8 -c28000000008| gnu msgfi %r8,8 + b91c0080| gnu msgfr %r8,%r0 +c281ffffffff| gnu msfi %r8,-1 +c280ffffffff| gnu msgfi %r8,-1 ed02100b803d| gnu myh %f8,%f0,11(%r2,%r1) - b33d8000| gnu myhr %f8,%f0,%f0 + b33d8000| gnu myhr %f8,%f0,%f0 ed02100b8039| gnu myl %f8,%f0,11(%r2,%r1) - b3398000| gnu mylr %f8,%f0,%f0 + b3398000| gnu mylr %f8,%f0,%f0 ed02100b803b| gnu my %f8,%f0,11(%r2,%r1) - b33b8000| gnu myr %f8,%f0,%f0 - b9740080| gnu nnrk 
%r8,%r0,%r0 - b9640080| gnu nngrk %r8,%r0,%r0 - b93b0000| gnu nnpa - b2fa00c8| gnu niai 12,8 + b33b8000| gnu myr %f8,%f0,%f0 + b9740080| gnu nnrk %r8,%r0,%r0 + b9640080| gnu nngrk %r8,%r0,%r0 + b93b0000| gnu nnpa + b2fa00c8| gnu niai 12,8 e382100b0025| gnu ntstg %r8,11(%r2,%r1) - b9760080| gnu nork %r8,%r0,%r0 - b9660080| gnu nogrk %r8,%r0,%r0 - b9770080| gnu nxrk %r8,%r0,%r0 - b9670080| gnu nxgrk %r8,%r0,%r0 - 5682100b| gnu o %r8,11(%r2,%r1) - 1680| gnu or %r8,%r0 - b9f60080| gnu ork %r8,%r0,%r0 + b9760080| gnu nork %r8,%r0,%r0 + b9660080| gnu nogrk %r8,%r0,%r0 + b9770080| gnu nxrk %r8,%r0,%r0 + b9670080| gnu nxgrk %r8,%r0,%r0 + 5682100b| gnu o %r8,11(%r2,%r1) + 1680| gnu or %r8,%r0 + b9f60080| gnu ork %r8,%r0,%r0 e382100b0056| gnu oy %r8,11(%r2,%r1) e382100b0081| gnu og %r8,11(%r2,%r1) - b9810080| gnu ogr %r8,%r0 - b9e60080| gnu ogrk %r8,%r0,%r0 + b9810080| gnu ogr %r8,%r0 + b9e60080| gnu ogrk %r8,%r0,%r0 d6032006100b| gnu oc 6(4,%r2),11(%r1) - 96082006| gnu oi 6(%r2),8 -eb0820060056| gnu oiy 6(%r2),8 - a5880008| gnu oihh %r8,8 - a5890008| gnu oihl %r8,8 -c08c00000008| gnu oihf %r8,8 - a58a0008| gnu oilh %r8,8 - a58b0008| gnu oill %r8,8 -c08d00000008| gnu oilf %r8,8 - b9750080| gnu ocrk %r8,%r0,%r0 - b9650080| gnu ocgrk %r8,%r0,%r0 + 96ff2006| gnu oi 6(%r2),-1 +ebff20060056| gnu oiy 6(%r2),-1 + a588ffff| gnu oihh %r8,-1 + a589ffff| gnu oihl %r8,-1 +c08cffffffff| gnu oihf %r8,-1 + a58affff| gnu oilh %r8,-1 + a58bffff| gnu oill %r8,-1 +c08dffffffff| gnu oilf %r8,-1 + b9750080| gnu ocrk %r8,%r0,%r0 + b9650080| gnu ocgrk %r8,%r0,%r0 f2332006100b| gnu pack 6(4,%r2),11(4,%r1) e9032006100b| gnu pka 6(%r2),11(4,%r1) e1032006100b| gnu pku 6(%r2),11(4,%r1) - b22e0080| gnu pgin %r8,%r0 - b22f0080| gnu pgout %r8,%r0 - b92c0000| gnu pcc - b9280000| gnu pckmo - 010a| gnu pfpo - b9af0080| gnu pfmf %r8,%r0 + b22e0080| gnu pgin %r8,%r0 + b22f0080| gnu pgout %r8,%r0 + b92c0000| gnu pcc + b9280000| gnu pckmo + 010a| gnu pfpo + b9af0080| gnu pfmf %r8,%r0 ee80100b80cd| gnu plo 
%r8,11(%r1),%r0,205(%r8) - b2e83080| gnu ppa %r8,%r0,3 - b93c0080| gnu prno %r8,%r0 - b93c0080| gnu prno %r8,%r0 - 0104| gnu ptff - b9a20080| gnu ptf %r8 - b9e13080| gnu popcnt %r8,%r0,3 -e372100b0036| gnu pfd 7,11(%r2,%r1) -c67200000000| gnu pfdrl 7,0xe68 - b218100b| gnu pc 11(%r1) - 0101| gnu pr - b2280080| gnu pt %r8,%r0 - b99e0080| gnu pti %r8,%r0 - b2480000| gnu palb - b20d0000| gnu ptlb + b2e83080| gnu ppa %r8,%r0,3 + b93c0080| gnu prno %r8,%r0 + b93c0080| gnu prno %r8,%r0 + 0104| gnu ptff + b9a20080| gnu ptf %r8 + b9e13080| gnu popcnt %r8,%r0,3 +e372100b0036| gnu pfd 7,11(%r2,%r1) +c67200000000| gnu pfdrl 7,0xe68 + b218100b| gnu pc 11(%r1) + 0101| gnu pr + b2280080| gnu pt %r8,%r0 + b99e0080| gnu pti %r8,%r0 + b2480000| gnu palb + b20d0000| gnu ptlb b3fd0180| gnu qaxtr %f8,%f0,%f0,1 b3f50180| gnu qadtr %f8,%f0,%f0,1 - b28f100b| gnu qpaci 11(%r1) + b28f100b| gnu qpaci 11(%r1) b3ff0180| gnu rrxtr %f8,%f0,%r0,1 b3f70180| gnu rrdtr %f8,%f0,%r0,1 - b23b0000| gnu rchp - b98b0180| gnu rdp %r8,%r0,%r0,1 - b22a0080| gnu rrbe %r8,%r0 - b9ae0080| gnu rrbm %r8,%r0 - b277100b| gnu rp 11(%r1) - b2380000| gnu rsch + b23b0000| gnu rchp + b98b0180| gnu rdp %r8,%r0,%r0,1 + b22a0080| gnu rrbe %r8,%r0 + b9ae0080| gnu rrbm %r8,%r0 + b277100b| gnu rp 11(%r1) + b2380000| gnu rsch eb80100b001d| gnu rll %r8,%r0,11(%r1) eb80100b001c| gnu rllg %r8,%r0,11(%r1) ec8009691254| gnu rnsbg %r8,%r0,9,105,18 @@ -824,50 +824,50 @@ ec8009691259| gnu risbgn %r8,%r0,9,105,18 ec800969125d| gnu risbhg %r8,%r0,9,105,18 ec8009691251| gnu risblg %r8,%r0,9,105,18 ec8009691256| gnu rosbg %r8,%r0,9,105,18 - b25e0080| gnu srst %r8,%r0 - b9be0080| gnu srstu %r8,%r0 - b9f00180| gnu selro %r8,%r0,%r0 + b25e0080| gnu srst %r8,%r0 + b9be0080| gnu srstu %r8,%r0 + b9f00180| gnu selro %r8,%r0,%r0 b9e30180| gnu selgro %r8,%r0,%r0 b9c00180| gnu selfhro %r8,%r0,%r0 - b24e0080| gnu sar %a8,%r0 - b2370000| gnu sal - b219100b| gnu sac 11(%r1) - b279100b| gnu sacf 11(%r1) - 010c| gnu sam24 - 010d| gnu sam31 - 010e| gnu 
sam64 - b299100b| gnu srnm 11(%r1) - b2b8100b| gnu srnmb 11(%r1) - b23c0000| gnu schm - b204100b| gnu sck 11(%r1) - b206100b| gnu sckc 11(%r1) - 0107| gnu sckpf - b208100b| gnu spt 11(%r1) - b2b9100b| gnu srnmt 11(%r1) - b3840080| gnu sfpc %r8 - b3850080| gnu sfasr %r8 - b210100b| gnu spx 11(%r1) - 0480| gnu spm %r8 - b20a100b| gnu spka 11(%r1) - b2250080| gnu ssar %r8 - b99f0080| gnu ssair %r8 - b22b3080| gnu sske %r8,%r0,3 - 80002006| gnu ssm 6(%r2) + b24e0080| gnu sar %a8,%r0 + b2370000| gnu sal + b219100b| gnu sac 11(%r1) + b279100b| gnu sacf 11(%r1) + 010c| gnu sam24 + 010d| gnu sam31 + 010e| gnu sam64 + b299100b| gnu srnm 11(%r1) + b2b8100b| gnu srnmb 11(%r1) + b23c0000| gnu schm + b204100b| gnu sck 11(%r1) + b206100b| gnu sckc 11(%r1) + 0107| gnu sckpf + b208100b| gnu spt 11(%r1) + b2b9100b| gnu srnmt 11(%r1) + b3840080| gnu sfpc %r8 + b3850080| gnu sfasr %r8 + b210100b| gnu spx 11(%r1) + 0480| gnu spm %r8 + b20a100b| gnu spka 11(%r1) + b2250080| gnu ssar %r8 + b99f0080| gnu ssair %r8 + b22b3080| gnu sske %r8,%r0,3 + 80002006| gnu ssm 6(%r2) f0392006100b| gnu srp 6(4,%r2),11(%r1),9 - 8f80100b| gnu slda %r8,11(%r1) - 8d80100b| gnu sldl %r8,11(%r1) - 8b80100b| gnu sla %r8,11(%r1) + 8f80100b| gnu slda %r8,11(%r1) + 8d80100b| gnu sldl %r8,11(%r1) + 8b80100b| gnu sla %r8,11(%r1) eb80100b00dd| gnu slak %r8,%r0,11(%r1) eb80100b000b| gnu slag %r8,%r0,11(%r1) - 8980100b| gnu sll %r8,11(%r1) + 8980100b| gnu sll %r8,11(%r1) eb80100b00df| gnu sllk %r8,%r0,11(%r1) eb80100b000d| gnu sllg %r8,%r0,11(%r1) - 8e80100b| gnu srda %r8,11(%r1) - 8c80100b| gnu srdl %r8,11(%r1) - 8a80100b| gnu sra %r8,11(%r1) + 8e80100b| gnu srda %r8,11(%r1) + 8c80100b| gnu srdl %r8,11(%r1) + 8a80100b| gnu sra %r8,11(%r1) eb80100b00dc| gnu srak %r8,%r0,11(%r1) eb80100b000a| gnu srag %r8,%r0,11(%r1) - 8880100b| gnu srl %r8,11(%r1) + 8880100b| gnu srl %r8,11(%r1) eb80100b00de| gnu srlk %r8,%r0,11(%r1) eb80100b000c| gnu srlg %r8,%r0,11(%r1) ed02100b8048| gnu slxt %f8,%f0,11(%r2,%r1) @@ -875,18 +875,18 
@@ ed02100b8040| gnu sldt %f8,%f0,11(%r2,%r1) ed02100b8049| gnu srxt %f8,%f0,11(%r2,%r1) ed02100b8041| gnu srdt %f8,%f0,11(%r2,%r1) ae80100b| gnu sigp %r8,%r0,11(%r1) - b9380080| gnu sortl %r8,%r0 - b3160080| gnu sqxbr %f8,%f0 - b3360080| gnu sqxr %f8,%f0 + b9380080| gnu sortl %r8,%r0 + b3160080| gnu sqxbr %f8,%f0 + b3360080| gnu sqxr %f8,%f0 ed82100b0015| gnu sqdb %f8,11(%r2,%r1) - b3150080| gnu sqdbr %f8,%f0 + b3150080| gnu sqdbr %f8,%f0 ed82100b0035| gnu sqd %f8,11(%r2,%r1) - b2440080| gnu sqdr %f8,%f0 + b2440080| gnu sqdr %f8,%f0 ed82100b0014| gnu sqeb %f8,11(%r2,%r1) - b3140080| gnu sqebr %f8,%f0 + b3140080| gnu sqebr %f8,%f0 ed82100b0034| gnu sqe %f8,11(%r2,%r1) - b2450080| gnu sqer %f8,%f0 - b233100b| gnu ssch 11(%r1) + b2450080| gnu sqer %f8,%f0 + b233100b| gnu ssch 11(%r1) 5082100b| gnu st %r8,11(%r2,%r1) e382100b0050| gnu sty %r8,11(%r2,%r1) e382100b0024| gnu stg %r8,11(%r2,%r1) @@ -896,32 +896,32 @@ ed82100b0067| gnu stdy %f8,11(%r2,%r1) ed82100b0066| gnu stey %f8,11(%r2,%r1) 9b80100b| gnu stam %a8,%a0,11(%r1) eb80100b009b| gnu stamy %a8,%a0,11(%r1) - b201100b| gnu stbear 11(%r1) - b23a100b| gnu stcps 11(%r1) - b239100b| gnu stcrw 11(%r1) + b201100b| gnu stbear 11(%r1) + b23a100b| gnu stcps 11(%r1) + b239100b| gnu stcrw 11(%r1) 4282100b| gnu stc %r8,11(%r2,%r1) e382100b0072| gnu stcy %r8,11(%r2,%r1) e382100b00c3| gnu stch %r8,11(%r2,%r1) eb83100b002c| gnu stcmh %r8,3,11(%r1) be83100b| gnu stcm %r8,3,11(%r1) eb83100b002d| gnu stcmy %r8,3,11(%r1) - b205100b| gnu stck 11(%r1) - b207100b| gnu stckc 11(%r1) - b278100b| gnu stcke 11(%r1) - b27c100b| gnu stckf 11(%r1) + b205100b| gnu stck 11(%r1) + b207100b| gnu stckc 11(%r1) + b278100b| gnu stcke 11(%r1) + b27c100b| gnu stckf 11(%r1) b680100b| gnu stctl %c8,%c0,11(%r1) eb80100b0025| gnu stctg %c8,%c0,11(%r1) - b212100b| gnu stap 11(%r1) - b202100b| gnu stidp 11(%r1) - b209100b| gnu stpt 11(%r1) - b2b1100b| gnu stfl 11(%r1) - b2b0100b| gnu stfle 11(%r1) - b29c100b| gnu stfpc 11(%r1) + b212100b| gnu stap 11(%r1) 
+ b202100b| gnu stidp 11(%r1) + b209100b| gnu stpt 11(%r1) + b2b1100b| gnu stfl 11(%r1) + b2b0100b| gnu stfle 11(%r1) + b29c100b| gnu stfpc 11(%r1) e382100b0049| gnu stgsc %r8,11(%r2,%r1) 4082100b| gnu sth %r8,11(%r2,%r1) e382100b0070| gnu sthy %r8,11(%r2,%r1) e382100b00c7| gnu sthh %r8,11(%r2,%r1) -c48700000000| gnu sthrl %r8,0x109c +c48700000000| gnu sthrl %r8,0x109c e382100b00cb| gnu stfh %r8,11(%r2,%r1) eb83100b00e1| gnu stocfhnle %r8,11(%r1) 9080100b| gnu stm %r8,%r0,11(%r1) @@ -931,67 +931,67 @@ eb80100b0026| gnu stmh %r8,%r0,11(%r1) eb83100b00f3| gnu stocnle %r8,11(%r1) eb83100b00e3| gnu stocgnle %r8,11(%r1) e382100b008e| gnu stpq %r8,11(%r2,%r1) - b211100b| gnu stpx 11(%r1) + b211100b| gnu stpx 11(%r1) e5022006100b| gnu strag 6(%r2),11(%r1) -c48f00000000| gnu strl %r8,0x10e0 -c48b00000000| gnu stgrl %r8,0x10e6 +c48f00000000| gnu strl %r8,0x10e0 +c48b00000000| gnu stgrl %r8,0x10e6 e382100b003f| gnu strvh %r8,11(%r2,%r1) e382100b003e| gnu strv %r8,11(%r2,%r1) e382100b002f| gnu strvg %r8,11(%r2,%r1) - b234100b| gnu stsch 11(%r1) - b27d100b| gnu stsi 11(%r1) - ac082006| gnu stnsm 6(%r2),8 - ad082006| gnu stosm 6(%r2),8 - b2460080| gnu stura %r8,%r0 - b9250080| gnu sturg %r8,%r0 - 5b82100b| gnu s %r8,11(%r2,%r1) - 1b80| gnu sr %r8,%r0 - b9f90080| gnu srk %r8,%r0,%r0 + b234100b| gnu stsch 11(%r1) + b27d100b| gnu stsi 11(%r1) + ac082006| gnu stnsm 6(%r2),8 + ad082006| gnu stosm 6(%r2),8 + b2460080| gnu stura %r8,%r0 + b9250080| gnu sturg %r8,%r0 + 5b82100b| gnu s %r8,11(%r2,%r1) + 1b80| gnu sr %r8,%r0 + b9f90080| gnu srk %r8,%r0,%r0 e382100b005b| gnu sy %r8,11(%r2,%r1) e382100b0009| gnu sg %r8,11(%r2,%r1) - b9090080| gnu sgr %r8,%r0 - b9e90080| gnu sgrk %r8,%r0,%r0 + b9090080| gnu sgr %r8,%r0 + b9e90080| gnu sgrk %r8,%r0,%r0 e382100b0019| gnu sgf %r8,11(%r2,%r1) - b9190080| gnu sgfr %r8,%r0 - b34b0080| gnu sxbr %f8,%f0 - b3db0080| gnu sxtr %f8,%f0,%f0 + b9190080| gnu sgfr %r8,%r0 + b34b0080| gnu sxbr %f8,%f0 + b3db0080| gnu sxtr %f8,%f0,%f0 b3db0180| gnu sxtra 
%f8,%f0,%f0,1 ed82100b001b| gnu sdb %f8,11(%r2,%r1) - b31b0080| gnu sdbr %f8,%f0 - b3d30080| gnu sdtr %f8,%f0,%f0 + b31b0080| gnu sdbr %f8,%f0 + b3d30080| gnu sdtr %f8,%f0,%f0 b3d30180| gnu sdtra %f8,%f0,%f0,1 ed82100b000b| gnu seb %f8,11(%r2,%r1) - b30b0080| gnu sebr %f8,%f0 + b30b0080| gnu sebr %f8,%f0 fb332006100b| gnu sp 6(4,%r2),11(4,%r1) 4b82100b| gnu sh %r8,11(%r2,%r1) e382100b007b| gnu shy %r8,11(%r2,%r1) e382100b0039| gnu sgh %r8,11(%r2,%r1) - b9c90080| gnu shhhr %r8,%r0,%r0 - b9d90080| gnu shhlr %r8,%r0,%r0 + b9c90080| gnu shhhr %r8,%r0,%r0 + b9d90080| gnu shhlr %r8,%r0,%r0 5f82100b| gnu sl %r8,11(%r2,%r1) - 1f80| gnu slr %r8,%r0 - b9fb0080| gnu slrk %r8,%r0,%r0 + 1f80| gnu slr %r8,%r0 + b9fb0080| gnu slrk %r8,%r0,%r0 e382100b005f| gnu sly %r8,11(%r2,%r1) e382100b000b| gnu slg %r8,11(%r2,%r1) - b90b0080| gnu slgr %r8,%r0 - b9eb0080| gnu slgrk %r8,%r0,%r0 + b90b0080| gnu slgr %r8,%r0 + b9eb0080| gnu slgrk %r8,%r0,%r0 e382100b001b| gnu slgf %r8,11(%r2,%r1) - b91b0080| gnu slgfr %r8,%r0 + b91b0080| gnu slgfr %r8,%r0 b9cb0080| gnu slhhhr %r8,%r0,%r0 b9db0080| gnu slhhlr %r8,%r0,%r0 -c28500000008| gnu slfi %r8,8 -c28400000008| gnu slgfi %r8,8 +c28500000008| gnu slfi %r8,8 +c28400000008| gnu slgfi %r8,8 e382100b0099| gnu slb %r8,11(%r2,%r1) - b9990080| gnu slbr %r8,%r0 + b9990080| gnu slbr %r8,%r0 e382100b0089| gnu slbg %r8,11(%r2,%r1) - b9890080| gnu slbgr %r8,%r0 - 3780| gnu sxr %f8,%f0 + b9890080| gnu slbgr %r8,%r0 + 3780| gnu sxr %f8,%f0 6b82100b| gnu sd %f8,11(%r2,%r1) - 2b80| gnu sdr %f8,%f0 + 2b80| gnu sdr %f8,%f0 7b82100b| gnu se %f8,11(%r2,%r1) - 3b80| gnu ser %f8,%f0 + 3b80| gnu ser %f8,%f0 6f82100b| gnu sw %f8,11(%r2,%r1) - 2f80| gnu swr %f8,%f0 + 2f80| gnu swr %f8,%f0 7f82100b| gnu su %f8,11(%r2,%r1) 3f80| gnu sur %f8,%f0 0a7c| gnu svc 124 @@ -1032,7 +1032,7 @@ dd032006100b| gnu trt 6(4,%r2),11(%r1) b9bf3080| gnu trte %r8,%r0,3 d0032006100b| gnu trtr 6(4,%r2),11(%r1) b9bd3080| gnu trtre %r8,%r0,3 - b2a50080| gnu tre %r8,%r0 + b2a50080| gnu tre 
%r8,%r0 b9933080| gnu troo %r8,%r0,3 b9923080| gnu trot %r8,%r0,3 b9913080| gnu trto %r8,%r0,3 @@ -1136,10 +1136,10 @@ e722100b3801| gnu vleh %v18,11(%r2,%r1),3 e722100b3803| gnu vlef %v18,11(%r2,%r1),3 e722100b3802| gnu vleg %v18,11(%r2,%r1),3 e722100b3800| gnu vleb %v18,11(%r2,%r1),3 -e72000083841| gnu vleih %v18,8,3 -e72000083843| gnu vleif %v18,8,3 -e72000083842| gnu vleig %v18,8,3 -e72000083840| gnu vleib %v18,8,3 +e720ffff3841| gnu vleih %v18,-1,3 +e720ffff3843| gnu vleif %v18,-1,3 +e720ffff3842| gnu vleig %v18,-1,3 +e720ffff3840| gnu vleib %v18,-1,3 e622100b3807| gnu vlerg %v18,11(%r2,%r1) e723009138c7| gnu vfidb %v18,%v3,1,9 e785100b1021| gnu vlgvh %r8,%v5,11(%r1) diff --git a/s390x/s390xmap/map.go b/s390x/s390xmap/map.go index 9ba698f4..1adfdfbe 100644 --- a/s390x/s390xmap/map.go +++ b/s390x/s390xmap/map.go @@ -272,7 +272,7 @@ func computeMaskValueReserved(args Args, text string) (mask, value, reserved uin } func Imm_signed_8bit_check(op string) bool { - imm_8 := []string{"ASI", "AGSI", "ALSI", "ALGSI", "CIB", "CGIB", "CIJ", "CGIJ"} + imm_8 := []string{"ASI", "AGSI", "ALSI", "ALGSI", "CIB", "CGIB", "CIJ", "CGIJ", "NI", "NIY", "OI", "OIY", "XI", "XIY"} var ret bool ret = false for _, str := range imm_8 { @@ -285,7 +285,7 @@ func Imm_signed_8bit_check(op string) bool { } func Imm_signed_16bit_check(op string) bool { - imm_16 := []string{"AHI", "AGHI", "ALHSIK", "ALGHSIK", "AHIK", "AGHIK", "LHI", "LGHI", "MVGHI", "CIT", "CGIT", "CGHI", "CGHSI", "CHHSI", "CHI", "CHSI", "CRJ", "CGRJ"} + imm_16 := []string{"AHI", "AGHI", "ALHSIK", "ALGHSIK", "AHIK", "AGHIK", "LHI", "LGHI", "MVGHI", "CIT", "CGIT", "CGHI", "CGHSI", "CHHSI", "CHI", "CHSI", "CRJ", "CGRJ", "NIHH", "NILL", "NIHL", "NILH", "LLIHH", "LLILL", "LLIHL", "LLILH", "OIHH", "OILL", "OIHL", "OILH", "VLEIB", "VLEIH", "VLEIF", "VLEIG"} var ret bool ret = false for _, str := range imm_16 { @@ -298,7 +298,7 @@ func Imm_signed_16bit_check(op string) bool { } func Imm_signed_32bit_check(op string) bool { - imm_32 := 
[]string{"AFI", "AGFI", "AIH", "CIH", "CFI", "CGFI", "CRL", "STRL", "STGRL"} + imm_32 := []string{"AFI", "AGFI", "AIH", "CIH", "CFI", "CGFI", "CRL", "STRL", "STGRL", "LGFI", "LLIHF", "LLILF", "MSFI", "MSGFI", "MGHI", "MHI", "NIHF", "NILF", "OILF", "OIHF", "XILF", "XIHF"} var ret bool ret = false for _, str := range imm_32 { @@ -326,7 +326,6 @@ func check_flags(flags string) bool { // detected instructions into p. One entry may generate multiple intruction // entries as each extended mnemonic listed in text is treated like a unique // instruction. -// func add(p *Prog, text, mnemonics, encoding, format string) { func add(p *Prog, text, mnemonics, encoding, flags string) { // Parse encoding, building size and offset of each field. // The first field in the encoding is the smallest offset. @@ -338,7 +337,6 @@ func add(p *Prog, text, mnemonics, encoding, flags string) { mask, value, dontCare := computeMaskValueReserved(args, text) // split mnemonics into individual instructions - // example: "b target_addr (AA=0 LK=0)|ba target_addr (AA=1 LK=0)|bl target_addr (AA=0 LK=1)|bla target_addr (AA=1 LK=1)" inst := Inst{Text: text, Encoding: mnemonics, Value: value, Mask: mask, DontCare: dontCare} // order inst.Args according to mnemonics order From 153a480e3389d31cbec417e0df8ae6a72cba4702 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Wed, 16 Oct 2024 08:15:10 +0200 Subject: [PATCH 034/200] s390x: add s390x disassembler support for the plan9 Change-Id: I0eb9a10535175bb70dbab5737c4e02e68fd44c94 Reviewed-on: https://go-review.googlesource.com/c/arch/+/620475 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt Reviewed-by: Vishwanatha HD --- s390x/s390xasm/decode_test.go | 2 + s390x/s390xasm/plan9.go | 1282 ++++++++++++++++++++++++++++ s390x/s390xasm/testdata/decode.txt | 162 ++++ 3 files changed, 1446 insertions(+) create mode 100644 s390x/s390xasm/plan9.go create mode 100644 s390x/s390xasm/testdata/decode.txt diff --git 
a/s390x/s390xasm/decode_test.go b/s390x/s390xasm/decode_test.go index 5ca0b741..29bce8e8 100644 --- a/s390x/s390xasm/decode_test.go +++ b/s390x/s390xasm/decode_test.go @@ -75,6 +75,8 @@ func decode(data []byte, t *testing.T, filename string) { switch syntax { case "gnu": out = GNUSyntax(inst, pc) + case "plan9": + out = GoSyntax(inst, pc, nil) default: t.Errorf("unknown syntax %q", syntax) continue diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go new file mode 100644 index 00000000..b4df0b89 --- /dev/null +++ b/s390x/s390xasm/plan9.go @@ -0,0 +1,1282 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s390xasm + +import ( + "fmt" + "strconv" + "strings" +) + +var vectorSize = map[int]string{0: "B", 1: "H", 2: "F", 3: "G", 4: "Q"} +var vectorCS = map[int]string{0: "BS", 1: "HS", 2: "FS", 3: "GS"} + +// GoSyntax returns the Go assembler syntax for the instruction. +// The syntax was originally defined by Plan 9. +// The inst relates to single instruction. +// The pc is the program counter of the instruction, used for +// expanding PC-relative addresses into absolute ones. +// The symname function queries the symbol table for the program +// being disassembled. Given a target address it returns the name +// and base address of the symbol containing the target, if any; +// otherwise it returns "", 0. 
+func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string { + if symname == nil { + symname = func(uint64) (string, uint64) { return "", 0 } + } + + var args []string + opString := inst.Op.String() + op := strings.ToUpper(opString) + for i := 0; i < len(inst.Args); i++ { + if inst.Args[i] == nil { + break + } + switch inst.Args[i].(type) { + case Disp12, Disp20: + var temp []string + switch inst.Args[i+1].(type) { + case Index: // D(X,B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operandx(temp)) + i = i + 2 + case Base: // D(B) + for j := 0; j < 2; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operand(temp)) + i = i + 1 + case VReg: // D(B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operandv(temp)) + i = i + 2 + case Len: // D(L,B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + ar1, ar2 := mem_operandl(temp) + args = append(args, ar1, ar2) + i = i + 2 + default: // D(R,B) + for j := 0; j < 3; j++ { + temp = append(temp, plan9Arg(&inst, pc, symname, inst.Args[i+j])) + } + args = append(args, mem_operandx(temp)) + i = i + 2 + } + default: + args = append(args, plan9Arg(&inst, pc, symname, inst.Args[i])) + } + } + if strings.HasPrefix(op, "V") || strings.Contains(op, "WFC") || strings.Contains(op, "WFK") { + args = args[:len(args)-1] + } + + switch inst.Op { + default: + switch len(args) { + case 0: + return op + case 1: + return fmt.Sprintf("%s %s", op, args[0]) + case 2: + if reverseOperandOrder(inst.Op) { + args[0], args[1] = args[1], args[0] + } + case 3: + if reverseOperandOrder(inst.Op) { + args[0], args[2] = args[2], args[0] + } else if reverseAllOperands(inst.Op) { + args[0], args[1], args[2] = args[1], args[2], args[0] + } + case 4: + if 
reverseOperandOrder(inst.Op) { + args[0], args[3] = args[3], args[0] + } else if reverseAllOperands(inst.Op) { + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + } + } + case LCGR, LCGFR: + switch inst.Op { + case LCGR: + op = "NEG" + case LCGFR: + op = "NEGW" + } + if args[0] == args[1] { + args = args[:1] + } else { + args[0], args[1] = args[1], args[0] + } + case LD, LE, LG, LGF, LLGF, LGH, LLGH, LGB, LLGC, LDY, LEY, LRVG, LRV, LRVH: + args[0], args[1] = args[1], args[0] + switch inst.Op { + case LG: + op = "MOVD" + case LGF: + op = "MOVW" + case LLGF: + op = "MOVWZ" + case LGH: + op = "MOVH" + case LLGH: + op = "MOVHZ" + case LGB: + op = "MOVB" + case LLGC: + op = "MOVBZ" + case LDY, LD: + op = "FMOVD" + case LEY, LE: + op = "FMOVS" + case LRVG: + op = "MOVDBR" + case LRV: + op = "MOVWBR" + case LRVH: + op = "MOVHBR" + } + case LA, LAY: + args[0], args[1] = args[1], args[0] + op = "MOVD" + + case LAA, LAAG, LAAL, LAALG, LAN, LANG, LAX, LAXG, LAO, LAOG: + args[0], args[1] = args[1], args[0] + case LM, LMY, LMG: // Load Multiple + switch inst.Op { + case LM, LMY: + op = "LMY" + } + args[0], args[1], args[2] = args[2], args[0], args[1] + + case STM, STMY, STMG: // Store Multiple + switch inst.Op { + case STM, STMY: + op = "STMY" + } + case ST, STY, STG, STHY, STCY, STRVG, STRV: + switch inst.Op { + case ST, STY: + op = "MOVW" + case STHY: + op = "MOVH" + case STCY: + op = "MOVB" + case STG: + op = "MOVD" + case STRVG: + op = "MOVDBR" + case STRV: + op = "MOVWBR" + } + case LGR, LGFR, LGHR, LGBR, LLGFR, LLGHR, LLGCR, LRVGR, LRVR, LDR: + switch inst.Op { + case LGR: + op = "MOVD" + case LGFR: + op = "MOVW" + case LGHR: + op = "MOVH" + case LGBR: + op = "MOVB" + case LLGFR: + op = "MOVWZ" + case LLGHR: + op = "MOVHZ" + case LLGCR: + op = "MOVBZ" + case LRVGR: + op = "MOVDBR" + case LRVR: + op = "MOVWBR" + case LDR: + op = "FMOVD" + } + args[0], args[1] = args[1], args[0] + case LZDR: + op = "FMOVD" + return op + " " + "$0" + ", " + args[0] + 
case LZER: + op = "FMOVS" + return op + " " + "$0" + ", " + args[0] + case STD, STDY, STE, STEY: + switch inst.Op { + case STD, STDY: + op = "FMOVD" + case STE, STEY: + op = "FMOVS" + } + + case LGHI, LLILH, LLIHL, LLIHH, LGFI, LLILF, LLIHF: + switch inst.Op { + case LGFI: + op = "MOVW" + case LGHI: + num, err := strconv.ParseInt(args[1][1:], 10, 16) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) + } + if num == int64(int8(num)) { + op = "MOVB" + } else { + op = "MOVH" + } + default: + op = "MOVD" + } + args[0], args[1] = args[1], args[0] + case ARK, AGRK, ALGRK: + switch inst.Op { + case ARK: + op = "ADDW" + case AGRK: + op = "ADD" + case ALGRK: + op = "ADDC" + } + if args[0] == args[1] { + args[0], args[1] = args[2], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[2], args[1], args[0] + } + case AGHIK, AHIK, ALGHSIK: + num, err := strconv.ParseInt(args[2][1:], 10, 32) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) + } + switch inst.Op { + case AGHIK: + if num < 0 { + op = "SUB" + args[2] = args[2][:1] + args[2][2:] + } else { + op = "ADD" + } + case AHIK: + op = "ADDW" + case ALGHSIK: + if num < 0 { + op = "SUBC" + args[2] = args[2][:1] + args[2][2:] + } else { + op = "ADDC" + } + } + args[0], args[1], args[2] = args[2], args[1], args[0] + case AGHI, AHI, AGFI, AFI, AR, ALCGR: + num, err := strconv.ParseInt(args[1][1:], 10, 32) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) + } + switch inst.Op { + case AGHI, AGFI: + if num < 0 { + op = "SUB" + args[1] = args[1][:1] + args[1][2:] + } else { + op = "ADD" + } + case AHI, AFI, AR: + op = "ADDW" + case ALCGR: + op = "ADDE" + } + args[0], args[1] = args[1], args[0] + case AEBR, ADBR, DDBR, DEBR, MDBR, MEEBR, SDBR, SEBR, LPDBR, LNDBR, LPDFR, LNDFR, LCDFR, LCEBR, LEDBR, LDEBR, SQDBR, SQEBR: + switch inst.Op { + case AEBR: + op = "FADDS" + case ADBR: + op = "FADD" + case 
DDBR: + op = "FDIV" + case DEBR: + op = "FDIVS" + case MDBR: + op = "FMUL" + case MEEBR: + op = "FMULS" + case SDBR: + op = "FSUB" + case SEBR: + op = "FSUBS" + case LPDBR: + op = "FABS" + case LNDBR: + op = "FNABS" + case LCDFR: + op = "FNEG" + case LCEBR: + op = "FNEGS" + case SQDBR: + op = "FSQRT" + case SQEBR: + op = "FSQRTS" + } + args[0], args[1] = args[1], args[0] + case SR, SGR, SLGR, SLFI: + switch inst.Op { + case SR, SLFI: + op = "SUBW" + case SGR: + op = "SUB" + case SLGR: + op = "SUBC" + } + args[0], args[1] = args[1], args[0] + case SGRK, SLGRK, SRK: + switch inst.Op { + case SGRK: + op = "SUB" + case SLGRK: + op = "SUBC" + case SRK: + op = "SUBW" + } + if args[0] == args[1] { + args[0], args[1] = args[2], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[2], args[1], args[0] + } + case SLBGR: + op = "SUBE" + args[0], args[1] = args[1], args[0] + case MSGFR, MHI, MSFI, MSGFI: + switch inst.Op { + case MSGFR, MHI, MSFI: + op = "MULLW" + case MSGFI: + op = "MULLD" + } + args[0], args[1] = args[1], args[0] + + case NGR, NR, NILL, NILF, NILH, OGR, OR, OILL, OILF, OILH, XGR, XR, XILF: + op = bitwise_op(inst.Op) + args[0], args[1] = args[1], args[0] + switch inst.Op { + case NILL: + if int(inst.Args[1].(Sign16)) < 0 { + op = "ANDW" + } + + case NILF: + if int(inst.Args[1].(Sign32)) < 0 { + op = "AND" + } + case OILF: + if int(inst.Args[1].(Sign32)) < 0 { + op = "ORW" + } + case XILF: + if int(inst.Args[1].(Sign32)) < 0 { + op = "XORW" + } + } + + case NGRK, NRK, OGRK, ORK, XGRK, XRK: // opcode R1, R2, R3 + op = bitwise_op(inst.Op) + args[0], args[1], args[2] = args[1], args[2], args[0] + case SLLG, SRLG, SLLK, SRLK, RLL, RLLG, SRAK, SRAG: + switch inst.Op { + case SLLG: + op = "SLD" + case SRLG: + op = "SRD" + case SLLK: + op = "SLW" + case SRLK: + op = "SRW" + case SRAK: + op = "SRAW" + case SRAG: + op = "SRAD" + } + args[0], args[1], args[2] = args[2], args[1], args[0] + case TRAP2, SVC: + op = "SYSALL" + case CR, CLR, CGR, CLGR, 
KDBR, CDBR, CEBR, CGHI, CHI, CGFI, CLGFI, CFI, CLFI: + switch inst.Op { + case CGHI, CGFI, CGR: + op = "CMP" + case CHI, CFI, CR: + op = "CMPW" + case CLGFI, CLGR: + op = "CMPU" + case CLFI, CLR: + op = "CMPWU" + case CDBR: + op = "FCMPU" + case KDBR: + op = "FCMPO" + } + case CEFBRA, CDFBRA, CEGBRA, CDGBRA, CELFBR, CDLFBR, CELGBR, CDLGBR, CFEBRA, CFDBRA, CGEBRA, CGDBRA, CLFEBR, CLFDBR, CLGEBR, CLGDBR: + args[0], args[1] = args[2], args[0] + args = args[:2] + case CGRJ, CGIJ: + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + var check bool + switch mask & 0xf { + case 2: + op = "CMPBGT" + check = true + case 4: + op = "CMPBLT" + check = true + case 6: + op = "CMPBNE" + check = true + case 8: + op = "CMPBEQ" + check = true + case 10: + op = "CMPBGE" + check = true + case 12: + op = "CMPBLE" + check = true + } + if check { + args[2] = args[3] + args = args[:3] + } + case CLGRJ, CLGIJ: + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + var check bool + switch mask & 0xf { + case 2: + op = "CMPUBGT" + check = true + case 4: + op = "CMPUBLT" + check = true + case 7: + op = "CMPUBNE" + check = true + case 8: + op = "CMPUBEQ" + check = true + case 10: + op = "CMPUBGE" + check = true + case 12: + op = "CMPUBLE" + check = true + } + if check { + args[2] = args[3] + args = args[:3] + } + case CLRJ, CRJ, CIJ, CLIJ: + args[0], args[1], args[2], args[3] = args[2], args[0], args[1], args[3] + case BRC, BRCL: + mask, err := strconv.Atoi(args[0][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + opStr, check := branch_relative_op(mask, inst.Op) + if opStr != "" { + op = opStr + } + if check { + args[0] = args[1] + args = args[:1] + } + case BCR: + mask, err := strconv.Atoi(args[0][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + 
opStr, check := branchOnConditionOp(mask, inst.Op) + if opStr != "" { + op = opStr + } + if op == "SYNC" || op == "NOPH" { + return op + } + if check { + args[0] = args[1] + args = args[:1] + } + case LOCGR: + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + var check bool + switch mask & 0xf { + case 2: //Greaterthan (M=2) + op = "MOVDGT" + check = true + case 4: //Lessthan (M=4) + op = "MOVDLT" + check = true + case 7: // Not Equal (M=7) + op = "MOVDNE" + check = true + case 8: // Equal (M=8) + op = "MOVDEQ" + check = true + case 10: // Greaterthan or Equal (M=10) + op = "MOVDGE" + check = true + case 12: // Lessthan or Equal (M=12) + op = "MOVDLE" + check = true + } + if check { + args[0], args[1] = args[1], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[2], args[1], args[0] + } + case BRASL: + op = "CALL" // BL + args[0] = args[1] + args = args[:1] + case X, XY, XG: + switch inst.Op { + case X, XY: + op = "XORW" + case XG: + op = "XOR" + } + case N, NY, NG, O, OY, OG, XC, NC, OC, MVC, MVCIN, CLC: + switch inst.Op { + case N, NY: + op = "ANDW" + case NG: + op = "AND" + case O, OY: + op = "ORW" + case OG: + op = "OR" + } + args[0], args[1] = args[1], args[0] + case S, SY, SLBG, SLG, SG: + switch inst.Op { + case S, SY: + op = "SUBW" + case SLBG: + op = "SUBE" + case SLG: + op = "SUBC" + case SG: + op = "SUB" + } + args[0], args[1] = args[1], args[0] + case MSG, MSY, MS: + switch inst.Op { + case MSG: + op = "MULLD" + case MSY, MS: + op = "MULLW" + } + case A, AY, ALCG, ALG, AG: + switch inst.Op { + case A, AY: + op = "ADDW" + case ALCG: + op = "ADDE" + case ALG: + op = "ADDC" + case AG: + op = "ADD" + } + args[0], args[1] = args[1], args[0] + case RISBG, RISBGN, RISBHG, RISBLG, RNSBG, RXSBG, ROSBG: + switch inst.Op { + case RNSBG, RXSBG, ROSBG: + num, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting 
Atoi:%s", err) + } + if ((num >> 7) & 0x1) != 0 { + op = op + "T" + } + case RISBG, RISBGN, RISBHG, RISBLG: + num, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if ((num >> 7) & 0x1) != 0 { + op = op + "Z" + } + } + if len(args) == 5 { + args[0], args[1], args[2], args[3], args[4] = args[2], args[3], args[4], args[1], args[0] + } else { + args[0], args[1], args[2], args[3] = args[2], args[3], args[1], args[0] + } + + case VEC, VECL, VCLZ, VCTZ, VREPI, VPOPCT: //mnemonic V1, V2, M3 + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi for %q:%s", op, err) + } + val := mask & 0x7 + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args = args[:2] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + switch inst.Op { + case VCLZ, VCTZ, VREPI, VPOPCT: + args[0], args[1] = args[1], args[0] + default: + } + //Mnemonic V1, V2, V3, M4 or Mnemonic V1, I2, I3, M4 or Mnemonic V1, V3, I2, M4 + case VA, VS, VACC, VAVG, VAVGL, VMX, VMXL, VMN, VMNL, VGFM, VGM, VREP, VERLLV, VESLV, VSCBI, VSUM, VSUMG, VSUMQ, VMH, VMLH, VML, VME, VMLE, VMO, VMLO: + mask, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + val := mask & 0x7 + switch inst.Op { + case VA, VS, VACC, VSCBI: + if val >= 0 && val < 5 { + if args[0] == args[2] { + args[0], args[1] = args[1], args[0] + args = args[:2] + } else if inst.Op == VS { + if args[0] == args[1] { + args[0] = args[2] + args = args[:2] + } else { + args[0], args[2] = args[2], args[0] + args = args[:3] + } + } else { + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } + op = op + vectorSize[val] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + case VAVG, VAVGL, VMX, VMXL, VMN, 
VMNL, VGFM, VGM: + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + case VREP, VERLLV, VESLV: + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args[0], args[1], args[2] = args[2], args[1], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + case VSUM, VSUMG, VSUMQ: + var off int + switch inst.Op { + case VSUM: + off = 0 + case VSUMG: + off = 1 + case VSUMQ: + off = 2 + } + if (val > (-1 + off)) && (val < (2 + off)) { + op = op + vectorSize[val] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + args = args[:3] + case VML, VMH, VMLH, VME, VMLE, VMO, VMLO: + if val >= 0 && val < 3 { + op = op + vectorSize[val] + } + if op == "VML" && val == 2 { + op = op + "W" + } + if args[0] == args[2] { + args[0], args[1] = args[1], args[0] + args = args[:2] + } else { + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } + } + + case VGFMA, VERIM, VMAH, VMALH: // Mnemonic V1, V2, V3, V4/I4, M5 + mask, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + val := mask & 0x7 + args = args[:4] + var off int + switch inst.Op { + case VMAH, VMALH: + off = -1 + } + + if val >= 0 && val < (4+off) { + op = op + vectorSize[val] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + switch inst.Op { + case VGFMA, VMAH, VMALH: + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + default: + args[0], args[3] = args[3], args[0] + } + case VSTRC, VFAE, VFEE, VFENE: + var off uint8 + switch inst.Op { + case VSTRC: + off = uint8(1) + default: + off = uint8(0) + 
} + m1, err := strconv.Atoi(args[3+off][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + m2, err := strconv.Atoi(args[4+off][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + index := m1 & 0x3 + if index < 0 || index > 2 { + return fmt.Sprintf("specification exception is recognized for %q with mask values: %v, %v \n", op, m1, m2) + } + switch m2 { + case 0: + op = op + vectorSize[index] + case 1: + op = op + vectorCS[index] + case 2: + op = op + "Z" + vectorSize[index] + case 3: + op = op + "Z" + vectorCS[index] + default: + return fmt.Sprintf("specification exception is recognized for %q with mask values: %v, %v \n", op, m1, m2) + } + switch inst.Op { + case VSTRC: + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + default: + args[0], args[1], args[2] = args[1], args[2], args[0] + } + args = args[:3+off] + + case VCEQ, VCH, VCHL: // Mnemonic V1, V2, V3, M4, M5 + m4, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m5, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + val := (m4 & 0x7) + if m5 == 0 { + if val >= 0 && val < 4 { + op = op + vectorSize[val] + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else if m5 == 1 { + if val >= 0 && val < 4 { + op = op + vectorCS[val] + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m5) value: %v \n", op, m5) + } + case VFMA, VFMS, VMSL: //Mnemonic V1, V2, V3, V4, M5, M6 + m5, 
err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m6, err := strconv.Atoi(args[5][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + switch inst.Op { + case VMSL: + if m5 == 3 && m6 == 8 { + op = op + "EG" + } else if m5 == 3 && m6 == 4 { + op = op + "OG" + } else if m5 == 3 && m6 == 12 { + op = op + "EOG" + } else if m5 == 3 { + op = op + "G" + } + default: + if m5 == 0 && m6 == 3 { + op = op + "DB" + } else if m5 == 8 && m6 == 3 { + op = "W" + op[1:] + "DB" + } else { + return fmt.Sprintf("specification exception is recognized for %q with m5: %v m6: %v \n", op, m5, m6) + } + } + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + args = args[:4] + + case VFCE, VFCH, VFCHE: //Mnemonic V1,V2,V3,M4,M5,M6 + m4, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m5, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m6, err := strconv.Atoi(args[5][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + if m5 == 0 { + if m4 == 3 && m6 == 0 { + op = op + "DB" + } else if m4 == 3 && m6 == 1 { + op = op + "DBS" + } else { + return fmt.Sprintf("specification exception is recognized for %q with m4: %v, m6: %v \n", op, m4, m6) + } + + } else if m5 == 8 { + if m4 == 3 && m6 == 0 { + op = "W" + op[1:] + "DB" + } else if m4 == 3 && m6 == 1 { + op = "W" + op[1:] + "DBS" + } else { + return fmt.Sprintf("specification exception is recognized for %q with m4: %v, m6: %v \n", op, m4, m6) + } + } else { + return fmt.Sprintf("specification exception is recognized for %q with m5: %v \n", op, m5) + } + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[:3] + + case VFTCI: + m4, err := strconv.Atoi(args[3][1:]) + if 
err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + m5, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: %q error in converting Atoi:%s", op, err) + } + val := (m4 & 0x7) + if m5 == 0 { + switch val { + case 2: + op = op + "SB" + case 3: + op = op + "DB" + default: + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else if m5 == 8 { + switch val { + case 2: + op = "W" + op[1:] + "SB" + case 3: + op = "W" + op[1:] + "DB" + case 4: + op = "W" + op[1:] + "XB" + default: + return fmt.Sprintf("specification exception is recognized for %q with mask(m4) value: %v \n", op, m4) + } + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask(m5) value: %v \n", op, m5) + } + args[0], args[1], args[2] = args[2], args[1], args[0] + args = args[:3] + case VAC, VACCC: + mask, err := strconv.Atoi(args[4][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if mask&0x04 == 0 { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) + } + op = op + "Q" + args[0], args[1], args[2], args[3] = args[1], args[2], args[3], args[0] + args = args[:4] + case VL, VLREP: + switch inst.Op { + case VL: + args[0], args[1] = args[1], args[0] + case VLREP: + args[0], args[1] = args[1], args[0] + mask, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if mask >= 0 && mask < 4 { + op = op + vectorSize[mask] + } + } + args = args[:2] + case VST, VSTEB, VSTEH, VSTEF, VSTEG, VLEB, VLEH, VLEF, VLEG: //Mnemonic V1, D2(X2,B2), M3 + m, err := strconv.Atoi(args[2][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + switch inst.Op { + case VST: + if m == 0 || (m > 2 && m < 5) { + args = args[:2] + } else { + return fmt.Sprintf("specification 
exception is recognized for %q with mask value: %v \n", op, m) + } + case VLEB, VLEH, VLEF, VLEG: + args[0], args[2] = args[2], args[0] + default: + args[0], args[1], args[2] = args[2], args[0], args[1] + } + case VSTM, VSTL, VESL, VESRA, VLM, VERLL, VLVG: //Mnemonic V1, V3, D2(B2)[,M4] or V1, R3,D2(B2) + switch inst.Op { + case VSTM, VLM: + m, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if !(m == 0 || (m > 2 && m < 5)) { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, m) + } + if inst.Op == VLM { + args[0], args[1], args[2] = args[2], args[0], args[1] + } + args = args[:3] + case VESL, VESRA, VERLL, VLVG: + m, err := strconv.Atoi(args[3][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if m >= 0 && m < 4 { + op = op + vectorSize[m] + } else { + return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, m) + } + switch inst.Op { + case VLVG: + args[0], args[2] = args[2], args[0] + args = args[:3] + default: + if args[0] == args[1] { + args[0], args[1] = args[2], args[1] + args = args[:2] + break + } + args[0], args[2] = args[2], args[0] + args = args[:3] + } + case VSTL: + args[0], args[1] = args[1], args[0] + args = args[:3] + } + case VGBM: + val, err := strconv.Atoi(args[1][1:]) + if err != nil { + return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) + } + if val == 0 { + op = "VZERO" + args = args[:1] + } else if val == 0xffff { + op = "VONE" + args = args[:1] + } else { + args[0], args[1] = args[1], args[0] + args = args[:2] + } + case VN, VNC, VO, VX, VNO: //mnemonic V1, V2, V3 + if args[0] == args[2] { + args = args[:2] + args[0], args[1] = args[1], args[0] + } else { + args[0], args[1], args[2] = args[1], args[2], args[0] + } + if op == "VNO" { + op = op + "T" + } + case VGEG, VGEF, VSCEG, VSCEF: //Mnemonic V1, D2(V2, B2), M3 + args[0], 
args[2] = args[2], args[0] + + } + if args != nil { + op += " " + strings.Join(args, ", ") + } + + return op +} + +// This function returns corresponding extended mnemonic for the given +// branch on relative mnemonic. +func branch_relative_op(mask int, opconst Op) (op string, check bool) { + switch mask & 0xf { + case 2: + op = "BGT" + check = true + case 4: + op = "BLT" + check = true + case 5: + op = "BLTU" + check = true + case 7: + op = "BNE" + check = true + case 8: + op = "BEQ" + check = true + case 10: + op = "BGE" + check = true + case 12: + op = "BLE" + check = true + case 13: + op = "BLEU" + check = true + case 15: + op = "JMP" // BR + check = true + } + return op, check +} + +// This function returns corresponding extended mnemonic for the given +// branch on condition mnemonic. +func branchOnConditionOp(mask int, opconst Op) (op string, check bool) { + switch mask & 0xf { + case 0: + op = "NOPH" + case 14: + op = "SYNC" + case 15: + op = "JMP" + check = true + } + return op, check +} + +// This function returns corresponding plan9 mnemonic for the native bitwise mnemonic. 
+func bitwise_op(op Op) string { + var ret string + switch op { + case NGR, NGRK, NILL: + ret = "AND" + case NR, NRK, NILH, NILF: + ret = "ANDW" + case OGR, OGRK, OILF: + ret = "OR" + case OR, ORK, OILH, OILL: + ret = "ORW" + case XGR, XGRK, XILF: + ret = "XOR" + case XR, XRK: + ret = "XORW" + } + return ret +} + +// This function parses memory operand of type D(B) +func mem_operand(args []string) string { + if args[0] != "" && args[1] != "" { + args[0] = fmt.Sprintf("%s(%s)", args[0], args[1]) + } else if args[0] != "" { + args[0] = fmt.Sprintf("$%s", args[0]) + } else if args[1] != "" { + args[0] = fmt.Sprintf("(%s)", args[1]) + } else { + args[0] = "" + } + return args[0] +} + +// This function parses memory operand of type D(X,B) +func mem_operandx(args []string) string { + if args[1] != "" && args[2] != "" { + args[1] = fmt.Sprintf("(%s)(%s*1)", args[2], args[1]) + } else if args[1] != "" { + args[1] = fmt.Sprintf("(%s)", args[1]) + } else if args[2] != "" { + args[1] = fmt.Sprintf("(%s)", args[2]) + } else if args[0] != "" { + args[1] = "" + } + if args[0] != "" && args[1] != "" { + args[0] = fmt.Sprintf("%s%s", args[0], args[1]) + } else if args[0] != "" { + args[0] = fmt.Sprintf("$%s", args[0]) + } else if args[1] != "" { + args[0] = fmt.Sprintf("%s", args[1]) + } else { + args[0] = "" + } + return args[0] +} + +// This function parses memory operand of type D(V,B) +func mem_operandv(args []string) string { + if args[1] != "" && args[2] != "" { + args[1] = fmt.Sprintf("(%s)(%s*1)", args[2], args[1]) + } else if args[1] != "" { + args[1] = fmt.Sprintf("(%s*1)", args[1]) + } else if args[2] != "" { + args[1] = fmt.Sprintf("(%s)", args[2]) + } else if args[0] != "" { + args[1] = "" + } + if args[0] != "" && args[1] != "" { + args[0] = fmt.Sprintf("%s%s", args[0], args[1]) + } else if args[0] != "" { + args[0] = fmt.Sprintf("$%s", args[0]) + } else if args[1] != "" { + args[0] = fmt.Sprintf("%s", args[1]) + } else { + args[0] = "" + } + return args[0] +} + +// 
This function parses memory operand of type D(L,B) +func mem_operandl(args []string) (string, string) { + if args[0] != "" && args[2] != "" { + args[0] = fmt.Sprintf("%s(%s)", args[0], args[2]) + } else if args[2] != "" { + args[0] = fmt.Sprintf("(%s)", args[2]) + } else { + args[0] = fmt.Sprintf("%s", args[0]) + } + return args[0], args[1] +} + +// plan9Arg formats arg (which is the argIndex's arg in inst) according to Plan 9 rules. +// NOTE: because Plan9Syntax is the only caller of this func, and it receives a copy +// of inst, it's ok to modify inst.Args here. +func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg Arg) string { + switch arg.(type) { + case Reg: + if arg == R13 { + return "g" + } + return strings.ToUpper(arg.String(pc)[1:]) + case Base: + if arg == R13 { + return "g" + } + s := arg.String(pc) + if s != "" { + return strings.ToUpper(s[1 : len(s)-1]) + } + return "R0" + case Index: + if arg == R13 { + return "g" + } + s := arg.String(pc) + if s != "" { + return strings.ToUpper(s[1:]) + } + return "" + case VReg: + return strings.ToUpper(arg.String(pc)[1:]) + case Disp20, Disp12: + numstr := arg.String(pc) + num, err := strconv.Atoi(numstr[:len(numstr)]) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting Atoi:%s", err) + } + if num == 0 { + return "" + } else { + return strconv.Itoa(num) + } + case RegIm12, RegIm16, RegIm24, RegIm32: + addr, err := strconv.ParseUint(arg.String(pc)[2:], 16, 64) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseUint:%s", err) + } + off := int(addr - pc) + s, base := symname(addr) + if s != "" && addr == base { + return fmt.Sprintf("%s(SB)", s) + } + off = off / inst.Len + return fmt.Sprintf("%v(PC)", off) + case Imm, Sign8, Sign16, Sign32: + numImm := arg.String(pc) + switch arg.(type) { + case Sign32, Sign16, Imm: + num, err := strconv.ParseInt(numImm, 10, 64) + if err != nil { + return fmt.Sprintf("plan9Arg: error in converting ParseInt:%s", err) 
+ } + switch inst.Op { + case LLIHF: + num = num << 32 + case LLILH: + num = num << 16 + case NILH: + num = (num << 16) | int64(0xFFFF) + case OILH: + num = num << 16 + } + numImm = fmt.Sprintf("%d", num) + } + return fmt.Sprintf("$%s", numImm) + case Mask, Len: + num := arg.String(pc) + return fmt.Sprintf("$%s", num) + } + return fmt.Sprintf("???(%v)", arg) +} + +// It checks any 2 args of given instructions to swap or not +func reverseOperandOrder(op Op) bool { + switch op { + case LOCR, MLGR: + return true + case LTEBR, LTDBR: + return true + case VLEIB, VLEIH, VLEIF, VLEIG, VPDI: + return true + case VSLDB: + return true + } + return false +} + +// It checks whether to reverse all the args of given mnemonic or not +func reverseAllOperands(op Op) bool { + switch op { + case VLVGP: //3-operand list + return true + case VSEL, VPERM: //4-Operand list + return true + } + return false +} diff --git a/s390x/s390xasm/testdata/decode.txt b/s390x/s390xasm/testdata/decode.txt new file mode 100644 index 00000000..f04715b2 --- /dev/null +++ b/s390x/s390xasm/testdata/decode.txt @@ -0,0 +1,162 @@ + b9040021| plan9 MOVD R1, R2 + b9140043| plan9 MOVW R3, R4 + b9070065| plan9 MOVH R5, R6 + b9060087| plan9 MOVB R7, R8 + b9160021| plan9 MOVWZ R1, R2 + b9850032| plan9 MOVHZ R2, R3 + b9840054| plan9 MOVBZ R4, R5 + b90f0021| plan9 MOVDBR R1, R2 + b91f0043| plan9 MOVWBR R3, R4 + b9e28010| plan9 MOVDEQ R0, R1 + b9e2a032| plan9 MOVDGE R2, R3 + b9e22054| plan9 MOVDGT R4, R5 + b9e2c076| plan9 MOVDLE R6, R7 + b9e24098| plan9 MOVDLT R8, R9 + b9e270ba| plan9 MOVDNE R10, R11 + b9f23012| plan9 LOCR $3, R2, R1 + b9e27065| plan9 MOVDNE R5, R6 +e310f0000004| plan9 MOVD (R15), R1 +e320f0000014| plan9 MOVW (R15), R2 +e330f0000015| plan9 MOVH (R15), R3 +e340f0000077| plan9 MOVB (R15), R4 +e350f0000016| plan9 MOVWZ (R15), R5 +e360f0000091| plan9 MOVHZ (R15), R6 +e370f0000090| plan9 MOVBZ (R15), R7 +e380f000000f| plan9 MOVDBR (R15), R8 +e390f000001e| plan9 MOVWBR (R15), R9 +e310fff8ff24| plan9 MOVD 
R1, -8(R15) +e320fff8ff50| plan9 MOVW R2, -8(R15) +e330fff8ff70| plan9 MOVH R3, -8(R15) +e340fff8ff72| plan9 MOVB R4, -8(R15) +e350fff8ff2f| plan9 MOVDBR R5, -8(R15) +e360fff8ff3e| plan9 MOVWBR R6, -8(R15) +c01efffffffe| plan9 MOVD $-8589934592, R1 +c021fffe0000| plan9 MOVW $-131072, R2 + a739fe00| plan9 MOVH $-512, R3 + a749ffff| plan9 MOVB $-1, R4 + b9e81022| plan9 ADD R1, R2 + b9e81032| plan9 ADD R1, R2, R3 + a71b2000| plan9 ADD $8192, R1 +ec21200000d9| plan9 ADD $8192, R1, R2 +c21800008000| plan9 ADD $32768, R1 + b9ea1022| plan9 ADDC R1, R2 + b9ea1032| plan9 ADDC R1, R2, R3 +ec21000100db| plan9 ADDC $1, R1, R2 +ec21ffff00db| plan9 SUBC $1, R1, R2 + 1a21| plan9 ADDW R1, R2 + b9f81032| plan9 ADDW R1, R2, R3 + a71a2000| plan9 ADDW $8192, R1 +ec21200000d8| plan9 ADDW $8192, R1, R2 + b9880021| plan9 ADDE R1, R2 +e3201000000a| plan9 ADDC (R1), R2 + 5a605000| plan9 ADDW (R5), R6 + 5a807fff| plan9 ADDW 4095(R7), R8 +e3201fffff5a| plan9 ADDW -1(R1), R2 +e34030000188| plan9 ADDE 4096(R3), R4 +e34230000188| plan9 ADDE 4096(R3)(R2*1), R4 + b9090043| plan9 SUB R3, R4 + b9e93054| plan9 SUB R3, R4, R5 + a73be000| plan9 SUB $8192, R3 +ec43e00000d9| plan9 SUB $8192, R3, R4 + b90b0021| plan9 SUBC R1, R2 +ec43ffff00db| plan9 SUBC $1, R3, R4 + b9eb2043| plan9 SUBC R2, R3, R4 + 1b43| plan9 SUBW R3, R4 + b9f93054| plan9 SUBW R3, R4, R5 +c21500002000| plan9 SUBW $8192, R1 +e320400f0089| plan9 SUBE 15(R4), R2 +e32040080009| plan9 SUB 8(R4), R2 + 5b204000| plan9 SUBW (R4), R2 +e3204fffff5b| plan9 SUBW -1(R4), R2 + b91c0076| plan9 MULLW R6, R7 + a76c2000| plan9 MULLW $8192, R6 +c2810000000f| plan9 MULLW $15, R8 +c281ffff7fff| plan9 MULLW $-32769, R8 +c21080000000| plan9 MULLD $-2147483648, R1 + b9860021| plan9 MLGR R1, R2 + b9030011| plan9 NEG R1 + b9030021| plan9 NEG R1, R2 + b9130011| plan9 NEGW R1 + b9130021| plan9 NEGW R1, R2 + b9830022| plan9 FLOGR R2, R2 + b9800021| plan9 AND R1, R2 + b9e42031| plan9 AND R1, R2, R3 + a517ffff| plan9 ANDW $-1, R1 +c01bffff0000| plan9 AND $-65536, 
R1 + 1421| plan9 ANDW R1, R2 + b9f42031| plan9 ANDW R1, R2, R3 +c01b00000001| plan9 ANDW $1, R1 + a5160001| plan9 ANDW $131071, R1 +c01b00010000| plan9 ANDW $65536, R1 + a517fffe| plan9 ANDW $-2, R1 + a517000f| plan9 AND $15, R1 +e32010000080| plan9 AND (R1), R2 + 54201000| plan9 ANDW (R1), R2 +e32010000154| plan9 ANDW 4096(R1), R2 + b9810021| plan9 OR R1, R2 + b9e62031| plan9 OR R1, R2, R3 + a51a0001| plan9 ORW $65536, R1 + a51bffff| plan9 ORW $-1, R1 + a51b0001| plan9 ORW $1, R1 + 1621| plan9 ORW R1, R2 +c01d0001ffff| plan9 OR $131071, R1 +c01dffffffff| plan9 ORW $-1, R1 + b9f62031| plan9 ORW R1, R2, R3 +e32010000081| plan9 OR (R1), R2 + 56201000| plan9 ORW (R1), R2 +e3201fffff56| plan9 ORW -1(R1), R2 + b9820021| plan9 XOR R1, R2 + b9e72031| plan9 XOR R1, R2, R3 +c01700000001| plan9 XOR $1, R1 +c0170001ffff| plan9 XOR $131071, R1 +c01700010000| plan9 XOR $65536, R1 + 1721| plan9 XORW R1, R2 + b9f72031| plan9 XORW R1, R2, R3 +c017fffffffe| plan9 XORW $-2, R1 + 0700| plan9 NOPH + 07e0| plan9 SYNC + b92e0024| plan9 KM R2, R4 + b92f0026| plan9 KMC R2, R6 + b93f0028| plan9 KLMD R2, R8 + b93e0004| plan9 KIMD R0, R4 + b93a0008| plan9 KDSA R0, R8 + b9296024| plan9 KMA R2, R6, R4 + b92d6024| plan9 KMCTR R2, R6, R4 +e743400000f3| plan9 VAB V3, V4 +e743600000f3| plan9 VAB V3, V6, V4 +e743400010f3| plan9 VAH V3, V4 +e743600010f3| plan9 VAH V3, V6, V4 +e743400020f3| plan9 VAF V3, V4 +e743600020f3| plan9 VAF V3, V6, V4 +e743400030f3| plan9 VAG V3, V4 +e743600030f3| plan9 VAG V3, V6, V4 +e743400040f3| plan9 VAQ V3, V4 +e743600040f3| plan9 VAQ V3, V6, V4 +e734600000f7| plan9 VSB V6, V4, V3 +e722100000f7| plan9 VSB V1, V2 +e734600010f7| plan9 VSH V6, V4, V3 +e722100010f7| plan9 VSH V1, V2 +e734600020f7| plan9 VSF V6, V4, V3 +e722100020f7| plan9 VSF V1, V2 +e734600030f7| plan9 VSG V6, V4, V3 +e722100030f7| plan9 VSG V1, V2 +e734600040f7| plan9 VSQ V6, V4, V3 +e722100040f7| plan9 VSQ V1, V2 +e7824000608a| plan9 VSTRCB V2, V4, V6, V8 +e7824100608a| plan9 VSTRCH V2, V4, V6, V8 
+e7824200608a| plan9 VSTRCF V2, V4, V6, V8 +e7824010608a| plan9 VSTRCBS V2, V4, V6, V8 +e7824110608a| plan9 VSTRCHS V2, V4, V6, V8 +e7824210608a| plan9 VSTRCFS V2, V4, V6, V8 +e710ffff0044| plan9 VONE V1 +e70000000844| plan9 VZERO V16 +e70210000068| plan9 VN V2, V1, V0 +e71010000468| plan9 VN V16, V1 +e70210000069| plan9 VNC V2, V1, V0 +e71010000469| plan9 VNC V16, V1 +e7021000006a| plan9 VO V2, V1, V0 +e7101000046a| plan9 VO V16, V1 +e7021000006d| plan9 VX V2, V1, V0 +e7101000046d| plan9 VX V16, V1 +e7101000046b| plan9 VNOT V16, V1 +e78340000062| plan9 VLVGP R3, R4, V8 From ec82f99f1e535a0f28111fed3c35e456e2dad2c5 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 5 Nov 2024 11:29:31 -0800 Subject: [PATCH 035/200] README: mention the git repo Change-Id: Ib185a246393d19c57c5a42d0c59e92b0cb4a724d Reviewed-on: https://go-review.googlesource.com/c/arch/+/625655 Commit-Queue: Ian Lance Taylor LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d2351348..9995b5e9 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,10 @@ The parts needed in the main Go repository are copied in. ## Report Issues / Send Patches This repository uses Gerrit for code changes. To learn how to submit changes to -this repository, see https://golang.org/doc/contribute.html. +this repository, see https://go.dev/doc/contribute. + +The git repository is https://go.googlesource.com/arch. The main issue tracker for the arch repository is located at -https://github.com/golang/go/issues. Prefix your issue with "x/arch:" in the +https://go.dev/issues. Prefix your issue with "x/arch:" in the subject line, so it is easy to find. 
From f977c2e4e3f4a03cfac4ffe5a928d04a3e933b64 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Wed, 6 Nov 2024 09:31:05 +0100 Subject: [PATCH 036/200] s390x/s390xasm: fix self-assignment error It removes all the argument self-assignments. Change-Id: Ifabd0629a205211d5aaf8fc00847b70806e0ddc8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/625875 LUCI-TryBot-Result: Go LUCI Reviewed-by: Vishwanatha HD Reviewed-by: Cherry Mui Reviewed-by: David Chase --- s390x/s390xasm/plan9.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go index b4df0b89..95464294 100644 --- a/s390x/s390xasm/plan9.go +++ b/s390x/s390xasm/plan9.go @@ -244,7 +244,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args[0], args[1] = args[2], args[0] args = args[:2] } else { - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] } case AGHIK, AHIK, ALGHSIK: num, err := strconv.ParseInt(args[2][1:], 10, 32) @@ -269,7 +269,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin op = "ADDC" } } - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] case AGHI, AHI, AGFI, AFI, AR, ALCGR: num, err := strconv.ParseInt(args[1][1:], 10, 32) if err != nil { @@ -344,7 +344,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args[0], args[1] = args[2], args[0] args = args[:2] } else { - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] } case SLBGR: op = "SUBE" @@ -399,7 +399,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case SRAG: op = "SRAD" } - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] case TRAP2, SVC: op = "SYSALL" case CR, CLR, CGR, CLGR, KDBR, CDBR, CEBR, CGHI, CHI, CGFI, CLGFI, CFI, CLFI: @@ -481,7 +481,7 
@@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args = args[:3] } case CLRJ, CRJ, CIJ, CLIJ: - args[0], args[1], args[2], args[3] = args[2], args[0], args[1], args[3] + args[0], args[1], args[2] = args[2], args[0], args[1] case BRC, BRCL: mask, err := strconv.Atoi(args[0][1:]) if err != nil { @@ -541,7 +541,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args[0], args[1] = args[1], args[0] args = args[:2] } else { - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] } case BRASL: op = "CALL" // BL @@ -679,7 +679,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case VREP, VERLLV, VESLV: if val >= 0 && val < 4 { op = op + vectorSize[val] - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] args = args[:3] } else { return fmt.Sprintf("specification exception is recognized for %q with mask value: %v \n", op, mask) @@ -910,7 +910,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin } else { return fmt.Sprintf("specification exception is recognized for %q with mask(m5) value: %v \n", op, m5) } - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] args = args[:3] case VAC, VACCC: mask, err := strconv.Atoi(args[4][1:]) @@ -985,7 +985,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args = args[:3] default: if args[0] == args[1] { - args[0], args[1] = args[2], args[1] + args[0] = args[2] args = args[:2] break } From f23035dd65d1c4767490f13c554b34562902d91f Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Sun, 4 Aug 2024 19:23:00 +0800 Subject: [PATCH 037/200] riscv64: add tests for riscv64asm Add validation tests for riscv64asm GNU/Plan9 decoder, including objdump test and external test. 
Change-Id: Id7442704ea7e10c22ca4a799cdfc9f7d043f85c3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/602916 Reviewed-by: Meng Zhuo LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Mark Ryan Reviewed-by: Cherry Mui --- riscv64/riscv64asm/decode_test.go | 80 +++++ riscv64/riscv64asm/ext_test.go | 330 +++++++++++++++++ riscv64/riscv64asm/objdump_test.go | 86 +++++ riscv64/riscv64asm/objdumpext_test.go | 299 ++++++++++++++++ riscv64/riscv64asm/testdata/Makefile | 9 + riscv64/riscv64asm/testdata/gnucases.txt | 390 +++++++++++++++++++++ riscv64/riscv64asm/testdata/plan9cases.txt | 336 ++++++++++++++++++ 7 files changed, 1530 insertions(+) create mode 100644 riscv64/riscv64asm/decode_test.go create mode 100644 riscv64/riscv64asm/ext_test.go create mode 100644 riscv64/riscv64asm/objdump_test.go create mode 100644 riscv64/riscv64asm/objdumpext_test.go create mode 100644 riscv64/riscv64asm/testdata/Makefile create mode 100644 riscv64/riscv64asm/testdata/gnucases.txt create mode 100644 riscv64/riscv64asm/testdata/plan9cases.txt diff --git a/riscv64/riscv64asm/decode_test.go b/riscv64/riscv64asm/decode_test.go new file mode 100644 index 00000000..1590aaac --- /dev/null +++ b/riscv64/riscv64asm/decode_test.go @@ -0,0 +1,80 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package riscv64asm + +import ( + "bufio" + "encoding/hex" + "os" + "path/filepath" + "strings" + "testing" +) + +func testDecode(t *testing.T, syntax string) { + input := filepath.Join("testdata", syntax+"cases.txt") + f, err := os.Open(input) + if err != nil { + t.Fatal(err) + } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.SplitN(line, "\t", 2) + i := strings.Index(f[0], "|") + + if i < 0 { + t.Errorf("parsing %q: missing | separator", f[0]) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f[0]) + } + code, err := hex.DecodeString(f[0][:i] + f[0][i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f[0], err) + continue + } + asm0 := strings.Replace(f[1], " ", " ", -1) + asm := strings.TrimSpace(asm0) + inst, decodeErr := Decode(code) + if decodeErr != nil && decodeErr != errUnknown { + if asm == "illegalins" && decodeErr == errShort { + continue + } + // Some rarely used system instructions are not supported + // Following logicals will filter such unknown instructions + t.Errorf("parsing %x: %s", code, decodeErr) + continue + } + + var out string + switch syntax { + case "gnu": + out = GNUSyntax(inst) + case "plan9": + out = GoSyntax(inst, 0, nil, nil) + default: + t.Errorf("unknown syntax %q", syntax) + continue + } + + if asm != out { + t.Errorf("Decode(%s) [%s] = %s want %s", f[0], syntax, out, asm) + } + } +} + +func TestDecodeGNUSyntax(t *testing.T) { + testDecode(t, "gnu") +} + +func TestDecodeGoSyntax(t *testing.T) { + testDecode(t, "plan9") +} diff --git a/riscv64/riscv64asm/ext_test.go b/riscv64/riscv64asm/ext_test.go new file mode 100644 index 00000000..fa6961f2 --- /dev/null +++ b/riscv64/riscv64asm/ext_test.go @@ -0,0 +1,330 @@ +// Copyright 2024 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Support for testing against external disassembler program. + +package riscv64asm + +import ( + "bufio" + "bytes" + "encoding/hex" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math/rand" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" +) + +var ( + dumpTest = flag.Bool("dump", false, "dump all encodings") + mismatch = flag.Bool("mismatch", false, "log allowed mismatches") + keep = flag.Bool("keep", false, "keep object files around") + debug = false +) + +// An ExtInst represents a single decoded instruction parsed +// from an external disassembler's output. +type ExtInst struct { + addr uint64 + enc [4]byte + nenc int + text string +} + +func (r ExtInst) String() string { + return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text) +} + +// An ExtDis is a connection between an external disassembler and a test. +type ExtDis struct { + Dec chan ExtInst + File *os.File + Size int + Cmd *exec.Cmd +} + +// Run runs the given command - the external disassembler - and returns +// a buffered reader of its standard output. +func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) { + if *keep { + log.Printf("%s\n", strings.Join(cmd, " ")) + } + ext.Cmd = exec.Command(cmd[0], cmd[1:]...) + out, err := ext.Cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("stdoutpipe: %v", err) + } + if err := ext.Cmd.Start(); err != nil { + return nil, fmt.Errorf("exec: %v", err) + } + + b := bufio.NewReaderSize(out, 1<<20) + return b, nil +} + +// Wait waits for the command started with Run to exit. +func (ext *ExtDis) Wait() error { + return ext.Cmd.Wait() +} + +// testExtDis tests a set of byte sequences against an external disassembler. +// The disassembler is expected to produce the given syntax and run +// in the given architecture mode (16, 32, or 64-bit). 
+// The extdis function must start the external disassembler +// and then parse its output, sending the parsed instructions on ext.Dec. +// The generate function calls its argument f once for each byte sequence +// to be tested. The generate function itself will be called twice, and it must +// make the same sequence of calls to f each time. +// When a disassembly does not match the internal decoding, +// allowedMismatch determines whether this mismatch should be +// allowed, or else considered an error. +func testExtDis( + t *testing.T, + syntax string, + extdis func(ext *ExtDis) error, + generate func(f func([]byte)), + allowedMismatch func(text string, inst *Inst, dec ExtInst) bool, +) { + start := time.Now() + ext := &ExtDis{ + Dec: make(chan ExtInst), + } + errc := make(chan error) + + // First pass: write instructions to input file for external disassembler. + file, f, size, err := writeInst(generate) + if err != nil { + t.Fatal(err) + } + ext.Size = size + ext.File = f + defer func() { + f.Close() + if !*keep { + os.Remove(file) + } + }() + + // Second pass: compare disassembly against our decodings. 
+ var ( + totalTests = 0 + totalSkips = 0 + totalErrors = 0 + + errors = make([]string, 0, 100) // Sampled errors, at most cap + ) + go func() { + errc <- extdis(ext) + }() + + generate(func(enc []byte) { + dec, ok := <-ext.Dec + if !ok { + t.Errorf("decoding stream ended early") + return + } + inst, text := disasm(syntax, pad(enc)) + + totalTests++ + if *dumpTest { + fmt.Printf("%x -> %s [%d]\n", enc, dec.text, dec.nenc) + } + + if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { + suffix := "" + if allowedMismatch(text, &inst, dec) { + totalSkips++ + if !*mismatch { + return + } + suffix += " (allowed mismatch)" + } + totalErrors++ + cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix) + + if len(errors) >= cap(errors) { + j := rand.Intn(totalErrors) + if j >= cap(errors) { + return + } + errors = append(errors[:j], errors[j+1:]...) + } + errors = append(errors, cmp) + } + }) + + if *mismatch { + totalErrors -= totalSkips + } + + fmt.Printf("totalTest: %d total skip: %d total error: %d\n", totalTests, totalSkips, totalErrors) + // Here are some errors about mismatches(44) + for _, b := range errors { + t.Log(b) + } + + if totalErrors > 0 { + t.Fail() + } + t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds()) + t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage()) +} + +// Start address of text. +const start = 0x8000 + +// writeInst writes the generated byte sequences to a new file +// starting at offset start. That file is intended to be the input to +// the external disassembler. 
+func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) { + f, err = ioutil.TempFile("", "riscv64asm") + if err != nil { + return + } + + file = f.Name() + + f.Seek(start, io.SeekStart) + w := bufio.NewWriter(f) + defer w.Flush() + size = 0 + generate(func(x []byte) { + if debug { + fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):]) + } + w.Write(x) + w.Write(zeros[len(x):]) + size += len(zeros) + }) + return file, f, size, nil +} + +var zeros = []byte{0, 0, 0, 0} + +// pad pads the code sequence with pops. +func pad(enc []byte) []byte { + if len(enc) < 4 { + enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...) + } + return enc +} + +// disasm returns the decoded instruction and text +// for the given source bytes, using the given syntax and mode. +func disasm(syntax string, src []byte) (inst Inst, text string) { + var err error + inst, err = Decode(src) + if err != nil { + text = "error: " + err.Error() + return + } + text = inst.String() + switch syntax { + case "gnu": + text = GNUSyntax(inst) + case "plan9": // [sic] + text = GoSyntax(inst, 0, nil, nil) + default: + text = "error: unknown syntax " + syntax + } + return +} + +// decodecoverage returns a floating point number denoting the +// decoder coverage. +func decodeCoverage() float64 { + n := 0 + for _, t := range decoderCover { + if t { + n++ + } + } + return 100 * float64(1+n) / float64(1+len(decoderCover)) +} + +// Helpers for writing disassembler output parsers. + +// isHex reports whether b is a hexadecimal character (0-9a-fA-F). +func isHex(b byte) bool { + return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F') +} + +// parseHex parses the hexadecimal byte dump in src, +// appending the parsed bytes to raw and returning the updated slice. +// The returned bool reports whether any invalid hex was found. +// Spaces and tabs between bytes are okay but any other non-hex is not. 
+func parseHex(src []byte, raw []byte) ([]byte, bool) { + src = bytes.TrimSpace(src) + raw, err := hex.AppendDecode(raw, src) + if err != nil { + return nil, false + } + return raw, true +} + +// Generators. +// +// The test cases are described as functions that invoke a callback repeatedly, +// with a new input sequence each time. These helpers make writing those +// a little easier. + +// hexCases generates the cases written in hexadecimal in the encoded string. +// Spaces in 'encoded' separate entire test cases, not individual bytes. +func hexCases(t *testing.T, encoded string) func(func([]byte)) { + return func(try func([]byte)) { + for _, x := range strings.Fields(encoded) { + src, err := hex.DecodeString(x) + if err != nil { + t.Errorf("parsing %q: %v", x, err) + } + try(src) + } + } +} + +// testdataCases generates the test cases recorded in testdata/cases.txt. +// It only uses the inputs; it ignores the answers recorded in that file. +func testdataCases(t *testing.T, syntax string) func(func([]byte)) { + var codes [][]byte + input := filepath.Join("testdata", syntax+"cases.txt") + data, err := ioutil.ReadFile(input) + if err != nil { + t.Fatal(err) + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + f := strings.Fields(line)[0] + i := strings.Index(f, "|") + if i < 0 { + t.Errorf("parsing %q: missing | separator", f) + continue + } + if i%2 != 0 { + t.Errorf("parsing %q: misaligned | separator", f) + } + code, err := hex.DecodeString(f[:i] + f[i+1:]) + if err != nil { + t.Errorf("parsing %q: %v", f, err) + continue + } + codes = append(codes, code) + } + + return func(try func([]byte)) { + for _, code := range codes { + try(code) + } + } +} diff --git a/riscv64/riscv64asm/objdump_test.go b/riscv64/riscv64asm/objdump_test.go new file mode 100644 index 00000000..479301bb --- /dev/null +++ b/riscv64/riscv64asm/objdump_test.go @@ -0,0 +1,86 @@ +// 
Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "strings" + "testing" +) + +func TestObjdumpRISCV64TestDecodeGNUSyntaxdata(t *testing.T) { + testObjdumpRISCV64(t, testdataCases(t, "gnu")) +} +func TestObjdumpRISCV64TestDecodeGoSyntaxdata(t *testing.T) { + testObjdumpRISCV64(t, testdataCases(t, "plan9")) +} + +func TestObjdumpRISCV64Manual(t *testing.T) { + testObjdumpRISCV64(t, hexCases(t, objdumpManualTests)) +} + +// objdumpManualTests holds test cases that will be run by TestObjdumpRISCV64Manual. +// If you are debugging a few cases that turned up in a longer run, it can be useful +// to list them here and then use -run=Manual, particularly with tracing enabled. +// Note that these are byte sequences, so they must be reversed from the usual +// word presentation. +var objdumpManualTests = ` +93020300 +13000000 +9b020300 +afb5b50e +73b012c0 +73f01fc0 +73a012c0 +73e01fc0 +f3223000 +f3221000 +f3222000 +f3123300 +f3121300 +f3122300 +739012c0 +73d01fc0 +53a01022 +53a01020 +53801022 +53801020 +53901022 +53901020 +67800000 +67800200 +b3026040 +bb026040 +9342f3ff +f32200c0 +f32200c8 +f32220c0 +f32220c8 +f32210c0 +f32210c8 +` + +// allowedMismatchObjdump reports whether the mismatch between text and dec +// should be allowed by the test. +func allowedMismatchObjdump(text string, inst *Inst, dec ExtInst) bool { + // Allow the mismatch of Branch/Jump instruction's offset. 
+ decsp := strings.Split(dec.text, ",") + + switch inst.Op { + case BEQ, BGE, BGEU, BLT, BLTU, BNE: + if inst.Args[2].(Simm).String() != decsp[len(decsp)-1] { + return true + } + case JAL: + if inst.Args[1].(Simm).String() != decsp[len(decsp)-1] { + return true + } + case JALR: + if inst.Args[1].(RegOffset).Ofs.String() != decsp[len(decsp)-1] { + return true + } + } + + return false +} diff --git a/riscv64/riscv64asm/objdumpext_test.go b/riscv64/riscv64asm/objdumpext_test.go new file mode 100644 index 00000000..4f1f21a5 --- /dev/null +++ b/riscv64/riscv64asm/objdumpext_test.go @@ -0,0 +1,299 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package riscv64asm + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "io" + "log" + "os" + "os/exec" + "strconv" + "strings" + "testing" +) + +var objdumpPath = "riscv64-linux-gnu-objdump" + +func testObjdumpRISCV64(t *testing.T, generate func(func([]byte))) { + testObjdumpArch(t, generate) +} + +func testObjdumpArch(t *testing.T, generate func(func([]byte))) { + checkObjdumpRISCV64(t) + testExtDis(t, "gnu", objdump, generate, allowedMismatchObjdump) + testExtDis(t, "plan9", objdump, generate, allowedMismatchObjdump) +} + +func checkObjdumpRISCV64(t *testing.T) { + objdumpPath, err := exec.LookPath(objdumpPath) + if err != nil { + objdumpPath = "objdump" + } + out, err := exec.Command(objdumpPath, "-i").Output() + if err != nil { + t.Skipf("cannot run objdump: %v\n%s", err, out) + } + if !strings.Contains(string(out), "riscv") { + t.Skip("objdump does not have RISC-V support") + } +} + +func objdump(ext *ExtDis) error { + // File already written with instructions; add ELF header. 
+ if err := writeELF64(ext.File, ext.Size); err != nil { + return err + } + + b, err := ext.Run(objdumpPath, "-M numeric", "-d", "-z", ext.File.Name()) + if err != nil { + return err + } + + var ( + nmatch int + reading bool + next uint64 = start + addr uint64 + encbuf [4]byte + enc []byte + text string + ) + flush := func() { + if addr == next { + // PC-relative addresses are translated to absolute addresses based on PC by GNU objdump + // Following logical rewrites the absolute addresses back to PC-relative ones for comparing + // with our disassembler output which are PC-relative + if text == "undefined" && len(enc) == 4 { + text = "error: unknown instruction" + enc = nil + } + if len(enc) == 4 { + // prints as word but we want to record bytes + enc[0], enc[3] = enc[3], enc[0] + enc[1], enc[2] = enc[2], enc[1] + } + ext.Dec <- ExtInst{addr, encbuf, len(enc), text} + encbuf = [4]byte{} + enc = nil + next += 4 + } + } + var textangle = []byte("<.text>:") + for { + line, err := b.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("reading objdump output: %v", err) + } + if bytes.Contains(line, textangle) { + reading = true + continue + } + if !reading { + continue + } + if debug { + os.Stdout.Write(line) + } + if enc1 := parseContinuation(line, encbuf[:len(enc)]); enc1 != nil { + enc = enc1 + continue + } + flush() + nmatch++ + addr, enc, text = parseLine(line, encbuf[:0]) + if addr > next { + return fmt.Errorf("address out of sync expected <= %#x at %q in:\n%s", next, line, line) + } + } + flush() + if next != start+uint64(ext.Size) { + return fmt.Errorf("not enough results found [%d %d]", next, start+ext.Size) + } + if err := ext.Wait(); err != nil { + return fmt.Errorf("exec: %v", err) + } + + return nil +} + +var ( + undefined = []byte("undefined") + unpredictable = []byte("unpredictable") + slashslash = []byte("//") +) + +func parseLine(line []byte, encstart []byte) (addr uint64, enc []byte, text string) { + ok := false + 
oline := line + i := bytes.Index(line, []byte(":\t")) + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + x, err := strconv.ParseUint(string(bytes.TrimSpace(line[:i])), 16, 32) + if err != nil { + log.Fatalf("cannot parse disassembly: %q", oline) + } + addr = uint64(x) + line = line[i+2:] + i = bytes.IndexByte(line, '\t') + if i < 0 { + log.Fatalf("cannot parse disassembly: %q", oline) + } + enc, ok = parseHex(line[:i], encstart) + if !ok { + log.Fatalf("cannot parse disassembly: %q", oline) + } + line = bytes.TrimSpace(line[i:]) + if bytes.Contains(line, undefined) { + text = "undefined" + return + } + if false && bytes.Contains(line, unpredictable) { + text = "unpredictable" + return + } + // Strip trailing comment starting with '#' + if i := bytes.IndexByte(line, '#'); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + // Strip trailing comment starting with "//" + if i := bytes.Index(line, slashslash); i >= 0 { + line = bytes.TrimSpace(line[:i]) + } + text = string(fixSpace(line)) + return +} + +// fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s. +// If s must be rewritten, it is rewritten in place. +func fixSpace(s []byte) []byte { + s = bytes.TrimSpace(s) + for i := 0; i < len(s); i++ { + if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' { + goto Fix + } + } + return s + +Fix: + b := s + w := 0 + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\t' || c == '\n' { + c = ' ' + } + if c == ' ' && w > 0 && b[w-1] == ' ' { + continue + } + b[w] = c + w++ + } + if w > 0 && b[w-1] == ' ' { + w-- + } + return b[:w] +} + +func parseContinuation(line []byte, enc []byte) []byte { + i := bytes.Index(line, []byte(":\t")) + if i < 0 { + return nil + } + line = line[i+1:] + enc, _ = parseHex(line, enc) + return enc +} + +// writeELF64 writes an ELF64 header to the file, describing a text +// segment that starts at start (0x8000) and extends for size bytes. 
+func writeELF64(f *os.File, size int) error { + f.Seek(0, io.SeekStart) + var hdr elf.Header64 + var prog elf.Prog64 + var sect elf.Section64 + var buf bytes.Buffer + binary.Write(&buf, binary.LittleEndian, &hdr) + off1 := buf.Len() + binary.Write(&buf, binary.LittleEndian, &prog) + off2 := buf.Len() + binary.Write(&buf, binary.LittleEndian, §) + off3 := buf.Len() + buf.Reset() + data := byte(elf.ELFDATA2LSB) + hdr = elf.Header64{ + Ident: [16]byte{0x7F, 'E', 'L', 'F', 2, data, 1}, + Type: 2, + Machine: uint16(elf.EM_RISCV), + Version: 1, + Entry: start, + Phoff: uint64(off1), + Shoff: uint64(off2), + Flags: 0x5, + Ehsize: uint16(off1), + Phentsize: uint16(off2 - off1), + Phnum: 1, + Shentsize: uint16(off3 - off2), + Shnum: 4, + Shstrndx: 3, + } + binary.Write(&buf, binary.LittleEndian, &hdr) + prog = elf.Prog64{ + Type: 1, + Off: start, + Vaddr: start, + Paddr: start, + Filesz: uint64(size), + Memsz: uint64(size), + Flags: 5, + Align: start, + } + binary.Write(&buf, binary.LittleEndian, &prog) + binary.Write(&buf, binary.LittleEndian, §) // NULL section + sect = elf.Section64{ + Name: 1, + Type: uint32(elf.SHT_PROGBITS), + Addr: start, + Off: start, + Size: uint64(size), + Flags: uint64(elf.SHF_ALLOC | elf.SHF_EXECINSTR), + Addralign: 4, + } + binary.Write(&buf, binary.LittleEndian, §) // .text + strtabsize := len("\x00.text\x00.riscv.attributes\x00.shstrtab\x00") + // RISC-V objdump needs the .riscv.attributes section to identify + // the RV64G (not include compressed) extensions. 
+	sect = elf.Section64{
+		Name:      uint32(len("\x00.text\x00")),
+		Type:      uint32(0x70000003), // SHT_RISCV_ATTRIBUTES
+		Addr:      0,
+		Off:       uint64(off2 + (off3-off2)*4 + strtabsize),
+		Size:      102,
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect)
+	sect = elf.Section64{
+		Name:      uint32(len("\x00.text\x00.riscv.attributes\x00")),
+		Type:      uint32(elf.SHT_STRTAB),
+		Addr:      0,
+		Off:       uint64(off2 + (off3-off2)*4),
+		Size:      uint64(strtabsize),
+		Addralign: 1,
+	}
+	binary.Write(&buf, binary.LittleEndian, &sect)
+	buf.WriteString("\x00.text\x00.riscv.attributes\x00.shstrtab\x00")
+	// Contents of .riscv.attributes section
+	// which specify the extension and priv spec version. (1.11)
+	buf.WriteString("Ae\x00\x00\x00riscv\x00\x01[\x00\x00\x00\x05rv64i2p1_m2pp_a2p1_f2p2_d2p2_q2p2_zibsr2p0_zifencei2p0_zmmul1p0_zfh1p0_zfhmin1p0\x00\x08\x01\x0a\x0b")
+	f.Write(buf.Bytes())
+	return nil
+}
diff --git a/riscv64/riscv64asm/testdata/Makefile b/riscv64/riscv64asm/testdata/Makefile
new file mode 100644
index 00000000..8ac7835b
--- /dev/null
+++ b/riscv64/riscv64asm/testdata/Makefile
@@ -0,0 +1,9 @@
+go test command:
+	cd ..; go generate
+	cd ..; go test -run 'ObjdumpRISCV64TestGNUSyntaxdata' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'ObjdumpRISCV64TestGoSyntaxdata' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'ObjdumpRISCV64' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'ObjdumpRISCV64Manual' -v -timeout 10h 2>&1 | tee -a log
+	cd ..; go test -run 'TestDecodeGNUSyntax'
+	cd ..; go test -run 'TestDecodeGoSyntax'
+	cd ..; go test -run '.*'
diff --git a/riscv64/riscv64asm/testdata/gnucases.txt b/riscv64/riscv64asm/testdata/gnucases.txt
new file mode 100644
index 00000000..dad05a7e
--- /dev/null
+++ b/riscv64/riscv64asm/testdata/gnucases.txt
@@ -0,0 +1,390 @@
+b3027300| add x5,x6,x7
+9302f3ff| addi x5,x6,-1
+9302f37f| addi x5,x6,2047
+1305a000| li x10,10
+13000000| nop
+93870900| mv x15,x19
+9b02f37f| addiw x5,x6,2047
+1b830a00| sext.w x6,x21
+bb027300| addw x5,x6,x7 +afb26300| amoadd.d x5,x6,(x7) +afb26304| amoadd.d.aq x5,x6,(x7) +afb26302| amoadd.d.rl x5,x6,(x7) +afa26300| amoadd.w x5,x6,(x7) +afa26304| amoadd.w.aq x5,x6,(x7) +afa26302| amoadd.w.rl x5,x6,(x7) +afb26360| amoand.d x5,x6,(x7) +afb26364| amoand.d.aq x5,x6,(x7) +afb26362| amoand.d.rl x5,x6,(x7) +afa26360| amoand.w x5,x6,(x7) +afa26364| amoand.w.aq x5,x6,(x7) +afa26362| amoand.w.rl x5,x6,(x7) +afb263e0| amomaxu.d x5,x6,(x7) +afb263e4| amomaxu.d.aq x5,x6,(x7) +afb263e2| amomaxu.d.rl x5,x6,(x7) +afa263e0| amomaxu.w x5,x6,(x7) +afa263e4| amomaxu.w.aq x5,x6,(x7) +afa263e2| amomaxu.w.rl x5,x6,(x7) +afb263a0| amomax.d x5,x6,(x7) +afb263a4| amomax.d.aq x5,x6,(x7) +afb263a2| amomax.d.rl x5,x6,(x7) +afa263a0| amomax.w x5,x6,(x7) +afa263a4| amomax.w.aq x5,x6,(x7) +afa263a2| amomax.w.rl x5,x6,(x7) +afb263c0| amominu.d x5,x6,(x7) +afb263c4| amominu.d.aq x5,x6,(x7) +afb263c2| amominu.d.rl x5,x6,(x7) +afa263c0| amominu.w x5,x6,(x7) +afa263c4| amominu.w.aq x5,x6,(x7) +afa263c2| amominu.w.rl x5,x6,(x7) +afb26380| amomin.d x5,x6,(x7) +afb26384| amomin.d.aq x5,x6,(x7) +afb26382| amomin.d.rl x5,x6,(x7) +afa26380| amomin.w x5,x6,(x7) +afa26384| amomin.w.aq x5,x6,(x7) +afa26382| amomin.w.rl x5,x6,(x7) +afb26340| amoor.d x5,x6,(x7) +afb26344| amoor.d.aq x5,x6,(x7) +afb26342| amoor.d.rl x5,x6,(x7) +afa26340| amoor.w x5,x6,(x7) +afa26344| amoor.w.aq x5,x6,(x7) +afa26342| amoor.w.rl x5,x6,(x7) +afb26308| amoswap.d x5,x6,(x7) +afb2630c| amoswap.d.aq x5,x6,(x7) +afb2630a| amoswap.d.rl x5,x6,(x7) +afa26308| amoswap.w x5,x6,(x7) +afa2630c| amoswap.w.aq x5,x6,(x7) +afa2630a| amoswap.w.rl x5,x6,(x7) +afb26320| amoxor.d x5,x6,(x7) +afb26324| amoxor.d.aq x5,x6,(x7) +afb26322| amoxor.d.rl x5,x6,(x7) +afa26320| amoxor.w x5,x6,(x7) +afa26324| amoxor.w.aq x5,x6,(x7) +afa26322| amoxor.w.rl x5,x6,(x7) +b3727300| and x5,x6,x7 +9372f3ff| andi x5,x6,-1 +9372f37f| andi x5,x6,2047 +97020000| auipc x5,0x0 +97028000| auipc x5,0x800 +e38062f0| beq x5,x6,-256 +e3de62ee| bge x5,x6,-260 
+e3fc62ee| bgeu x5,x6,-264 +e3ca62ee| blt x5,x6,-268 +e3e862ee| bltu x5,x6,-272 +e39662ee| bne x5,x6,-276 +63940200| bnez x5,8 +63c40400| bltz x9,8 +63447000| bgtz x7,8 +63d40900| bgez x19,8 +6354d001| blez x29,8 +63040800| beqz x16,8 +f33213c0| csrrc x5,time,x6 +f3f21fc0| csrrci x5,time,31 +f32213c0| csrrs x5,time,x6 +f3e21fc0| csrrsi x5,time,31 +f31213c0| csrrw x5,time,x6 +f3d21fc0| csrrwi x5,time,31 +733015c0| csrc time,x10 +73f010c0| csrci time,1 +73253000| frcsr x10 +f3251000| frflags x11 +73262000| frrm x12 +f32400c0| rdcycle x9 +732920c0| rdinstret x18 +f32910c0| rdtime x19 +f3224014| csrr x5,sip +73201bc0| csrs time,x22 +736014c8| csrsi timeh,8 +73903700| fscsr x15 +73101800| fsflags x16 +73902800| fsrm x17 +731014c0| csrw time,x8 +735016c8| csrwi timeh,12 +b3427302| div x5,x6,x7 +b3527302| divu x5,x6,x7 +bb527302| divuw x5,x6,x7 +bb427302| divw x5,x6,x7 +73001000| ebreak +73000000| ecall +0f00f00f| fence +53f02002| fadd.d f0,f1,f2 +53f02004| fadd.h f0,f1,f2 +53f02006| fadd.q f0,f1,f2 +53f02000| fadd.s f0,f1,f2 +d31200e2| fclass.d x5,f0 +d31200e4| fclass.h x5,f0 +d31200e6| fclass.q x5,f0 +d31200e0| fclass.s x5,f0 +53f022d2| fcvt.d.l f0,x5 +53f032d2| fcvt.d.lu f0,x5 +53f03042| fcvt.d.q f0,f1 +53800042| fcvt.d.s f0,f1 +538002d2| fcvt.d.w f0,x5 +538012d2| fcvt.d.wu f0,x5 +53f022d4| fcvt.h.l f0,x5 +53f032d4| fcvt.h.lu f0,x5 +53f00044| fcvt.h.s f0,f1 +53f002d4| fcvt.h.w f0,x5 +53f012d4| fcvt.h.wu f0,x5 +d37230c2| fcvt.lu.d x5,f0 +d37230c4| fcvt.lu.h x5,f0 +d37230c6| fcvt.lu.q x5,f0 +d37230c0| fcvt.lu.s x5,f0 +d37220c2| fcvt.l.d x5,f0 +d37220c4| fcvt.l.h x5,f0 +d37220c6| fcvt.l.q x5,f0 +d37220c0| fcvt.l.s x5,f0 +53801046| fcvt.q.d f0,f1 +538022d6| fcvt.q.l f0,x5 +538032d6| fcvt.q.lu f0,x5 +53800046| fcvt.q.s f0,f1 +538002d6| fcvt.q.w f0,x5 +538012d6| fcvt.q.wu f0,x5 +53f01040| fcvt.s.d f0,f1 +53802040| fcvt.s.h f0,f1 +53f022d0| fcvt.s.l f0,x5 +53f032d0| fcvt.s.lu f0,x5 +53f03040| fcvt.s.q f0,f1 +53f002d0| fcvt.s.w f0,x5 +53f012d0| fcvt.s.wu f0,x5 +d37210c2| 
fcvt.wu.d x5,f0 +d37210c4| fcvt.wu.h x5,f0 +d37210c6| fcvt.wu.q x5,f0 +d37210c0| fcvt.wu.s x5,f0 +d37200c2| fcvt.w.d x5,f0 +d37200c4| fcvt.w.h x5,f0 +d37200c6| fcvt.w.q x5,f0 +d37200c0| fcvt.w.s x5,f0 +53f0201a| fdiv.d f0,f1,f2 +53f0201c| fdiv.h f0,f1,f2 +53f0201e| fdiv.q f0,f1,f2 +53f02018| fdiv.s f0,f1,f2 +0f00f00f| fence +0f100000| fence.i +d32210a2| feq.d x5,f0,f1 +d32210a4| feq.h x5,f0,f1 +d32210a6| feq.q x5,f0,f1 +d32210a0| feq.s x5,f0,f1 +07b0f27f| fld f0,2047(x5) +d30210a2| fle.d x5,f0,f1 +d30210a4| fle.h x5,f0,f1 +d30210a6| fle.q x5,f0,f1 +d30210a0| fle.s x5,f0,f1 +0790f27f| flh f0,2047(x5) +07c0f27f| flq f0,2047(x5) +d31210a2| flt.d x5,f0,f1 +d31210a4| flt.h x5,f0,f1 +d31210a6| flt.q x5,f0,f1 +d31210a0| flt.s x5,f0,f1 +07a0f27f| flw f0,2047(x5) +43f0201a| fmadd.d f0,f1,f2,f3 +43f0201c| fmadd.h f0,f1,f2,f3 +43f0201e| fmadd.q f0,f1,f2,f3 +43f02018| fmadd.s f0,f1,f2,f3 +5390202a| fmax.d f0,f1,f2 +5390202c| fmax.h f0,f1,f2 +5390202e| fmax.q f0,f1,f2 +53902028| fmax.s f0,f1,f2 +5380202a| fmin.d f0,f1,f2 +5380202c| fmin.h f0,f1,f2 +5380202e| fmin.q f0,f1,f2 +53802028| fmin.s f0,f1,f2 +47f0201a| fmsub.d f0,f1,f2,f3 +47f0201c| fmsub.h f0,f1,f2,f3 +47f0201e| fmsub.q f0,f1,f2,f3 +47f02018| fmsub.s f0,f1,f2,f3 +53f02012| fmul.d f0,f1,f2 +53f02014| fmul.h f0,f1,f2 +53f02016| fmul.q f0,f1,f2 +53f02010| fmul.s f0,f1,f2 +538002f2| fmv.d.x f0,x5 +538002f4| fmv.h.x f0,x5 +d30200e2| fmv.x.d x5,f0 +d30200e4| fmv.x.h x5,f0 +d30200e0| fmv.x.w x5,f0 +4ff0201a| fnmadd.d f0,f1,f2,f3 +4ff0201c| fnmadd.h f0,f1,f2,f3 +4ff0201e| fnmadd.q f0,f1,f2,f3 +4ff02018| fnmadd.s f0,f1,f2,f3 +4bf0201a| fnmsub.d f0,f1,f2,f3 +4bf0201c| fnmsub.h f0,f1,f2,f3 +4bf0201e| fnmsub.q f0,f1,f2,f3 +4bf02018| fnmsub.s f0,f1,f2,f3 +a7bf027e| fsd f0,2047(x5) +53902022| fsgnjn.d f0,f1,f2 +53902024| fsgnjn.h f0,f1,f2 +53902026| fsgnjn.q f0,f1,f2 +53902020| fsgnjn.s f0,f1,f2 +53a02022| fsgnjx.d f0,f1,f2 +53a02024| fsgnjx.h f0,f1,f2 +53a02026| fsgnjx.q f0,f1,f2 +53a02020| fsgnjx.s f0,f1,f2 +53802022| fsgnj.d 
f0,f1,f2 +53802024| fsgnj.h f0,f1,f2 +53802026| fsgnj.q f0,f1,f2 +53802020| fsgnj.s f0,f1,f2 +53a01022| fabs.d f0,f1 +53a49420| fabs.s f8,f9 +d305c622| fmv.d f11,f12 +d306e720| fmv.s f13,f14 +d3170823| fneg.d f15,f16 +d398f720| fneg.s f17,f15 +a79f027e| fsh f0,2047(x5) +a7cf027e| fsq f0,2047(x5) +53f0005a| fsqrt.d f0,f1 +53f0005c| fsqrt.h f0,f1 +53f0005e| fsqrt.q f0,f1 +53f00058| fsqrt.s f0,f1 +53f0200a| fsub.d f0,f1,f2 +53f0200c| fsub.h f0,f1,f2 +53f0200e| fsub.q f0,f1,f2 +53f02008| fsub.s f0,f1,f2 +a7af027e| fsw f0,2047(x5) +6ff0dfcb| j -836 +eff09fcb| jal -840 +eff25fcb| jal x5,-844 +67800200| jr x5 +e7800202| jalr x1,32(x5) +67800000| ret +6700a500| jr 10(x10) +8302f37f| lb x5,2047(x6) +8342f37f| lbu x5,2047(x6) +af320310| lr.d x5,(x6) +af320314| lr.d.aq x5,(x6) +af320312| lr.d.rl x5,(x6) +af220310| lr.w x5,(x6) +af220314| lr.w.aq x5,(x6) +af220312| lr.w.rl x5,(x6) +b7829102| lui x5,0x2918 +8322f37f| lw x5,2047(x6) +8362f37f| lwu x5,2047(x6) +b3027302| mul x5,x6,x7 +b3127302| mulh x5,x6,x7 +b3227302| mulhsu x5,x6,x7 +b3327302| mulhu x5,x6,x7 +bb027302| mulw x5,x6,x7 +b3627300| or x5,x6,x7 +93620380| ori x5,x6,-2048 +b3627302| rem x5,x6,x7 +b3727302| remu x5,x6,x7 +bb627302| remw x5,x6,x7 +a30f537e| sb x5,2047(x6) +afb26318| sc.d x5,x6,(x7) +afb2631c| sc.d.aq x5,x6,(x7) +afb2631a| sc.d.rl x5,x6,(x7) +afa26318| sc.w x5,x6,(x7) +afa2631c| sc.w.aq x5,x6,(x7) +afa2631a| sc.w.rl x5,x6,(x7) +a33f537e| sd x5,2047(x6) +23105380| sh x5,-2048(x6) +b3127300| sll x5,x6,x7 +93124303| slli x5,x6,0x34 +9b127301| slliw x5,x6,0x17 +b3227300| slt x5,x6,x7 +b3226000| sgtz x5,x6 +b32e0f00| sltz x29,x30 +9322f37f| slti x5,x6,2047 +93320380| sltiu x5,x6,-2048 +93321300| seqz x5,x6 +b3327300| sltu x5,x6,x7 +33394001| snez x18,x20 +b3527340| sra x5,x6,x7 +93524343| srai x5,x6,0x34 +9b526341| sraiw x5,x6,0x16 +bb527340| sraw x5,x6,x7 +b3527300| srl x5,x6,x7 +93524303| srli x5,x6,0x34 +9b526301| srliw x5,x6,0x16 +bb527300| srlw x5,x6,x7 +b3027340| sub x5,x6,x7 +b3026040| neg x5,x6 
+bb027340| subw x5,x6,x7 +3b0ff041| negw x30,x31 +a32f537e| sw x5,2047(x6) +b3427300| xor x5,x6,x7 +9342f37f| xori x5,x6,2047 +93c2ffff| not x5,x31 +bb003108| add.uw x1,x2,x3 +33a26220| sh1add x4,x5,x6 +bb239420| sh1add.uw x7,x8,x9 +33c5c520| sh2add x10,x11,x12 +bb46f720| sh2add.uw x13,x14,x15 +33e82821| sh3add x16,x17,x18 +bb695a21| sh3add.uw x19,x20,x21 +1b9b7b09| slli.uw x22,x23,0x17 +33fcac41| andn x24,x25,x26 +b36dde41| orn x27,x28,x29 +33cf1f40| xnor x30,x31,x1 +13910160| clz x2,x3 +1b920260| clzw x4,x5 +13931360| ctz x6,x7 +1b941460| ctzw x8,x9 +13952560| cpop x10,x11 +1b962660| cpopw x12,x13 +33e7070b| max x14,x15,x16 +b378390b| maxu x17,x18,x19 +33ca6a0b| min x20,x21,x22 +b35b9c0b| minu x23,x24,x25 +139d4d60| sext.b x26,x27 +139e5e60| sext.h x28,x29 +3bcf0f08| zext.h x30,x31 +b3102060| rol x1,x0,x2 +bb115260| rolw x3,x4,x5 +33d38360| ror x6,x7,x8 +9354a560| rori x9,x10,0xa +9b55e660| roriw x11,x12,0xe +bb56f760| rorw x13,x14,x15 +13d87828| orc.b x16,x17 +13d9896b| rev8 x18,x19 +339a6a49| bclr x20,x21,x22 +931bfc48| bclri x23,x24,0xf +b35cbd49| bext x25,x26,x27 +13de8e48| bexti x28,x29,0x8 +339f0f68| binv x30,x31,x0 +9310016a| binvi x1,x2,0x20 +b3115228| bset x3,x4,x5 +1393f32b| bseti x6,x7,0x3f +4000| addi x8,x2,4 +2041| lw x8,64(x10) +94d0| sw x13,32(x9) +0100| nop +811f| addi x31,x31,-32 +4111| addi x2,x2,-16 +8158| li x17,-32 +4161| addi x2,x2,16 +4163| lui x6,0x10 +819b| andi x15,x15,-32 +0d8c| sub x8,x8,x11 +b18c| xor x9,x9,x12 +558c| or x8,x8,x13 +f98c| and x9,x9,x14 +01a8| j 16 +99c5| beqz x11,14 +85e3| bnez x15,32 +c248| lw x17,16(x2) +8283| jr x7 +fa88| mv x17,x30 +0290| ebreak +0295| jalr x10 +c297| add x15,x15,x16 +76c4| sw x29,8(x2) +8873| ld x10,32(x15) +00ea| sd x8,16(x12) +3d31| addiw x2,x2,-17 +2180| srli x8,x8,0x8 +c184| srai x9,x9,0x10 +919d| subw x11,x11,x12 +b99e| addw x13,x13,x14 +4a01| slli x2,x2,0x12 +027d| ld x26,32(x2) +a260| ld x1,8(x2) +864d| lw x27,64(x2) +2021| fld f8,64(x10) +8cb0| fsd f11,32(x9) +8624| fld f9,64(x2) +3eb0| 
fsd f15,32(x2) +0000| unimp +ab| illegalins +f3| illegalins +abc3| illegalins +abcde3| illegalins diff --git a/riscv64/riscv64asm/testdata/plan9cases.txt b/riscv64/riscv64asm/testdata/plan9cases.txt new file mode 100644 index 00000000..d38c5eba --- /dev/null +++ b/riscv64/riscv64asm/testdata/plan9cases.txt @@ -0,0 +1,336 @@ +b3027300| ADD X7, X6, X5 +9302f3ff| ADDI $-1, X6, X5 +9302f37f| ADDI $2047, X6, X5 +93870900| MOV X19, X15 +93070100| MOV X2, X15 +9b02f37f| ADDIW $2047, X6, X5 +1b830a00| MOVW X21, X6 +1b810a00| MOVW X21, X2 +bb027300| ADDW X7, X6, X5 +afb26300| AMOADDD X6, (X7), X5 +afb26304| AMOADDD X6, (X7), X5 +afb26302| AMOADDD X6, (X7), X5 +afa26300| AMOADDW X6, (X7), X5 +afa26304| AMOADDW X6, (X7), X5 +afa26302| AMOADDW X6, (X7), X5 +afb26360| AMOANDD X6, (X7), X5 +afb26364| AMOANDD X6, (X7), X5 +afb26362| AMOANDD X6, (X7), X5 +afa26360| AMOANDW X6, (X7), X5 +afa26364| AMOANDW X6, (X7), X5 +afa26362| AMOANDW X6, (X7), X5 +afb263e0| AMOMAXUD X6, (X7), X5 +afb263e4| AMOMAXUD X6, (X7), X5 +afb263e2| AMOMAXUD X6, (X7), X5 +afa263e0| AMOMAXUW X6, (X7), X5 +afa263e4| AMOMAXUW X6, (X7), X5 +afa263e2| AMOMAXUW X6, (X7), X5 +afb263a0| AMOMAXD X6, (X7), X5 +afb263a4| AMOMAXD X6, (X7), X5 +afb263a2| AMOMAXD X6, (X7), X5 +afa263a0| AMOMAXW X6, (X7), X5 +afa263a4| AMOMAXW X6, (X7), X5 +afa263a2| AMOMAXW X6, (X7), X5 +afb263c0| AMOMINUD X6, (X7), X5 +afb263c4| AMOMINUD X6, (X7), X5 +afb263c2| AMOMINUD X6, (X7), X5 +afa263c0| AMOMINUW X6, (X7), X5 +afa263c4| AMOMINUW X6, (X7), X5 +afa263c2| AMOMINUW X6, (X7), X5 +afb26380| AMOMIND X6, (X7), X5 +afb26384| AMOMIND X6, (X7), X5 +afb26382| AMOMIND X6, (X7), X5 +afa26380| AMOMINW X6, (X7), X5 +afa26384| AMOMINW X6, (X7), X5 +afa26382| AMOMINW X6, (X7), X5 +afb26340| AMOORD X6, (X7), X5 +afb26344| AMOORD X6, (X7), X5 +afb26342| AMOORD X6, (X7), X5 +afa26340| AMOORW X6, (X7), X5 +afa26344| AMOORW X6, (X7), X5 +afa26342| AMOORW X6, (X7), X5 +afb26308| AMOSWAPD X6, (X7), X5 +afb2630c| AMOSWAPD X6, (X7), X5 +afb2630a| AMOSWAPD 
X6, (X7), X5 +afa26308| AMOSWAPW X6, (X7), X5 +afa2630c| AMOSWAPW X6, (X7), X5 +afa2630a| AMOSWAPW X6, (X7), X5 +afb26320| AMOXORD X6, (X7), X5 +afb26324| AMOXORD X6, (X7), X5 +afb26322| AMOXORD X6, (X7), X5 +afa26320| AMOXORW X6, (X7), X5 +afa26324| AMOXORW X6, (X7), X5 +afa26322| AMOXORW X6, (X7), X5 +b3727300| AND X7, X6, X5 +9372f3ff| ANDI $-1, X6, X5 +9372f37f| ANDI $2047, X6, X5 +9372f30f| MOVBU X6, X5 +97020000| AUIPC $0, X5 +97028000| AUIPC $2048, X5 +e38062f0| BEQ X5, X6, -64(PC) +e3de62ee| BGE X5, X6, -65(PC) +e3fc62ee| BGEU X5, X6, -66(PC) +e3ca62ee| BLT X5, X6, -67(PC) +e3e862ee| BLTU X5, X6, -68(PC) +e39662ee| BNE X5, X6, -69(PC) +e30403ee| BEQZ X6, -70(PC) +e35203ee| BGEZ X6, -71(PC) +e34003ee| BLTZ X6, -72(PC) +e31e03ec| BNEZ X6, -73(PC) +f33213c0| CSRRC X6, TIME, X5 +f3f21fc0| CSRRCI $31, TIME, X5 +f32213c0| CSRRS X6, TIME, X5 +f3e21fc0| CSRRSI $31, TIME, X5 +f31213c0| CSRRW X6, TIME, X5 +f3d21fc0| CSRRWI $31, TIME, X5 +733015c0| CSRRC X10, TIME, X0 +73f010c0| CSRRCI $1, TIME, X0 +73253000| FRCSR X10 +f3251000| FRFLAGS X11 +73262000| FRRM X12 +f32400c0| RDCYCLE X9 +732920c0| RDINSTRET X18 +f32910c0| RDTIME X19 +f3224014| CSRRS X0, SIP, X5 +73201bc0| CSRRS X22, TIME, X0 +736014c8| CSRRSI $8, TIMEH, X0 +73903700| FSCSR X15, X0 +73101800| FSFLAGS X16, X0 +73902800| FSRM X17, X0 +731014c0| CSRRW X8, TIME, X0 +735016c8| CSRRWI $12, TIMEH, X0 +b3427302| DIV X7, X6, X5 +b3527302| DIVU X7, X6, X5 +bb527302| DIVUW X7, X6, X5 +bb427302| DIVW X7, X6, X5 +73001000| EBREAK +73000000| ECALL +53f02002| FADDD F2, F1, F0 +53f02004| FADDH F2, F1, F0 +53f02006| FADDQ F2, F1, F0 +53f02000| FADDS F2, F1, F0 +d31200e2| FCLASSD F0, X5 +d31200e4| FCLASSH F0, X5 +d31200e6| FCLASSQ F0, X5 +d31200e0| FCLASSS F0, X5 +53f022d2| FCVTDL X5, F0 +53f032d2| FCVTDLU X5, F0 +53f03042| FCVTDQ F1, F0 +53800042| FCVTDS F1, F0 +538002d2| FCVTDW X5, F0 +538012d2| FCVTDWU X5, F0 +53f022d4| FCVTHL X5, F0 +53f032d4| FCVTHLU X5, F0 +53f00044| FCVTHS F1, F0 +53f002d4| FCVTHW X5, F0 +53f012d4| 
FCVTHWU X5, F0 +d37230c2| FCVTLUD F0, X5 +d37230c4| FCVTLUH F0, X5 +d37230c6| FCVTLUQ F0, X5 +d37230c0| FCVTLUS F0, X5 +d37220c2| FCVTLD F0, X5 +d37220c4| FCVTLH F0, X5 +d37220c6| FCVTLQ F0, X5 +d37220c0| FCVTLS F0, X5 +53801046| FCVTQD F1, F0 +538022d6| FCVTQL X5, F0 +538032d6| FCVTQLU X5, F0 +53800046| FCVTQS F1, F0 +538002d6| FCVTQW X5, F0 +538012d6| FCVTQWU X5, F0 +53f01040| FCVTSD F1, F0 +53802040| FCVTSH F1, F0 +53f022d0| FCVTSL X5, F0 +53f032d0| FCVTSLU X5, F0 +53f03040| FCVTSQ F1, F0 +53f002d0| FCVTSW X5, F0 +53f012d0| FCVTSWU X5, F0 +d37210c2| FCVTWUD F0, X5 +d37210c4| FCVTWUH F0, X5 +d37210c6| FCVTWUQ F0, X5 +d37210c0| FCVTWUS F0, X5 +d37200c2| FCVTWD F0, X5 +d37200c4| FCVTWH F0, X5 +d37200c6| FCVTWQ F0, X5 +d37200c0| FCVTWS F0, X5 +53f0201a| FDIVD F2, F1, F0 +53f0201c| FDIVH F2, F1, F0 +53f0201e| FDIVQ F2, F1, F0 +53f02018| FDIVS F2, F1, F0 +0f00f00f| FENCE +0f100000| FENCEI +d32210a2| FEQD F1, F0, X5 +d32210a4| FEQH F1, F0, X5 +d32210a6| FEQQ F1, F0, X5 +d32210a0| FEQS F1, F0, X5 +07b0f27f| MOVD F0, 2047(X5) +d30210a2| FLED F1, F0, X5 +d30210a4| FLEH F1, F0, X5 +d30210a6| FLEQ F1, F0, X5 +d30210a0| FLES F1, F0, X5 +0790f27f| FLH 2047(X5), F0 +07c0f27f| FLQ 2047(X5), F0 +d31210a2| FLTD F1, F0, X5 +d31210a4| FLTH F1, F0, X5 +d31210a6| FLTQ F1, F0, X5 +d31210a0| FLTS F1, F0, X5 +07a0f27f| MOVF F0, 2047(X5) +43f0201a| FMADDD F1, F2, F3, F0 +43f0201c| FMADDH F1, F2, F3, F0 +43f0201e| FMADDQ F1, F2, F3, F0 +43f02018| FMADDS F1, F2, F3, F0 +5390202a| FMAXD F2, F1, F0 +5390202c| FMAXH F2, F1, F0 +5390202e| FMAXQ F2, F1, F0 +53902028| FMAXS F2, F1, F0 +5380202a| FMIND F2, F1, F0 +5380202c| FMINH F2, F1, F0 +5380202e| FMINQ F2, F1, F0 +53802028| FMINS F2, F1, F0 +47f0201a| FMSUBD F1, F2, F3, F0 +47f0201c| FMSUBH F1, F2, F3, F0 +47f0201e| FMSUBQ F1, F2, F3, F0 +47f02018| FMSUBS F1, F2, F3, F0 +53f02012| FMULD F2, F1, F0 +53f02014| FMULH F2, F1, F0 +53f02016| FMULQ F2, F1, F0 +53f02010| FMULS F2, F1, F0 +538002f2| FMVDX X5, F0 +538002f4| FMVHX X5, F0 +d30200e2| 
FMVXD F0, X5 +d30200e4| FMVXH F0, X5 +d30200e0| FMVXW F0, X5 +4ff0201a| FNMADDD F1, F2, F3, F0 +4ff0201c| FNMADDH F1, F2, F3, F0 +4ff0201e| FNMADDQ F1, F2, F3, F0 +4ff02018| FNMADDS F1, F2, F3, F0 +4bf0201a| FNMSUBD F1, F2, F3, F0 +4bf0201c| FNMSUBH F1, F2, F3, F0 +4bf0201e| FNMSUBQ F1, F2, F3, F0 +4bf02018| FNMSUBS F1, F2, F3, F0 +a7bf027e| MOVD 2047(X5), F0 +53902022| FSGNJND F2, F1, F0 +53902024| FSGNJNH F2, F1, F0 +53902026| FSGNJNQ F2, F1, F0 +53902020| FSGNJNS F2, F1, F0 +53a02022| FSGNJXD F2, F1, F0 +53a02024| FSGNJXH F2, F1, F0 +53a02026| FSGNJXQ F2, F1, F0 +53a02020| FSGNJXS F2, F1, F0 +53802022| FSGNJD F2, F1, F0 +53802024| FSGNJH F2, F1, F0 +53802026| FSGNJQ F2, F1, F0 +53802020| FSGNJS F2, F1, F0 +a79f027e| FSH 2047(X5), F0 +a7cf027e| FSQ 2047(X5), F0 +53f0005a| FSQRTD F1, F0 +53f0005c| FSQRTH F1, F0 +53f0005e| FSQRTQ F1, F0 +53f00058| FSQRTS F1, F0 +53f0200a| FSUBD F2, F1, F0 +53f0200c| FSUBH F2, F1, F0 +53f0200e| FSUBQ F2, F1, F0 +53f02008| FSUBS F2, F1, F0 +a7af027e| MOVF 2047(X5), F0 +6ff0dfcb| JMP -209(PC) +eff09fcb| CALL -210(PC) +eff25fcb| JAL X5, -211(PC) +67800202| JMP 32(X5) +e7800202| CALL 32(X5) +e7820202| JALR X5, 32(X5) +67800000| RET +8302f37f| MOVB 2047(X6), X5 +8342f37f| MOVBU 2047(X6), X5 +af320310| LRD (X6), X5 +af320314| LRD (X6), X5 +af320312| LRD (X6), X5 +af220310| LRW (X6), X5 +af220314| LRW (X6), X5 +af220312| LRW (X6), X5 +b7829102| LUI $10520, X5 +8322f37f| MOVW 2047(X6), X5 +8362f37f| MOVWU 2047(X6), X5 +b3027302| MUL X7, X6, X5 +b3127302| MULH X7, X6, X5 +b3227302| MULHSU X7, X6, X5 +b3327302| MULHU X7, X6, X5 +bb027302| MULW X7, X6, X5 +b3627300| OR X7, X6, X5 +93620380| ORI $-2048, X6, X5 +b3627302| REM X7, X6, X5 +b3727302| REMU X7, X6, X5 +bb627302| REMW X7, X6, X5 +a30f537e| MOVB X5, 2047(X6) +afb26318| SCD X6, (X7), X5 +afb2631c| SCD X6, (X7), X5 +afb2631a| SCD X6, (X7), X5 +afa26318| SCW X6, (X7), X5 +afa2631c| SCW X6, (X7), X5 +afa2631a| SCW X6, (X7), X5 +a33f537e| MOV X5, 2047(X6) +23105380| MOVH X5, -2048(X6) 
+b3127300| SLL X7, X6, X5 +93124303| SLLI $52, X6, X5 +9b127301| SLLIW $23, X6, X5 +b3227300| SLT X7, X6, X5 +9322f37f| SLTI $2047, X6, X5 +93320380| SLTIU $-2048, X6, X5 +93321300| SEQZ X6, X5 +b3327300| SLTU X7, X6, X5 +33394001| SNEZ X20, X18 +b3527340| SRA X7, X6, X5 +93524343| SRAI $52, X6, X5 +9b526341| SRAIW $22, X6, X5 +bb527340| SRAW X7, X6, X5 +b3527300| SRL X7, X6, X5 +93524303| SRLI $52, X6, X5 +9b526301| SRLIW $22, X6, X5 +bb527300| SRLW X7, X6, X5 +b3027340| SUB X7, X6, X5 +b3026040| NEG X6, X5 +bb027340| SUBW X7, X6, X5 +a32f537e| MOVW X5, 2047(X6) +b3427300| XOR X7, X6, X5 +9342f37f| XORI $2047, X6, X5 +93c2ffff| NOT X31, X5 +bb003108| ADDUW X3, X2, X1 +33a26220| SH1ADD X6, X5, X4 +bb239420| SH1ADDUW X9, X8, X7 +33c5c520| SH2ADD X12, X11, X10 +bb46f720| SH2ADDUW X15, X14, X13 +33e82821| SH3ADD X18, X17, X16 +bb695a21| SH3ADDUW X21, X20, X19 +1b9b7b09| SLLIUW $23, X23, X22 +33fcac41| ANDN X26, X25, X24 +b36dde41| ORN X29, X28, X27 +33cf1f40| XNOR X1, X31, X30 +13910160| CLZ X3, X2 +1b920260| CLZW X5, X4 +13931360| CTZ X7, X6 +1b941460| CTZW X9, X8 +13952560| CPOP X11, X10 +1b962660| CPOPW X13, X12 +33e7070b| MAX X16, X15, X14 +b378390b| MAXU X19, X18, X17 +33ca6a0b| MIN X22, X21, X20 +b35b9c0b| MINU X25, X24, X23 +139d4d60| SEXTB X27, X26 +139e5e60| SEXTH X29, X28 +3bcf0f08| ZEXTH X31, X30 +b3102060| ROL X2, X0, X1 +bb115260| ROLW X5, X4, X3 +33d38360| ROR X8, X7, X6 +9354a560| RORI $10, X10, X9 +9b55e660| RORIW $14, X12, X11 +bb56f760| RORW X15, X14, X13 +13d87828| ORCB X17, X16 +13d9896b| REV8 X19, X18 +339a6a49| BCLR X22, X21, X20 +931bfc48| BCLRI $15, X24, X23 +b35cbd49| BEXT X27, X26, X25 +13de8e48| BEXTI $8, X29, X28 +339f0f68| BINV X0, X31, X30 +9310016a| BINVI $32, X2, X1 +b3115228| BSET X5, X4, X3 +1393f32b| BSETI $63, X7, X6 +0000| UNIMP From 5f2ba6d460c9ac55abe4b3ed2eedee937561d025 Mon Sep 17 00:00:00 2001 From: Lin Runze Date: Tue, 17 Dec 2024 01:43:00 +0800 Subject: [PATCH 038/200] riscv64: fix objdump related tests The 
TestObjdumpRISCV64TestDecodeGNUSyntaxdata and
TestObjdumpRISCV64TestDecodeGoSyntaxdata tests failed due to the lack of a
.riscv.attributes section in the ELF file; this patch adds these extensions'
attributes and allows `inst` output mismatches.

Fix: golang/go#70864

Change-Id: I6bcac3e6a3bd0ec3cc66356998914a1aa8d7c468
Reviewed-on: https://go-review.googlesource.com/c/arch/+/636735
Reviewed-by: David Chase
Reviewed-by: Meng Zhuo
Reviewed-by: Carlos Amedee
LUCI-TryBot-Result: Go LUCI
---
 riscv64/riscv64asm/ext_test.go        | 3 +++
 riscv64/riscv64asm/gnu.go             | 5 +++++
 riscv64/riscv64asm/objdumpext_test.go | 4 ++--
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/riscv64/riscv64asm/ext_test.go b/riscv64/riscv64asm/ext_test.go
index fa6961f2..25e2bf75 100644
--- a/riscv64/riscv64asm/ext_test.go
+++ b/riscv64/riscv64asm/ext_test.go
@@ -148,6 +148,9 @@ func testExtDis(
 			}
 			suffix += " (allowed mismatch)"
 		}
+		if strings.Contains(text, "unknown") && strings.Contains(dec.text, ".insn") {
+			return
+		}
 		totalErrors++
 		cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n",
 			enc, text, len(enc), dec.text, dec.nenc, suffix)
diff --git a/riscv64/riscv64asm/gnu.go b/riscv64/riscv64asm/gnu.go
index d6b3dc04..3ee04496 100644
--- a/riscv64/riscv64asm/gnu.go
+++ b/riscv64/riscv64asm/gnu.go
@@ -42,6 +42,11 @@ func GNUSyntax(inst Inst) string {
 		}
 	}
 
+	if inst.Op == ANDI && inst.Args[2].(Simm).Imm == 255 {
+		op = "zext.b"
+		args = args[:len(args)-1]
+	}
+
 	if inst.Op == ADDIW && inst.Args[2].(Simm).Imm == 0 {
 		op = "sext.w"
 		args = args[:len(args)-1]
diff --git a/riscv64/riscv64asm/objdumpext_test.go b/riscv64/riscv64asm/objdumpext_test.go
index 4f1f21a5..1dc09641 100644
--- a/riscv64/riscv64asm/objdumpext_test.go
+++ b/riscv64/riscv64asm/objdumpext_test.go
@@ -277,7 +277,7 @@ func writeELF64(f *os.File, size int) error {
 		Type:      uint32(0x70000003), // SHT_RISCV_ATTRIBUTES
 		Addr:      0,
 		Off:       uint64(off2 + (off3-off2)*4 + strtabsize),
-		Size:      102,
+		Size:      114,
 		Addralign: 1,
 	}
 	binary.Write(&buf, binary.LittleEndian, 
§) @@ -293,7 +293,7 @@ func writeELF64(f *os.File, size int) error { buf.WriteString("\x00.text\x00.riscv.attributes\x00.shstrtab\x00") // Contents of .riscv.attributes section // which specify the extension and priv spec version. (1.11) - buf.WriteString("Ae\x00\x00\x00riscv\x00\x01[\x00\x00\x00\x05rv64i2p1_m2pp_a2p1_f2p2_d2p2_q2p2_zibsr2p0_zifencei2p0_zmmul1p0_zfh1p0_zfhmin1p0\x00\x08\x01\x0a\x0b") + buf.WriteString("Aq\x00\x00\x00riscv\x00\x01g\x00\x00\x00\x05rv64i2p0_m2p0_a2p0_f2p0_d2p0_q2p0_c2p0_zmmul1p0_zfh1p0_zfhmin1p0_zba1p0_zbb1p0_zbc1p0_zbs1p0\x00\x08\x01\x0a\x0b") f.Write(buf.Bytes()) return nil } From bde81be39b9efbf5b80719bb91ad1a0ebc5186b5 Mon Sep 17 00:00:00 2001 From: cuishuang Date: Wed, 1 Jan 2025 09:32:57 +0800 Subject: [PATCH 039/200] all: make function comments match function names Change-Id: I279c95d1d1e5f07e0c7a0c4416bf3613d85bb950 Reviewed-on: https://go-review.googlesource.com/c/arch/+/639476 Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- arm64/arm64asm/ext_test.go | 2 +- loong64/loong64asm/ext_test.go | 2 +- riscv64/riscv64asm/ext_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arm64/arm64asm/ext_test.go b/arm64/arm64asm/ext_test.go index f432203e..e8a79de2 100644 --- a/arm64/arm64asm/ext_test.go +++ b/arm64/arm64asm/ext_test.go @@ -268,7 +268,7 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { return } -// decodecoverage returns a floating point number denoting the +// decodeCoverage returns a floating point number denoting the // decoder coverage. 
func decodeCoverage() float64 { n := 0 diff --git a/loong64/loong64asm/ext_test.go b/loong64/loong64asm/ext_test.go index 5c90586e..5e73c80d 100644 --- a/loong64/loong64asm/ext_test.go +++ b/loong64/loong64asm/ext_test.go @@ -240,7 +240,7 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { return } -// decodecoverage returns a floating point number denoting the +// decodeCoverage returns a floating point number denoting the // decoder coverage. func decodeCoverage() float64 { n := 0 diff --git a/riscv64/riscv64asm/ext_test.go b/riscv64/riscv64asm/ext_test.go index 25e2bf75..556cd015 100644 --- a/riscv64/riscv64asm/ext_test.go +++ b/riscv64/riscv64asm/ext_test.go @@ -242,7 +242,7 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { return } -// decodecoverage returns a floating point number denoting the +// decodeCoverage returns a floating point number denoting the // decoder coverage. func decodeCoverage() float64 { n := 0 From ccff9d40e363f5ec4b7580073c04f1e04841295d Mon Sep 17 00:00:00 2001 From: alirezaarzehgar Date: Fri, 17 Jan 2025 18:50:14 +0330 Subject: [PATCH 040/200] x/arch: silent go vet warnings go vet -all ./... 
arm/armspec/spec.go:584:26: rsc.io/pdf.Text struct literal uses unkeyed fields arm64/arm64spec/spec.go:688:26: rsc.io/pdf.Text struct literal uses unkeyed fields loong64/loong64spec/spec.go:361:26: rsc.io/pdf.Text struct literal uses unkeyed fields ppc64/ppc64map/map.go:290:5: unreachable code ppc64/ppc64spec/spec.go:468:26: rsc.io/pdf.Text struct literal uses unkeyed fields s390x/s390xmap/map.go:218:5: unreachable code s390x/s390xspec/spec.go:1045:26: rsc.io/pdf.Text struct literal uses unkeyed fields x86/x86spec/parse.go:513:26: rsc.io/pdf.Text struct literal uses unkeyed fields Change-Id: I4f7e29c80231ebfc5287e0d5bb57edae5b712603 Reviewed-on: https://go-review.googlesource.com/c/arch/+/642738 Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Commit-Queue: Ian Lance Taylor --- arm/armspec/spec.go | 9 ++++++++- arm64/arm64spec/spec.go | 9 ++++++++- loong64/loong64spec/spec.go | 9 ++++++++- ppc64/ppc64map/map.go | 4 ++-- ppc64/ppc64spec/spec.go | 9 ++++++++- s390x/s390xmap/map.go | 4 ++-- s390x/s390xspec/spec.go | 12 ++++++++++-- x86/x86asm/tables.go | 2 +- x86/x86spec/parse.go | 9 ++++++++- 9 files changed, 55 insertions(+), 12 deletions(-) diff --git a/arm/armspec/spec.go b/arm/armspec/spec.go index 60579a05..5b458e1e 100644 --- a/arm/armspec/spec.go +++ b/arm/armspec/spec.go @@ -581,7 +581,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/arm64/arm64spec/spec.go b/arm64/arm64spec/spec.go index ee784e55..feedf83c 100644 --- a/arm64/arm64spec/spec.go +++ b/arm64/arm64spec/spec.go @@ -685,7 +685,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = 
strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/loong64/loong64spec/spec.go b/loong64/loong64spec/spec.go index 3e69a24e..177df41b 100644 --- a/loong64/loong64spec/spec.go +++ b/loong64/loong64spec/spec.go @@ -358,7 +358,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { break } f := ck.Font - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/ppc64/ppc64map/map.go b/ppc64/ppc64map/map.go index 1e3b1b6e..4e50843c 100644 --- a/ppc64/ppc64map/map.go +++ b/ppc64/ppc64map/map.go @@ -24,7 +24,6 @@ import ( "flag" "fmt" gofmt "go/format" - asm "golang.org/x/arch/ppc64/ppc64asm" "log" "math/bits" "os" @@ -33,6 +32,8 @@ import ( "strconv" "strings" "text/template" + + asm "golang.org/x/arch/ppc64/ppc64asm" ) var format = flag.String("fmt", "text", "output format: text, decoder, asm") @@ -287,7 +288,6 @@ func parseFields(encoding, text string, word int8) Args { if j < 0 { fmt.Fprintf(os.Stderr, "%s: wrong %d-th encoding field: %q\n", text, i, f) panic("Invalid encoding entry.") - continue } k := strings.Index(f[j+1:], " ") if k >= 0 { diff --git a/ppc64/ppc64spec/spec.go b/ppc64/ppc64spec/spec.go index 4167d6dc..ad9411f1 100644 --- a/ppc64/ppc64spec/spec.go +++ b/ppc64/ppc64spec/spec.go @@ -465,7 +465,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = 
j diff --git a/s390x/s390xmap/map.go b/s390x/s390xmap/map.go index 1adfdfbe..3fc89f11 100644 --- a/s390x/s390xmap/map.go +++ b/s390x/s390xmap/map.go @@ -24,12 +24,13 @@ import ( "flag" "fmt" gofmt "go/format" - asm "golang.org/x/arch/s390x/s390xasm" "log" "os" "regexp" "strconv" "strings" + + asm "golang.org/x/arch/s390x/s390xasm" ) var format = flag.String("fmt", "text", "output format: text, decoder, asm") @@ -215,7 +216,6 @@ func parseFields(encoding, text string) Args { if j < 0 { fmt.Fprintf(os.Stderr, "%s: wrong %d-th encoding field: %q\n", text, i, f) panic("Invalid encoding entry.") - continue } off, err = strconv.Atoi(f[j+1:]) if err != nil { diff --git a/s390x/s390xspec/spec.go b/s390x/s390xspec/spec.go index cc0ebade..1b24be85 100644 --- a/s390x/s390xspec/spec.go +++ b/s390x/s390xspec/spec.go @@ -30,10 +30,11 @@ import ( "log" "math" "os" - "rsc.io/pdf" "sort" "strconv" "strings" + + "rsc.io/pdf" ) type Inst struct { @@ -1042,7 +1043,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end - ck.X, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end - ck.X, + S: s, + }) k = l } i = j diff --git a/x86/x86asm/tables.go b/x86/x86asm/tables.go index 6f57c70b..9710bbd8 100644 --- a/x86/x86asm/tables.go +++ b/x86/x86asm/tables.go @@ -1,4 +1,4 @@ -// Code generated by x86map -fmt=decoder x86.csv DO NOT EDIT. +// Code generated by x86map -fmt=decoder ../x86.csv DO NOT EDIT. 
package x86asm diff --git a/x86/x86spec/parse.go b/x86/x86spec/parse.go index e5324bea..8a9adc98 100644 --- a/x86/x86spec/parse.go +++ b/x86/x86spec/parse.go @@ -510,7 +510,14 @@ func findWords(chars []pdf.Text) (words []pdf.Text) { f := ck.Font f = strings.TrimSuffix(f, ",Italic") f = strings.TrimSuffix(f, "-Italic") - words = append(words, pdf.Text{f, ck.FontSize, ck.X, ck.Y, end, s}) + words = append(words, pdf.Text{ + Font: f, + FontSize: ck.FontSize, + X: ck.X, + Y: ck.Y, + W: end, + S: s, + }) k = l } i = j From fa4651658ac77df0a02d29474ffe1140e058b1de Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Tue, 28 Jan 2025 08:57:26 -0800 Subject: [PATCH 041/200] x86: add SHA-512 instructions to all-dec-instructions.txt Unclear if this is the "correct" way to go about this. XED taken from https://github.com/intelxed/xed/blob/main/datafiles/sha512/sha512-isa.xed.txt. Change-Id: If5d7026f3020698727964e9838a6dff1653c846f Reviewed-on: https://go-review.googlesource.com/c/arch/+/645035 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- .../testdata/xedpath/all-dec-instructions.txt | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt index aad0b816..70bda8dd 100644 --- a/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt +++ b/x86/x86avxgen/testdata/xedpath/all-dec-instructions.txt @@ -58027,3 +58027,79 @@ PATTERN : 0x0F 0xAE MOD[mm] MOD!=3 REG[0b100] RM[nnn] f3_refining_prefix no6 OPERANDS : MEM0:r:y } + + + + +###FILE: ./datafiles/sha512/sha512-isa.xed.txt + +#BEGIN_LEGAL +# +#Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#END_LEGAL +# +# +# +# ***** GENERATED FILE -- DO NOT EDIT! ***** +# ***** GENERATED FILE -- DO NOT EDIT! ***** +# ***** GENERATED FILE -- DO NOT EDIT! ***** +# +# +# +AVX_INSTRUCTIONS():: +# EMITTING VSHA512MSG1 (VSHA512MSG1-256-1) +{ +ICLASS: VSHA512MSG1 +CPL: 3 +CATEGORY: SHA512 +EXTENSION: SHA512 +ISA_SET: SHA512 +EXCEPTIONS: avx-type-6 +REAL_OPCODE: Y +PATTERN: VV1 0xCC VF2 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] W0 VL256 NOVSR +OPERANDS: REG0=YMM_R():rw:qq:u64 REG1=XMM_B():r:dq:u64 +IFORM: VSHA512MSG1_YMMu64_XMMu64 +} + + +# EMITTING VSHA512MSG2 (VSHA512MSG2-256-1) +{ +ICLASS: VSHA512MSG2 +CPL: 3 +CATEGORY: SHA512 +EXTENSION: SHA512 +ISA_SET: SHA512 +EXCEPTIONS: avx-type-6 +REAL_OPCODE: Y +PATTERN: VV1 0xCD VF2 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] W0 VL256 NOVSR +OPERANDS: REG0=YMM_R():rw:qq:u64 REG1=YMM_B():r:qq:u64 +IFORM: VSHA512MSG2_YMMu64_YMMu64 +} + + +# EMITTING VSHA512RNDS2 (VSHA512RNDS2-256-1) +{ +ICLASS: VSHA512RNDS2 +CPL: 3 +CATEGORY: SHA512 +EXTENSION: SHA512 +ISA_SET: SHA512 +EXCEPTIONS: avx-type-6 +REAL_OPCODE: Y +PATTERN: VV1 0xCB VF2 V0F38 MOD[0b11] MOD=3 REG[rrr] RM[nnn] W0 VL256 +OPERANDS: REG0=YMM_R():rw:qq:u64 REG1=YMM_N():r:qq:u64 REG2=XMM_B():r:dq:u64 +IFORM: VSHA512RNDS2_YMMu64_YMMu64_XMMu64 +} \ No newline at end of file From 1ae429ed9af22ab5442b13b72e44386be2f9f92e Mon Sep 17 00:00:00 2001 From: Gopher Robot Date: Fri, 14 Feb 2025 18:57:42 +0000 Subject: [PATCH 042/200] all: upgrade go directive to at least 1.23.0 [generated] By now Go 1.24.0 has been released, and Go 1.22 is no longer supported per the Go Release Policy 
(https://go.dev/doc/devel/release#policy). For golang/go#69095. [git-generate] (cd . && go get go@1.23.0 && go mod tidy && go fix ./... && go mod edit -toolchain=none) Change-Id: I4dfe2096905891f3a5c7d34c2e82c6119cdd2448 Reviewed-on: https://go-review.googlesource.com/c/arch/+/649695 Auto-Submit: Gopher Robot LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Reviewed-by: Dmitri Shuralyov --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 355098da..b72ba1a5 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module golang.org/x/arch -go 1.18 +go 1.23.0 require rsc.io/pdf v0.1.1 From c375763cfd40d3f71b0348622a4d2c635527b2c5 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Mon, 23 Sep 2024 13:49:49 +0000 Subject: [PATCH 043/200] arm64: Fix arm64asm warnings Fix warnings of the following types. - go-staticcheck - (U1000) var longTest is unused - (SA4006) this value of text is never used - (S1011) should replace loop with instsN = append(instsN, insts...) 
- (S1002) should omit comparison to bool constant, can be simplified to rea.show_zero - (S1039) unnecessary use of fmt.Sprintf - simplifycompositelit - simplifyslicedefault Change-Id: I84cb4f867bf6f923ffa21d6e0a2072641299eaf5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/615055 LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Knyszek Reviewed-by: Cherry Mui --- arm64/arm64asm/ext_test.go | 11 +- arm64/arm64asm/inst.go | 216 ++++++++++++++++++------------------- 2 files changed, 111 insertions(+), 116 deletions(-) diff --git a/arm64/arm64asm/ext_test.go b/arm64/arm64asm/ext_test.go index e8a79de2..f0d18e93 100644 --- a/arm64/arm64asm/ext_test.go +++ b/arm64/arm64asm/ext_test.go @@ -31,7 +31,6 @@ import ( var ( dumpTest = flag.Bool("dump", false, "dump all encodings") mismatch = flag.Bool("mismatch", false, "log allowed mismatches") - longTest = flag.Bool("long", false, "long test") keep = flag.Bool("keep", false, "keep object files around") debug = false ) @@ -164,7 +163,7 @@ func testExtDis( totalTests++ if *dumpTest { - fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc) + fmt.Printf("%x -> %s [%d]\n", enc, dec.text, dec.nenc) } if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" { suffix := "" @@ -256,7 +255,6 @@ func disasm(syntax string, src []byte) (inst Inst, text string) { text = "error: " + err.Error() return } - text = inst.String() switch syntax { case "gnu": text = GNUSyntax(inst) @@ -518,11 +516,8 @@ func JSONCases(t *testing.T) func(func([]byte)) { t.Fatal(err) } // Append instructions to get more test cases. - for i := 0; i < N; { - for _, inst := range insts { - instsN = append(instsN, inst) - } - i++ + for i := 0; i < N; i++ { + instsN = append(instsN, insts...) 
} Round = 0 for i := range instsN { diff --git a/arm64/arm64asm/inst.go b/arm64/arm64asm/inst.go index 866e399c..39ddaf75 100644 --- a/arm64/arm64asm/inst.go +++ b/arm64/arm64asm/inst.go @@ -469,7 +469,7 @@ func (rea RegExtshiftAmount) String() string { if rea.amount != 0 { buf += fmt.Sprintf(" #%d", rea.amount) } else { - if rea.show_zero == true { + if rea.show_zero { buf += fmt.Sprintf(" #%d", rea.amount) } } @@ -527,7 +527,7 @@ func (m MemImmediate) String() string { postR := post.String() return fmt.Sprintf("[%s], %s", R, postR) } - return fmt.Sprintf("unimplemented!") + return "unimplemented!" } // A MemExtend is a memory reference made up of a base R and index expression X. @@ -1021,110 +1021,110 @@ func (s sysInstFields) getType() sys { } var sysInstsAttrs = map[sysInstFields]sysInstAttrs{ - sysInstFields{0, 8, 3, 0}: {sys_TLBI, "VMALLE1IS", false}, - sysInstFields{0, 8, 3, 1}: {sys_TLBI, "VAE1IS", true}, - sysInstFields{0, 8, 3, 2}: {sys_TLBI, "ASIDE1IS", true}, - sysInstFields{0, 8, 3, 3}: {sys_TLBI, "VAAE1IS", true}, - sysInstFields{0, 8, 3, 5}: {sys_TLBI, "VALE1IS", true}, - sysInstFields{0, 8, 3, 7}: {sys_TLBI, "VAALE1IS", true}, - sysInstFields{0, 8, 7, 0}: {sys_TLBI, "VMALLE1", false}, - sysInstFields{0, 8, 7, 1}: {sys_TLBI, "VAE1", true}, - sysInstFields{0, 8, 7, 2}: {sys_TLBI, "ASIDE1", true}, - sysInstFields{0, 8, 7, 3}: {sys_TLBI, "VAAE1", true}, - sysInstFields{0, 8, 7, 5}: {sys_TLBI, "VALE1", true}, - sysInstFields{0, 8, 7, 7}: {sys_TLBI, "VAALE1", true}, - sysInstFields{4, 8, 0, 1}: {sys_TLBI, "IPAS2E1IS", true}, - sysInstFields{4, 8, 0, 5}: {sys_TLBI, "IPAS2LE1IS", true}, - sysInstFields{4, 8, 3, 0}: {sys_TLBI, "ALLE2IS", false}, - sysInstFields{4, 8, 3, 1}: {sys_TLBI, "VAE2IS", true}, - sysInstFields{4, 8, 3, 4}: {sys_TLBI, "ALLE1IS", false}, - sysInstFields{4, 8, 3, 5}: {sys_TLBI, "VALE2IS", true}, - sysInstFields{4, 8, 3, 6}: {sys_TLBI, "VMALLS12E1IS", false}, - sysInstFields{4, 8, 4, 1}: {sys_TLBI, "IPAS2E1", true}, - sysInstFields{4, 8, 
4, 5}: {sys_TLBI, "IPAS2LE1", true}, - sysInstFields{4, 8, 7, 0}: {sys_TLBI, "ALLE2", false}, - sysInstFields{4, 8, 7, 1}: {sys_TLBI, "VAE2", true}, - sysInstFields{4, 8, 7, 4}: {sys_TLBI, "ALLE1", false}, - sysInstFields{4, 8, 7, 5}: {sys_TLBI, "VALE2", true}, - sysInstFields{4, 8, 7, 6}: {sys_TLBI, "VMALLS12E1", false}, - sysInstFields{6, 8, 3, 0}: {sys_TLBI, "ALLE3IS", false}, - sysInstFields{6, 8, 3, 1}: {sys_TLBI, "VAE3IS", true}, - sysInstFields{6, 8, 3, 5}: {sys_TLBI, "VALE3IS", true}, - sysInstFields{6, 8, 7, 0}: {sys_TLBI, "ALLE3", false}, - sysInstFields{6, 8, 7, 1}: {sys_TLBI, "VAE3", true}, - sysInstFields{6, 8, 7, 5}: {sys_TLBI, "VALE3", true}, - sysInstFields{0, 8, 1, 0}: {sys_TLBI, "VMALLE1OS", false}, - sysInstFields{0, 8, 1, 1}: {sys_TLBI, "VAE1OS", true}, - sysInstFields{0, 8, 1, 2}: {sys_TLBI, "ASIDE1OS", true}, - sysInstFields{0, 8, 1, 3}: {sys_TLBI, "VAAE1OS", true}, - sysInstFields{0, 8, 1, 5}: {sys_TLBI, "VALE1OS", true}, - sysInstFields{0, 8, 1, 7}: {sys_TLBI, "VAALE1OS", true}, - sysInstFields{0, 8, 2, 1}: {sys_TLBI, "RVAE1IS", true}, - sysInstFields{0, 8, 2, 3}: {sys_TLBI, "RVAAE1IS", true}, - sysInstFields{0, 8, 2, 5}: {sys_TLBI, "RVALE1IS", true}, - sysInstFields{0, 8, 2, 7}: {sys_TLBI, "RVAALE1IS", true}, - sysInstFields{0, 8, 5, 1}: {sys_TLBI, "RVAE1OS", true}, - sysInstFields{0, 8, 5, 3}: {sys_TLBI, "RVAAE1OS", true}, - sysInstFields{0, 8, 5, 5}: {sys_TLBI, "RVALE1OS", true}, - sysInstFields{0, 8, 5, 7}: {sys_TLBI, "RVAALE1OS", true}, - sysInstFields{0, 8, 6, 1}: {sys_TLBI, "RVAE1", true}, - sysInstFields{0, 8, 6, 3}: {sys_TLBI, "RVAAE1", true}, - sysInstFields{0, 8, 6, 5}: {sys_TLBI, "RVALE1", true}, - sysInstFields{0, 8, 6, 7}: {sys_TLBI, "RVAALE1", true}, - sysInstFields{4, 8, 0, 2}: {sys_TLBI, "RIPAS2E1IS", true}, - sysInstFields{4, 8, 0, 6}: {sys_TLBI, "RIPAS2LE1IS", true}, - sysInstFields{4, 8, 1, 0}: {sys_TLBI, "ALLE2OS", false}, - sysInstFields{4, 8, 1, 1}: {sys_TLBI, "VAE2OS", true}, - sysInstFields{4, 8, 1, 4}: {sys_TLBI, 
"ALLE1OS", false}, - sysInstFields{4, 8, 1, 5}: {sys_TLBI, "VALE2OS", true}, - sysInstFields{4, 8, 1, 6}: {sys_TLBI, "VMALLS12E1OS", false}, - sysInstFields{4, 8, 2, 1}: {sys_TLBI, "RVAE2IS", true}, - sysInstFields{4, 8, 2, 5}: {sys_TLBI, "RVALE2IS", true}, - sysInstFields{4, 8, 4, 0}: {sys_TLBI, "IPAS2E1OS", true}, - sysInstFields{4, 8, 4, 2}: {sys_TLBI, "RIPAS2E1", true}, - sysInstFields{4, 8, 4, 3}: {sys_TLBI, "RIPAS2E1OS", true}, - sysInstFields{4, 8, 4, 4}: {sys_TLBI, "IPAS2LE1OS", true}, - sysInstFields{4, 8, 4, 6}: {sys_TLBI, "RIPAS2LE1", true}, - sysInstFields{4, 8, 4, 7}: {sys_TLBI, "RIPAS2LE1OS", true}, - sysInstFields{4, 8, 5, 1}: {sys_TLBI, "RVAE2OS", true}, - sysInstFields{4, 8, 5, 5}: {sys_TLBI, "RVALE2OS", true}, - sysInstFields{4, 8, 6, 1}: {sys_TLBI, "RVAE2", true}, - sysInstFields{4, 8, 6, 5}: {sys_TLBI, "RVALE2", true}, - sysInstFields{6, 8, 1, 0}: {sys_TLBI, "ALLE3OS", false}, - sysInstFields{6, 8, 1, 1}: {sys_TLBI, "VAE3OS", true}, - sysInstFields{6, 8, 1, 5}: {sys_TLBI, "VALE3OS", true}, - sysInstFields{6, 8, 2, 1}: {sys_TLBI, "RVAE3IS", true}, - sysInstFields{6, 8, 2, 5}: {sys_TLBI, "RVALE3IS", true}, - sysInstFields{6, 8, 5, 1}: {sys_TLBI, "RVAE3OS", true}, - sysInstFields{6, 8, 5, 5}: {sys_TLBI, "RVALE3OS", true}, - sysInstFields{6, 8, 6, 1}: {sys_TLBI, "RVAE3", true}, - sysInstFields{6, 8, 6, 5}: {sys_TLBI, "RVALE3", true}, - sysInstFields{0, 7, 6, 1}: {sys_DC, "IVAC", true}, - sysInstFields{0, 7, 6, 2}: {sys_DC, "ISW", true}, - sysInstFields{0, 7, 10, 2}: {sys_DC, "CSW", true}, - sysInstFields{0, 7, 14, 2}: {sys_DC, "CISW", true}, - sysInstFields{3, 7, 4, 1}: {sys_DC, "ZVA", true}, - sysInstFields{3, 7, 10, 1}: {sys_DC, "CVAC", true}, - sysInstFields{3, 7, 11, 1}: {sys_DC, "CVAU", true}, - sysInstFields{3, 7, 14, 1}: {sys_DC, "CIVAC", true}, - sysInstFields{0, 7, 6, 3}: {sys_DC, "IGVAC", true}, - sysInstFields{0, 7, 6, 4}: {sys_DC, "IGSW", true}, - sysInstFields{0, 7, 6, 5}: {sys_DC, "IGDVAC", true}, - sysInstFields{0, 7, 6, 6}: {sys_DC, 
"IGDSW", true}, - sysInstFields{0, 7, 10, 4}: {sys_DC, "CGSW", true}, - sysInstFields{0, 7, 10, 6}: {sys_DC, "CGDSW", true}, - sysInstFields{0, 7, 14, 4}: {sys_DC, "CIGSW", true}, - sysInstFields{0, 7, 14, 6}: {sys_DC, "CIGDSW", true}, - sysInstFields{3, 7, 4, 3}: {sys_DC, "GVA", true}, - sysInstFields{3, 7, 4, 4}: {sys_DC, "GZVA", true}, - sysInstFields{3, 7, 10, 3}: {sys_DC, "CGVAC", true}, - sysInstFields{3, 7, 10, 5}: {sys_DC, "CGDVAC", true}, - sysInstFields{3, 7, 12, 3}: {sys_DC, "CGVAP", true}, - sysInstFields{3, 7, 12, 5}: {sys_DC, "CGDVAP", true}, - sysInstFields{3, 7, 13, 3}: {sys_DC, "CGVADP", true}, - sysInstFields{3, 7, 13, 5}: {sys_DC, "CGDVADP", true}, - sysInstFields{3, 7, 14, 3}: {sys_DC, "CIGVAC", true}, - sysInstFields{3, 7, 14, 5}: {sys_DC, "CIGDVAC", true}, - sysInstFields{3, 7, 12, 1}: {sys_DC, "CVAP", true}, - sysInstFields{3, 7, 13, 1}: {sys_DC, "CVADP", true}, + {0, 8, 3, 0}: {sys_TLBI, "VMALLE1IS", false}, + {0, 8, 3, 1}: {sys_TLBI, "VAE1IS", true}, + {0, 8, 3, 2}: {sys_TLBI, "ASIDE1IS", true}, + {0, 8, 3, 3}: {sys_TLBI, "VAAE1IS", true}, + {0, 8, 3, 5}: {sys_TLBI, "VALE1IS", true}, + {0, 8, 3, 7}: {sys_TLBI, "VAALE1IS", true}, + {0, 8, 7, 0}: {sys_TLBI, "VMALLE1", false}, + {0, 8, 7, 1}: {sys_TLBI, "VAE1", true}, + {0, 8, 7, 2}: {sys_TLBI, "ASIDE1", true}, + {0, 8, 7, 3}: {sys_TLBI, "VAAE1", true}, + {0, 8, 7, 5}: {sys_TLBI, "VALE1", true}, + {0, 8, 7, 7}: {sys_TLBI, "VAALE1", true}, + {4, 8, 0, 1}: {sys_TLBI, "IPAS2E1IS", true}, + {4, 8, 0, 5}: {sys_TLBI, "IPAS2LE1IS", true}, + {4, 8, 3, 0}: {sys_TLBI, "ALLE2IS", false}, + {4, 8, 3, 1}: {sys_TLBI, "VAE2IS", true}, + {4, 8, 3, 4}: {sys_TLBI, "ALLE1IS", false}, + {4, 8, 3, 5}: {sys_TLBI, "VALE2IS", true}, + {4, 8, 3, 6}: {sys_TLBI, "VMALLS12E1IS", false}, + {4, 8, 4, 1}: {sys_TLBI, "IPAS2E1", true}, + {4, 8, 4, 5}: {sys_TLBI, "IPAS2LE1", true}, + {4, 8, 7, 0}: {sys_TLBI, "ALLE2", false}, + {4, 8, 7, 1}: {sys_TLBI, "VAE2", true}, + {4, 8, 7, 4}: {sys_TLBI, "ALLE1", false}, + {4, 8, 7, 5}: 
{sys_TLBI, "VALE2", true}, + {4, 8, 7, 6}: {sys_TLBI, "VMALLS12E1", false}, + {6, 8, 3, 0}: {sys_TLBI, "ALLE3IS", false}, + {6, 8, 3, 1}: {sys_TLBI, "VAE3IS", true}, + {6, 8, 3, 5}: {sys_TLBI, "VALE3IS", true}, + {6, 8, 7, 0}: {sys_TLBI, "ALLE3", false}, + {6, 8, 7, 1}: {sys_TLBI, "VAE3", true}, + {6, 8, 7, 5}: {sys_TLBI, "VALE3", true}, + {0, 8, 1, 0}: {sys_TLBI, "VMALLE1OS", false}, + {0, 8, 1, 1}: {sys_TLBI, "VAE1OS", true}, + {0, 8, 1, 2}: {sys_TLBI, "ASIDE1OS", true}, + {0, 8, 1, 3}: {sys_TLBI, "VAAE1OS", true}, + {0, 8, 1, 5}: {sys_TLBI, "VALE1OS", true}, + {0, 8, 1, 7}: {sys_TLBI, "VAALE1OS", true}, + {0, 8, 2, 1}: {sys_TLBI, "RVAE1IS", true}, + {0, 8, 2, 3}: {sys_TLBI, "RVAAE1IS", true}, + {0, 8, 2, 5}: {sys_TLBI, "RVALE1IS", true}, + {0, 8, 2, 7}: {sys_TLBI, "RVAALE1IS", true}, + {0, 8, 5, 1}: {sys_TLBI, "RVAE1OS", true}, + {0, 8, 5, 3}: {sys_TLBI, "RVAAE1OS", true}, + {0, 8, 5, 5}: {sys_TLBI, "RVALE1OS", true}, + {0, 8, 5, 7}: {sys_TLBI, "RVAALE1OS", true}, + {0, 8, 6, 1}: {sys_TLBI, "RVAE1", true}, + {0, 8, 6, 3}: {sys_TLBI, "RVAAE1", true}, + {0, 8, 6, 5}: {sys_TLBI, "RVALE1", true}, + {0, 8, 6, 7}: {sys_TLBI, "RVAALE1", true}, + {4, 8, 0, 2}: {sys_TLBI, "RIPAS2E1IS", true}, + {4, 8, 0, 6}: {sys_TLBI, "RIPAS2LE1IS", true}, + {4, 8, 1, 0}: {sys_TLBI, "ALLE2OS", false}, + {4, 8, 1, 1}: {sys_TLBI, "VAE2OS", true}, + {4, 8, 1, 4}: {sys_TLBI, "ALLE1OS", false}, + {4, 8, 1, 5}: {sys_TLBI, "VALE2OS", true}, + {4, 8, 1, 6}: {sys_TLBI, "VMALLS12E1OS", false}, + {4, 8, 2, 1}: {sys_TLBI, "RVAE2IS", true}, + {4, 8, 2, 5}: {sys_TLBI, "RVALE2IS", true}, + {4, 8, 4, 0}: {sys_TLBI, "IPAS2E1OS", true}, + {4, 8, 4, 2}: {sys_TLBI, "RIPAS2E1", true}, + {4, 8, 4, 3}: {sys_TLBI, "RIPAS2E1OS", true}, + {4, 8, 4, 4}: {sys_TLBI, "IPAS2LE1OS", true}, + {4, 8, 4, 6}: {sys_TLBI, "RIPAS2LE1", true}, + {4, 8, 4, 7}: {sys_TLBI, "RIPAS2LE1OS", true}, + {4, 8, 5, 1}: {sys_TLBI, "RVAE2OS", true}, + {4, 8, 5, 5}: {sys_TLBI, "RVALE2OS", true}, + {4, 8, 6, 1}: {sys_TLBI, "RVAE2", true}, + 
{4, 8, 6, 5}: {sys_TLBI, "RVALE2", true}, + {6, 8, 1, 0}: {sys_TLBI, "ALLE3OS", false}, + {6, 8, 1, 1}: {sys_TLBI, "VAE3OS", true}, + {6, 8, 1, 5}: {sys_TLBI, "VALE3OS", true}, + {6, 8, 2, 1}: {sys_TLBI, "RVAE3IS", true}, + {6, 8, 2, 5}: {sys_TLBI, "RVALE3IS", true}, + {6, 8, 5, 1}: {sys_TLBI, "RVAE3OS", true}, + {6, 8, 5, 5}: {sys_TLBI, "RVALE3OS", true}, + {6, 8, 6, 1}: {sys_TLBI, "RVAE3", true}, + {6, 8, 6, 5}: {sys_TLBI, "RVALE3", true}, + {0, 7, 6, 1}: {sys_DC, "IVAC", true}, + {0, 7, 6, 2}: {sys_DC, "ISW", true}, + {0, 7, 10, 2}: {sys_DC, "CSW", true}, + {0, 7, 14, 2}: {sys_DC, "CISW", true}, + {3, 7, 4, 1}: {sys_DC, "ZVA", true}, + {3, 7, 10, 1}: {sys_DC, "CVAC", true}, + {3, 7, 11, 1}: {sys_DC, "CVAU", true}, + {3, 7, 14, 1}: {sys_DC, "CIVAC", true}, + {0, 7, 6, 3}: {sys_DC, "IGVAC", true}, + {0, 7, 6, 4}: {sys_DC, "IGSW", true}, + {0, 7, 6, 5}: {sys_DC, "IGDVAC", true}, + {0, 7, 6, 6}: {sys_DC, "IGDSW", true}, + {0, 7, 10, 4}: {sys_DC, "CGSW", true}, + {0, 7, 10, 6}: {sys_DC, "CGDSW", true}, + {0, 7, 14, 4}: {sys_DC, "CIGSW", true}, + {0, 7, 14, 6}: {sys_DC, "CIGDSW", true}, + {3, 7, 4, 3}: {sys_DC, "GVA", true}, + {3, 7, 4, 4}: {sys_DC, "GZVA", true}, + {3, 7, 10, 3}: {sys_DC, "CGVAC", true}, + {3, 7, 10, 5}: {sys_DC, "CGDVAC", true}, + {3, 7, 12, 3}: {sys_DC, "CGVAP", true}, + {3, 7, 12, 5}: {sys_DC, "CGDVAP", true}, + {3, 7, 13, 3}: {sys_DC, "CGVADP", true}, + {3, 7, 13, 5}: {sys_DC, "CGDVADP", true}, + {3, 7, 14, 3}: {sys_DC, "CIGVAC", true}, + {3, 7, 14, 5}: {sys_DC, "CIGDVAC", true}, + {3, 7, 12, 1}: {sys_DC, "CVAP", true}, + {3, 7, 13, 1}: {sys_DC, "CVADP", true}, } From eba65f7f1ad807647a71227220f0854b4f0acc5d Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Wed, 5 Mar 2025 22:35:14 -0500 Subject: [PATCH 044/200] x86/xeddata: document how to obtain XED data files Change-Id: I6ec052a15caaeeb0c896af641db07c390321e249 Reviewed-on: https://go-review.googlesource.com/c/arch/+/655315 Reviewed-by: Austin Clements LUCI-TryBot-Result: Go LUCI --- 
x86/xeddata/doc.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/x86/xeddata/doc.go b/x86/xeddata/doc.go index 23d51dc5..4439552b 100644 --- a/x86/xeddata/doc.go +++ b/x86/xeddata/doc.go @@ -23,6 +23,9 @@ // where appropriate, x86csv names are provided // as an alternative. // +// Suppose $XED is the path of a checkout of the +// https://github.com/intelxed/xed repo. +// // "$XED/foo/bar.txt" notation is used to specify a path to "foo/bar.txt" // file under local XED source repository folder. // @@ -32,14 +35,20 @@ // 3. Operate on XED objects. // // See example_test.go for complete examples. +// See testdata/xed_objects.txt for examples of "XED objects". +// +// # Obtain XED datafiles // // It is required to build Intel XED before attempting to use -// its datafiles, as this package expects "all" versions that +// its datafiles, as this package expects the "all" versions that // are a concatenated final versions of datafiles. +// To build it, follow the instruction on https://github.com/intelxed/xed. +// +// Once built, the "all" versions of data files are in "$XED/obj/dgen/". // If "$XED/obj/dgen/" does not contain relevant files, // then either this documentation is stale or your XED is not built. -// -// To see examples of "XED objects" see "testdata/xed_objects.txt". +// Pass $XED/obj/dgen (or a copy of it) as the "xedPath" to [NewDatabase] +// or to x86avxgen -xedPath. // // Intel XED https://github.com/intelxed/xed provides all documentation // that can be required to understand datafiles. From f6424c2b3339190600843af381c630228a803dd4 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Thu, 6 Mar 2025 09:37:06 -0500 Subject: [PATCH 045/200] x86/xeddata: trim leading space in parsing The new version of XED data file contains some comment lines with a leading space, e.g. " # EOSZ=2 not64". The current parser doesn't recognize it. Trim the leading space. 
Change-Id: Ia5aa244aece7a1cee2d7842d69c60c9d5335dcce Reviewed-on: https://go-review.googlesource.com/c/arch/+/655435 Reviewed-by: Austin Clements LUCI-TryBot-Result: Go LUCI --- x86/xeddata/reader.go | 1 + x86/xeddata/testdata/xed_objects.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index fc8aa94b..cd235100 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -113,6 +113,7 @@ func (r *Reader) parseLines(lines []string) (*Object, error) { ) for _, l := range lines { + l = strings.TrimLeft(l, " ") if l[0] == '#' { // Skip comment lines. continue } diff --git a/x86/xeddata/testdata/xed_objects.txt b/x86/xeddata/testdata/xed_objects.txt index 9d0c52e5..c252acfd 100644 --- a/x86/xeddata/testdata/xed_objects.txt +++ b/x86/xeddata/testdata/xed_objects.txt @@ -13,6 +13,7 @@ # { # ICLASS : ADD # } + # comment with leading space ==== [] From 2c5af0f6833f7fa307cb6e9e2f3e5ad8ebd810ec Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 9 Mar 2025 10:58:35 -0400 Subject: [PATCH 046/200] x86/xeddata: strings.Split -> strings.Cut Change-Id: If015414f94680a753deef3ce6ff400b06c207f45 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656236 LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Auto-Submit: Austin Clements --- x86/xeddata/operand.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index 1632828d..90614165 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -141,13 +141,15 @@ func (op *Operand) NonterminalName() bool { // NameLHS returns left hand side part of the non-terminal name. // Example: NameLHS("REG0=GPRv()") => "REG0". func (op *Operand) NameLHS() string { - return strings.Split(op.Name, "=")[0] + lhs, _, _ := strings.Cut(op.Name, "=") + return lhs } // NameRHS returns right hand side part of the non-terminal name. // Example: NameLHS("REG0=GPRv()") => "GPRv()". 
func (op *Operand) NameRHS() string { - return strings.Split(op.Name, "=")[1] + _, rhs, _ := strings.Cut(op.Name, "=") + return rhs } // IsVisible returns true for operands that are usually From 813340cc1fdc52a372ae3250c2a8df379435f73b Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 7 Mar 2025 21:16:18 -0500 Subject: [PATCH 047/200] x86/xeddata: fix some operand width issues The XED data has various ways of specifying defaults for operand widths that we hadn't implemented. Specifically, - Some operands don't specify a width code at all and it's implied from the operand type by the extra-widths table. We weren't parsing this table at all, so this adds a parser and uses this table to populate the width if it's missing. - Many width codes have a default xtype. We were already parsing this from the widths table, but not populating it in operands. Change-Id: I5dc9e33e072fe076624ee8695d6627196a30c7f5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656237 Reviewed-by: Cherry Mui Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- .../testdata/xedpath/all-extra-widths.txt | 3 + x86/xeddata/database.go | 48 ++++++++- x86/xeddata/operand.go | 42 +++++++- x86/xeddata/readlines.go | 101 ++++++++++++++++++ .../testdata/xedpath/all-extra-widths.txt | 3 + x86/xeddata/xeddata_test.go | 66 +++++++++--- 6 files changed, 246 insertions(+), 17 deletions(-) create mode 100644 x86/x86avxgen/testdata/xedpath/all-extra-widths.txt create mode 100644 x86/xeddata/readlines.go create mode 100644 x86/xeddata/testdata/xedpath/all-extra-widths.txt diff --git a/x86/x86avxgen/testdata/xedpath/all-extra-widths.txt b/x86/x86avxgen/testdata/xedpath/all-extra-widths.txt new file mode 100644 index 00000000..30a004e6 --- /dev/null +++ b/x86/x86avxgen/testdata/xedpath/all-extra-widths.txt @@ -0,0 +1,3 @@ +# Copyright 2025 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
diff --git a/x86/xeddata/database.go b/x86/xeddata/database.go index 94d21de0..a6ec760d 100644 --- a/x86/xeddata/database.go +++ b/x86/xeddata/database.go @@ -74,9 +74,19 @@ type Database struct { widths map[string]*width // all-widths.txt states map[string]string // all-state.txt xtypes map[string]*xtype // all-element-types.txt + + // extraWidth is a "all-extra-widths.txt" record. + // + // It provides a default mapping from an operand type to a width code. + // + // The key is one of three things: + // - "XED_REG_" for a register (e.g., "XED_REG_EAX") + // - "()" for a non-terminal (e.g., "GPR32_R()"") + // - "" for an immediate const (e.g., "AGEN") + extraWidths map[string]string // all-extra-widths.txt } -// width is a "all-width.txt" record. +// width is a "all-widths.txt" record. type width struct { // Default xtype name (examples: int, i8, f32). xtype string @@ -140,6 +150,14 @@ func NewDatabase(xedPath string) (*Database, error) { } } + extraWidths, err := os.Open(filepath.Join(xedPath, "all-extra-widths.txt")) + if err == nil { + db.extraWidths, err = parseExtraWidths(extraWidths) + if err != nil { + return &db, err + } + } + xtypes, err := os.Open(filepath.Join(xedPath, "all-element-types.txt")) if err == nil { err = db.LoadXtypes(xtypes) @@ -181,6 +199,10 @@ func (db *Database) LoadXtypes(r io.Reader) error { // WidthSize translates width string to size string using desired // SizeMode m. For some widths output is the same for any valid value of m. +// +// The size string may be a decimal number of bytes, like "8". It may of the +// form "%dbits" to indicate a bit width. Or in some cases it's "0" for +// "unusual" registers. 
func (db *Database) WidthSize(width string, m OperandSizeMode) string { info := db.widths[width] if info == nil { @@ -235,6 +257,30 @@ func parseWidths(r io.Reader) (map[string]*width, error) { return widths, nil } +func parseExtraWidths(r io.Reader) (map[string]string, error) { + extraWidths := make(map[string]string) + for line, err := range readLines(r) { + if err != nil { + return nil, err + } + f := bytes.Fields(line.data) + if len(f) != 3 { + return nil, fmt.Errorf("want 3 fields, got %d", len(f)) + } + switch string(f[0]) { + default: + return nil, fmt.Errorf("unknown extra width type %s", f[0]) + case "imm_const": + extraWidths[string(f[1])] = string(f[2]) + case "reg": + extraWidths["XED_REG_"+string(f[1])] = string(f[2]) + case "nt": + extraWidths[string(f[1])+"()"] = string(f[2]) + } + } + return extraWidths, nil +} + func parseStates(r io.Reader) (map[string]string, error) { data, err := ioutil.ReadAll(r) if err != nil { diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index 90614165..8de99d67 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -51,6 +51,11 @@ type Operand struct { // Width descriptor. It can express simple width like "w" (word, 16bit) // or meta-width like "v", which corresponds to {16, 32, 64} bits. // + // The first column in all-widths.txt lists all possible widths. + // + // To deterine the size given a width string and a mode, use + // [Database.WidthSize]. + // // Possible values: "", "q", "ds", "dq", ... // Optional. Width string @@ -91,11 +96,14 @@ var xedVisibilities = map[string]OperandVisibility{ // See "$XED/pysrc/opnds.py" to learn about fields format // and valid combinations. // -// Requires database with xtypes and widths info. +// Requires database with xtypes, widths, and extraWidths info. 
func NewOperand(db *Database, s string) (*Operand, error) { if db.widths == nil { return nil, errors.New("Database.widths is nil") } + if db.extraWidths == nil { + return nil, errors.New("Database.extraWidths is nil") + } fields := strings.Split(s, ":") switch len(fields) { @@ -111,9 +119,10 @@ func NewOperand(db *Database, s string) (*Operand, error) { op.Action = fields[1] // Optional fields. + var w string for _, f := range fields[2:] { - if db.widths[f] != nil && op.Width == "" { - op.Width = f + if db.widths[f] != nil && w == "" { + w = f } else if vis, ok := xedVisibilities[f]; ok { op.Visibility = vis } else if xtype := db.xtypes[f]; xtype != nil { @@ -126,6 +135,33 @@ func NewOperand(db *Database, s string) (*Operand, error) { } } + // Get default width from operand type. + if w == "" { + if op.NonterminalName() { + if strings.HasPrefix(op.NameLHS(), "REG") { + rhs := op.NameRHS() + if strings.HasPrefix(rhs, "XED_REG_") { + // Register + w = db.extraWidths[rhs] + } else if strings.HasSuffix(rhs, "()") { + // Non-terminal + w = db.extraWidths[rhs] + } + } + } else { + // Try as an immediate. + w = db.extraWidths[op.Name] + } + } + + if w != "" { + op.Width = w + // If operand did not specify an xtype, get the default from the width + if op.Xtype == "" && db.widths[w] != nil { + op.Xtype = db.widths[w].xtype + } + } + return &op, nil } diff --git a/x86/xeddata/readlines.go b/x86/xeddata/readlines.go new file mode 100644 index 00000000..610bb834 --- /dev/null +++ b/x86/xeddata/readlines.go @@ -0,0 +1,101 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package xeddata + +import ( + "bufio" + "bytes" + "fmt" + "io" + "iter" + "path/filepath" + "strings" +) + +type lineInfo struct { + Pos + data []byte +} + +type Pos struct { + Path string + Line int +} + +func (p Pos) String() string { + if p.Line == 0 { + if p.Path == "" { + return "?:?" 
+ } + return p.Path + } else if p.Path == "" { + return fmt.Sprintf("?:%d", p.Line) + } + return fmt.Sprintf("%s:%d", p.Path, p.Line) +} + +func (p Pos) ShortString() string { + p2 := p + p2.Path = filepath.Base(p.Path) + return p2.String() +} + +// readLines yields lines from r, with continuation lines folded, comments and +// trailing whitespace removed, and blank lines omitted. +// +// The returned lineInfo.data buffer may be reused between yields. +// +// If r has a Name() string method, this is used to populate lineInfo.Path. +// +// TODO: Rewrite Reader to use this. +func readLines(r io.Reader) iter.Seq2[lineInfo, error] { + type Named interface { + Name() string // Matches os.File + } + path := "" + if f, ok := r.(Named); ok { + path = f.Name() + } + + s := bufio.NewScanner(r) + return func(yield func(lineInfo, error) bool) { + var info lineInfo + info.Path = path + var lineBuf []byte + for s.Scan() { + info.Line++ + + lineBuf = append(lineBuf, s.Bytes()...) + if len(lineBuf) > 0 && lineBuf[len(lineBuf)-1] == '\\' { + // Continuation line. Drop the \ and keep reading. + lineBuf = lineBuf[:len(lineBuf)-1] + continue + } + // Remove comments and trailing whitespace + if i := strings.IndexByte(string(lineBuf), '#'); i >= 0 { + lineBuf = lineBuf[:i] + } + lineBuf = bytes.TrimRight(lineBuf, " \t") + // Don't yield blank lines + if len(lineBuf) == 0 { + continue + } + + info.data = lineBuf + if !yield(info, nil) { + return + } + lineBuf = lineBuf[:0] + } + + if err := s.Err(); err != nil { + yield(lineInfo{}, err) + return + } + if len(lineBuf) > 0 { + yield(lineInfo{}, fmt.Errorf("continuation line at EOF")) + } + } +} diff --git a/x86/xeddata/testdata/xedpath/all-extra-widths.txt b/x86/xeddata/testdata/xedpath/all-extra-widths.txt new file mode 100644 index 00000000..30a004e6 --- /dev/null +++ b/x86/xeddata/testdata/xedpath/all-extra-widths.txt @@ -0,0 +1,3 @@ +# Copyright 2025 The Go Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. diff --git a/x86/xeddata/xeddata_test.go b/x86/xeddata/xeddata_test.go index ad0de1ec..fc98d86c 100644 --- a/x86/xeddata/xeddata_test.go +++ b/x86/xeddata/xeddata_test.go @@ -69,6 +69,12 @@ var ( "mem80real": {xtype: "f80", sizes: [3]string{"10", "10", "10"}}, "mfpxenv": {xtype: "struct", sizes: [3]string{"512", "512", "512"}}, } + + extraWidthsMap = map[string]string{ + "AGEN": "pseudo", + "XED_REG_EAX": "d", + "GPR32_R()": "d", + } ) // newStatesSource returns a reader that mocks "all-state.txt" file. @@ -118,6 +124,22 @@ func newWidthsSource() io.Reader { return &buf } +func newExtraWidthsSource() io.Reader { + var buf bytes.Buffer + for name, width := range extraWidthsMap { + buf.WriteString("# Line comment\n") + buf.WriteString("#\n\n\n") + if reg, ok := strings.CutPrefix(name, "XED_REG_"); ok { + fmt.Fprintf(&buf, "reg %s %s\n", reg, width) + } else if nt, ok := strings.CutSuffix(name, "()"); ok { + fmt.Fprintf(&buf, "nt %s %s\n", nt, width) + } else { + fmt.Fprintf(&buf, "imm_const %s %s\n", name, width) + } + } + return &buf +} + // newXtypesSource returns a reader that mocks "all-element-types.txt" file. // Input content is generated based on xtypesMap. 
func newXtypesSource() io.Reader { @@ -150,6 +172,10 @@ func newTestDatabase(t *testing.T) *Database { if err != nil { t.Fatal(err) } + db.extraWidths, err = parseExtraWidths(newExtraWidthsSource()) + if err != nil { + t.Fatal(err) + } err = db.LoadXtypes(newXtypesSource()) if err != nil { t.Fatal(err) @@ -258,7 +284,7 @@ func TestNewOperand(t *testing.T) { }, { "MEM0:rw:q", - Operand{Name: "MEM0", Action: "rw", Width: "q"}, + Operand{Name: "MEM0", Action: "rw", Width: "q", Xtype: "i64"}, }, { "REG0=XMM_R():rcw:ps:f32", @@ -266,13 +292,27 @@ func TestNewOperand(t *testing.T) { }, { "IMM0:r:z", - Operand{Name: "IMM0", Action: "r", Width: "z"}, + Operand{Name: "IMM0", Action: "r", Width: "z", Xtype: "int"}, }, { "IMM1:cw:b:i8", Operand{Name: "IMM1", Action: "cw", Width: "b", Xtype: "i8"}, }, + // Implied width code + { + "AGEN:r", + Operand{Name: "AGEN", Action: "r", Width: "pseudo"}, + }, + { + "REG0=XED_REG_EAX:r", + Operand{Name: "REG0=XED_REG_EAX", Action: "r", Width: "d", Xtype: "i32"}, + }, + { + "REG0=GPR32_R():r", + Operand{Name: "REG0=GPR32_R()", Action: "r", Width: "d", Xtype: "i32"}, + }, + // Optional fields and visibility. 
{ "REG2:r:EXPL", @@ -280,19 +320,19 @@ func TestNewOperand(t *testing.T) { }, { "MEM1:w:d:IMPL", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit}, + Operand{Name: "MEM1", Action: "w", Width: "d", Xtype: "i32", Visibility: VisImplicit}, }, { "MEM1:w:IMPL:d", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisImplicit}, + Operand{Name: "MEM1", Action: "w", Width: "d", Xtype: "i32", Visibility: VisImplicit}, }, { - "MEM1:w:d:SUPP:i32", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"}, + "MEM1:w:d:SUPP:f32", + Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "f32"}, }, { - "MEM1:w:SUPP:d:i32", - Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "i32"}, + "MEM1:w:SUPP:d:f32", + Operand{Name: "MEM1", Action: "w", Width: "d", Visibility: VisSuppressed, Xtype: "f32"}, }, // Ambiguity: xtypes that look like widths. @@ -304,7 +344,7 @@ func TestNewOperand(t *testing.T) { // TXT=X field. { "REG1=MASK1():r:mskw:TXT=ZEROSTR", - Operand{Name: "REG1=MASK1()", Action: "r", Width: "mskw", + Operand{Name: "REG1=MASK1()", Action: "r", Width: "mskw", Xtype: "i1", Attributes: map[string]bool{"TXT=ZEROSTR": true}}, }, { @@ -314,26 +354,26 @@ func TestNewOperand(t *testing.T) { }, { "REG0=ZMM_R3():w:zf32:TXT=SAESTR", - Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf32", + Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf32", Xtype: "f32", Attributes: map[string]bool{"TXT=SAESTR": true}}, }, { "REG0=ZMM_R3():w:zf64:TXT=ROUNDC", - Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf64", + Operand{Name: "REG0=ZMM_R3()", Action: "w", Width: "zf64", Xtype: "f64", Attributes: map[string]bool{"TXT=ROUNDC": true}}, }, // Multi-source. 
{ "REG2=ZMM_N3():r:zf32:MULTISOURCE4", - Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", + Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", Xtype: "f32", Attributes: map[string]bool{"MULTISOURCE4": true}}, }, // Multi-source + EVEX.b context. { "REG2=ZMM_N3():r:zf32:MULTISOURCE4:TXT=SAESTR", - Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", + Operand{Name: "REG2=ZMM_N3()", Action: "r", Width: "zf32", Xtype: "f32", Attributes: map[string]bool{"MULTISOURCE4": true, "TXT=SAESTR": true}}, }, } From 913b04b6e4a46a6b638203803966d86204608c75 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 9 Mar 2025 12:11:38 -0400 Subject: [PATCH 048/200] x86/xeddata: rewrite Reader to use new readLines iterator The parsing in readLines is simpler and more robust. It also includes line information (which would have been pretty annoying to add to the old parser), so this is a step toward adding position information to Inst. One downside of this is that Reader.Read has to use a pull iterator. However, as far as I can tell, the only callers of this were Reader.ReadAll and WalkInsts, both of which we rewrite to use readObjects directly. Change-Id: I4ca58c877fbfd5295209aea31999c8abd6876f17 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656238 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- x86/xeddata/reader.go | 127 ++++++++++++++++++--------------------- x86/xeddata/readlines.go | 2 - x86/xeddata/xeddata.go | 11 +--- 3 files changed, 61 insertions(+), 79 deletions(-) diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index cd235100..c1a0aa7e 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -5,47 +5,27 @@ package xeddata import ( - "bufio" "errors" "fmt" "io" + "iter" "regexp" "strings" ) // Reader reads enc/dec-instruction objects from XED datafile. 
type Reader struct { - scanner *bufio.Scanner + r io.Reader - lines []string // Re-used between Read calls - - // True if last line ends with newline escape (backslash). - joinLines bool + // Initialized on first call to Read + next func() (*Object, error, bool) + stop func() + err error } // NewReader returns a new Reader that reads from r. func NewReader(r io.Reader) *Reader { - return newReader(bufio.NewScanner(r)) -} - -func newReader(scanner *bufio.Scanner) *Reader { - r := &Reader{ - lines: make([]string, 0, 64), - scanner: scanner, - } - scanner.Split(r.split) - return r -} - -// split implements bufio.SplitFunc for Reader. -func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) { - // Wrapping bufio.ScanLines to handle \-style newline escapes. - // joinLines flag affects Reader.scanLine behavior. - advance, tok, err := bufio.ScanLines(data, atEOF) - if err == nil && len(tok) >= 1 { - r.joinLines = tok[len(tok)-1] == '\\' - } - return advance, tok, err + return &Reader{r: r} } // Read reads single XED instruction object from @@ -54,37 +34,65 @@ func (r *Reader) split(data []byte, atEOF bool) (int, []byte, error) { // If there is no data left to be read, // returned error is io.EOF. func (r *Reader) Read() (*Object, error) { - for line := r.scanLine(); line != ""; line = r.scanLine() { - if line[0] != '{' { - continue - } - lines := r.lines[:0] // Object lines - for line := r.scanLine(); line != ""; line = r.scanLine() { - if line[0] == '}' { - return r.parseLines(lines) - } - lines = append(lines, line) - } - return nil, errors.New("no matching '}' found") + if r.err != nil { + return nil, r.err } - - return nil, io.EOF + if r.next == nil { + r.next, r.stop = iter.Pull2(readObjects(r.r)) + } + obj, err, end := r.next() + if end { + err = io.EOF + } + if err != nil { + r.stop() + r.err, r.next, r.stop = err, nil, nil + return nil, err + } + return obj, nil } // ReadAll reads all the remaining objects from r. 
// A successful call returns err == nil, not err == io.EOF, // just like csv.Reader.ReadAll(). func (r *Reader) ReadAll() ([]*Object, error) { - objects := []*Object{} - for { - o, err := r.Read() - if err == io.EOF { - return objects, nil - } + var objects []*Object + for obj, err := range readObjects(r.r) { if err != nil { return objects, err } - objects = append(objects, o) + objects = append(objects, obj) + } + return objects, nil +} + +// readObjects yields all of the objects from r. +func readObjects(r io.Reader) iter.Seq2[*Object, error] { + iterLines := readLines(r) + return func(yield func(*Object, error) bool) { + var block []string // Reused on each iteration + inBlock := false + for line, err := range iterLines { + if err != nil { + yield(nil, err) + return + } + if !inBlock { + inBlock = line.data[0] == '{' + } else if line.data[0] == '}' { + inBlock = false + obj, err := parseObjectLines(block) + if !yield(obj, err) { + return + } + block = block[:0] + } else { + block = append(block, string(line.data)) + } + } + if inBlock { + yield(nil, errors.New("no matching '}' found")) + } } } @@ -96,11 +104,10 @@ func (r *Reader) ReadAll() ([]*Object, error) { // unquoted field name "[A-Z_]+" (captured) // field value delimiter ":" // field value string (captured) -// optional trailing comment that is ignored "[^#]*" -var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*([^#]*)`) +var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*(.*)`) // parseLines turns collected object lines into Object. -func (r *Reader) parseLines(lines []string) (*Object, error) { +func parseObjectLines(lines []string) (*Object, error) { o := &Object{} // Repeatable tokens. @@ -192,21 +199,3 @@ func (r *Reader) parseLines(lines []string) (*Object, error) { return o, nil } - -// scanLine tries to fetch non-empty line from scanner. -// -// Returns empty line when scanner.Scan() returns false -// before non-empty line is found. 
-func (r *Reader) scanLine() string { - for r.scanner.Scan() { - line := r.scanner.Text() - if line == "" { - continue - } - if r.joinLines { - return line[:len(line)-len("\\")] + r.scanLine() - } - return line - } - return "" -} diff --git a/x86/xeddata/readlines.go b/x86/xeddata/readlines.go index 610bb834..d9638973 100644 --- a/x86/xeddata/readlines.go +++ b/x86/xeddata/readlines.go @@ -48,8 +48,6 @@ func (p Pos) ShortString() string { // The returned lineInfo.data buffer may be reused between yields. // // If r has a Name() string method, this is used to populate lineInfo.Path. -// -// TODO: Rewrite Reader to use this. func readLines(r io.Reader) iter.Seq2[lineInfo, error] { type Named interface { Name() string // Matches os.File diff --git a/x86/xeddata/xeddata.go b/x86/xeddata/xeddata.go index 7cc7a087..4d2b2012 100644 --- a/x86/xeddata/xeddata.go +++ b/x86/xeddata/xeddata.go @@ -5,7 +5,6 @@ package xeddata import ( - "io" "os" "path/filepath" ) @@ -16,17 +15,13 @@ func WalkInsts(xedPath string, visit func(*Inst)) error { if err != nil { return err } - r := NewReader(f) - for { - o, err := r.Read() - if err == io.EOF { - return nil - } + for obj, err := range readObjects(f) { if err != nil { return err } - for _, inst := range o.Insts { + for _, inst := range obj.Insts { visit(inst) } } + return nil } From adeecab5f209ca3732e844b3b59716281155fbf8 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 9 Mar 2025 13:38:47 -0400 Subject: [PATCH 049/200] x86/xeddata: add Pos to Object and Inst Change-Id: Ib9ad5e2c4bbd005b7fad15b7d0dc8943f2747689 Reviewed-on: https://go-review.googlesource.com/c/arch/+/656239 Auto-Submit: Austin Clements Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- x86/xeddata/object.go | 6 ++++++ x86/xeddata/reader.go | 16 ++++++++++++---- x86/xeddata/xeddata_test.go | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/x86/xeddata/object.go b/x86/xeddata/object.go index 
4a739739..662aa694 100644 --- a/x86/xeddata/object.go +++ b/x86/xeddata/object.go @@ -21,6 +21,9 @@ import ( // Object contains multiple Inst elements that represent concrete // instruction with encoding pattern and operands description. type Object struct { + // Pos is the file position of the start of this object. + Pos Pos + // Iclass is instruction class name (opcode). // Iclass alone is not enough to uniquely identify machine instructions. // Example: "PSRLW". @@ -128,6 +131,9 @@ type Inst struct { // Inst objects. *Object + // Pos is the file position of this Inst's PATTERN. + Pos Pos + // Index is the position inside XED object. // Object.Insts[Index] returns this inst. Index int diff --git a/x86/xeddata/reader.go b/x86/xeddata/reader.go index c1a0aa7e..02fd9c81 100644 --- a/x86/xeddata/reader.go +++ b/x86/xeddata/reader.go @@ -70,7 +70,9 @@ func (r *Reader) ReadAll() ([]*Object, error) { func readObjects(r io.Reader) iter.Seq2[*Object, error] { iterLines := readLines(r) return func(yield func(*Object, error) bool) { + var blockPos Pos var block []string // Reused on each iteration + var linePos []Pos inBlock := false for line, err := range iterLines { if err != nil { @@ -79,15 +81,17 @@ func readObjects(r io.Reader) iter.Seq2[*Object, error] { } if !inBlock { inBlock = line.data[0] == '{' + blockPos = line.Pos } else if line.data[0] == '}' { inBlock = false - obj, err := parseObjectLines(block) + obj, err := parseObjectLines(blockPos, block, linePos) if !yield(obj, err) { return } - block = block[:0] + block, linePos = block[:0], linePos[:0] } else { block = append(block, string(line.data)) + linePos = append(linePos, line.Pos) } } if inBlock { @@ -107,8 +111,9 @@ func readObjects(r io.Reader) iter.Seq2[*Object, error] { var instLineRE = regexp.MustCompile(`^([A-Z_]+)\s*:\s*(.*)`) // parseLines turns collected object lines into Object. 
-func parseObjectLines(lines []string) (*Object, error) { +func parseObjectLines(blockPos Pos, lines []string, linePos []Pos) (*Object, error) { o := &Object{} + o.Pos = blockPos // Repeatable tokens. // We can not assign them eagerly, because these fields @@ -117,9 +122,10 @@ func parseObjectLines(lines []string) (*Object, error) { operands []string iforms []string patterns []string + poses []Pos ) - for _, l := range lines { + for i, l := range lines { l = strings.TrimLeft(l, " ") if l[0] == '#' { // Skip comment lines. continue @@ -167,6 +173,7 @@ func parseObjectLines(lines []string) (*Object, error) { operands = append(operands, val) case "PATTERN": patterns = append(patterns, val) + poses = append(poses, linePos[i]) case "IFORM": iforms = append(iforms, val) @@ -188,6 +195,7 @@ func parseObjectLines(lines []string) (*Object, error) { Object: o, Index: i, Pattern: patterns[i], + Pos: poses[i], Operands: operands[i], } // There can be less IFORMs than insts. diff --git a/x86/xeddata/xeddata_test.go b/x86/xeddata/xeddata_test.go index fc98d86c..8b64be4a 100644 --- a/x86/xeddata/xeddata_test.go +++ b/x86/xeddata/xeddata_test.go @@ -470,6 +470,42 @@ func TestReader(t *testing.T) { } } +func TestReaderPos(t *testing.T) { + const data = `# Comment +{ +ICLASS: iclass1 +DISASM: disasm1 + +PATTERN: pat1 pat1 +OPERANDS: ops1 ops1 +}` + r := NewReader(namedReader{strings.NewReader(data), "test"}) + objects, err := r.ReadAll() + if err != nil { + t.Fatal(err) + } + + if want := "test:2"; objects[0].Pos.String() != want { + t.Errorf("object Pos: got %q, want %q", objects[0].Pos, want) + } + if want := "test:6"; objects[0].Insts[0].Pos.String() != want { + t.Errorf("inst Pos: got %q, want %q", objects[0].Insts[0].Pos, want) + } +} + +type namedReader struct { + r io.Reader + name string +} + +func (n namedReader) Read(p []byte) (int, error) { + return n.r.Read(p) +} + +func (n namedReader) Name() string { + return n.name +} + func TestMacroExpand(t *testing.T) { tests := 
[...]struct { input string From f2e9665ba3565e2dd3b57b260d5f46b952e6b8c3 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 20 Mar 2025 11:14:42 -0700 Subject: [PATCH 050/200] x86/x86asm: don't symbolize immediate constants, mostly Fixes golang/go#72942 Change-Id: Ib3bfef301fa8502f2c2d692f91d38acd2df20275 Reviewed-on: https://go-review.googlesource.com/c/arch/+/659675 Reviewed-by: Cherry Mui Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- x86/x86asm/gnu.go | 12 +++++++----- x86/x86asm/intel.go | 12 +++++++----- x86/x86asm/plan9x.go | 21 ++++++++++++++++----- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/x86/x86asm/gnu.go b/x86/x86asm/gnu.go index 8eba1fd0..864f32c1 100644 --- a/x86/x86asm/gnu.go +++ b/x86/x86asm/gnu.go @@ -667,12 +667,14 @@ func gnuArg(inst *Inst, pc uint64, symname SymLookup, x Arg, usedPrefixes *bool) } } case Imm: - if s, base := symname(uint64(x)); s != "" { - suffix := "" - if uint64(x) != base { - suffix = fmt.Sprintf("%+d", uint64(x)-base) + if (inst.Op == MOV || inst.Op == PUSH) && inst.DataSize == 32 { // See comment in plan9x.go. + if s, base := symname(uint64(x)); s != "" { + suffix := "" + if uint64(x) != base { + suffix = fmt.Sprintf("%+d", uint64(x)-base) + } + return fmt.Sprintf("$%s%s", s, suffix) } - return fmt.Sprintf("$%s%s", s, suffix) } if inst.Mode == 32 { return fmt.Sprintf("$%#x", uint32(x)) diff --git a/x86/x86asm/intel.go b/x86/x86asm/intel.go index 472eabda..a0622998 100644 --- a/x86/x86asm/intel.go +++ b/x86/x86asm/intel.go @@ -341,12 +341,14 @@ func IntelSyntax(inst Inst, pc uint64, symname SymLookup) string { func intelArg(inst *Inst, pc uint64, symname SymLookup, arg Arg) string { switch a := arg.(type) { case Imm: - if s, base := symname(uint64(a)); s != "" { - suffix := "" - if uint64(a) != base { - suffix = fmt.Sprintf("%+d", uint64(a)-base) + if (inst.Op == MOV || inst.Op == PUSH) && inst.DataSize == 32 { // See comment in plan9x.go. 
+ if s, base := symname(uint64(a)); s != "" { + suffix := "" + if uint64(a) != base { + suffix = fmt.Sprintf("%+d", uint64(a)-base) + } + return fmt.Sprintf("$%s%s", s, suffix) } - return fmt.Sprintf("$%s%s", s, suffix) } if inst.Mode == 32 { return fmt.Sprintf("%#x", uint32(a)) diff --git a/x86/x86asm/plan9x.go b/x86/x86asm/plan9x.go index 9e866d87..e82349ce 100644 --- a/x86/x86asm/plan9x.go +++ b/x86/x86asm/plan9x.go @@ -116,12 +116,23 @@ func plan9Arg(inst *Inst, pc uint64, symname func(uint64) (string, uint64), arg return fmt.Sprintf("%#x", addr) case Imm: - if s, base := symname(uint64(a)); s != "" { - suffix := "" - if uint64(a) != base { - suffix = fmt.Sprintf("%+d", uint64(a)-base) + if (inst.Op == MOV || inst.Op == PUSH) && inst.DataSize == 32 { + // Only try to convert an immediate to a symbol in certain + // special circumstances. See issue 72942. + // + // On 64-bit, symbol addresses always hit the Mem case below. + // Particularly, we use LEAQ to materialize the address of + // a global or function. + // + // On 32-bit, we sometimes use MOVL. Still try to symbolize + // those immediates. + if s, base := symname(uint64(a)); s != "" { + suffix := "" + if uint64(a) != base { + suffix = fmt.Sprintf("%+d", uint64(a)-base) + } + return fmt.Sprintf("$%s%s(SB)", s, suffix) } - return fmt.Sprintf("$%s%s(SB)", s, suffix) } if inst.Mode == 32 { return fmt.Sprintf("$%#x", uint32(a)) From ad2912dbb8f1d4d5715569c985dee84d51562f06 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 17 Apr 2025 22:33:09 -0400 Subject: [PATCH 051/200] internal/unify: new package for structured value unification The plan is to use this package as part of generating SIMD mappings. 
Change-Id: Ie67bf7fe87222b8dffdbb12a99729c0fe0f7bc38 Reviewed-on: https://go-review.googlesource.com/c/arch/+/666515 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- go.mod | 2 + go.sum | 4 + internal/unify/closure.go | 148 +++++++++ internal/unify/domain.go | 311 ++++++++++++++++++ internal/unify/dot.go | 183 +++++++++++ internal/unify/env.go | 500 +++++++++++++++++++++++++++++ internal/unify/html.go | 190 +++++++++++ internal/unify/pos.go | 33 ++ internal/unify/testdata/unify.yaml | 174 ++++++++++ internal/unify/testdata/vars.yaml | 175 ++++++++++ internal/unify/trace.go | 168 ++++++++++ internal/unify/unify.go | 322 +++++++++++++++++++ internal/unify/unify_test.go | 154 +++++++++ internal/unify/value.go | 129 ++++++++ internal/unify/value_test.go | 36 +++ internal/unify/yaml.go | 475 +++++++++++++++++++++++++++ internal/unify/yaml_test.go | 91 ++++++ 17 files changed, 3095 insertions(+) create mode 100644 internal/unify/closure.go create mode 100644 internal/unify/domain.go create mode 100644 internal/unify/dot.go create mode 100644 internal/unify/env.go create mode 100644 internal/unify/html.go create mode 100644 internal/unify/pos.go create mode 100644 internal/unify/testdata/unify.yaml create mode 100644 internal/unify/testdata/vars.yaml create mode 100644 internal/unify/trace.go create mode 100644 internal/unify/unify.go create mode 100644 internal/unify/unify_test.go create mode 100644 internal/unify/value.go create mode 100644 internal/unify/value_test.go create mode 100644 internal/unify/yaml.go create mode 100644 internal/unify/yaml_test.go diff --git a/go.mod b/go.mod index b72ba1a5..72642f75 100644 --- a/go.mod +++ b/go.mod @@ -3,3 +3,5 @@ module golang.org/x/arch go 1.23.0 require rsc.io/pdf v0.1.1 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index e854d25c..cf7dae80 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,6 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/internal/unify/closure.go b/internal/unify/closure.go new file mode 100644 index 00000000..8a1636de --- /dev/null +++ b/internal/unify/closure.go @@ -0,0 +1,148 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "maps" + "slices" +) + +type Closure struct { + val *Value + env nonDetEnv +} + +func NewSum(vs ...*Value) Closure { + id := &ident{name: "sum"} + return Closure{NewValue(Var{id}), topEnv.bind(id, vs...)} +} + +// IsBottom returns whether c consists of no values. +func (c Closure) IsBottom() bool { + return c.val.Domain == nil +} + +// Summands returns the top-level Values of c. This assumes the top-level of c +// was constructed as a sum, and is mostly useful for debugging. +func (c Closure) Summands() iter.Seq[*Value] { + if v, ok := c.val.Domain.(Var); ok { + parts := c.env.partitionBy(v.id) + return func(yield func(*Value) bool) { + for _, part := range parts { + if !yield(part.value) { + return + } + } + } + } + return func(yield func(*Value) bool) { + yield(c.val) + } +} + +// All enumerates all possible concrete values of c by substituting variables +// from the environment. 
+// +// E.g., enumerating this Value +// +// a: !sum [1, 2] +// b: !sum [3, 4] +// +// results in +// +// - {a: 1, b: 3} +// - {a: 1, b: 4} +// - {a: 2, b: 3} +// - {a: 2, b: 4} +func (c Closure) All() iter.Seq[*Value] { + // In order to enumerate all concrete values under all possible variable + // bindings, we use a "non-deterministic continuation passing style" to + // implement this. We use CPS to traverse the Value tree, threading the + // (possibly narrowing) environment through that CPS following an Euler + // tour. Where the environment permits multiple choices, we invoke the same + // continuation for each choice. Similar to a yield function, the + // continuation can return false to stop the non-deterministic walk. + return func(yield func(*Value) bool) { + c.val.all1(c.env, func(v *Value, e nonDetEnv) bool { + return yield(v) + }) + } +} + +func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { + switch d := v.Domain.(type) { + default: + panic(fmt.Sprintf("unknown domain type %T", d)) + + case nil: + return true + + case Top, String: + return cont(v, e) + + case Def: + fields := d.keys() + // We can reuse this parts slice because we're doing a DFS through the + // state space. (Otherwise, we'd have to do some messy threading of an + // immutable slice-like value through allElt.) + parts := make(map[string]*Value, len(fields)) + + // TODO: If there are no Vars or Sums under this Def, then nothing can + // change the Value or env, so we could just cont(v, e). + var allElt func(elt int, e nonDetEnv) bool + allElt = func(elt int, e nonDetEnv) bool { + if elt == len(fields) { + // Build a new Def from the concrete parts. Clone parts because + // we may reuse it on other non-deterministic branches. 
+ nVal := newValueFrom(Def{maps.Clone(parts)}, v) + return cont(nVal, e) + } + + return d.fields[fields[elt]].all1(e, func(v *Value, e nonDetEnv) bool { + parts[fields[elt]] = v + return allElt(elt+1, e) + }) + } + return allElt(0, e) + + case Tuple: + // Essentially the same as Def. + if d.repeat != nil { + // There's nothing we can do with this. + return cont(v, e) + } + parts := make([]*Value, len(d.vs)) + var allElt func(elt int, e nonDetEnv) bool + allElt = func(elt int, e nonDetEnv) bool { + if elt == len(d.vs) { + // Build a new tuple from the concrete parts. Clone parts because + // we may reuse it on other non-deterministic branches. + nVal := newValueFrom(Tuple{vs: slices.Clone(parts)}, v) + return cont(nVal, e) + } + + return d.vs[elt].all1(e, func(v *Value, e nonDetEnv) bool { + parts[elt] = v + return allElt(elt+1, e) + }) + } + return allElt(0, e) + + case Var: + // Go each way this variable can be bound. + for _, ePart := range e.partitionBy(d.id) { + // d.id is no longer bound in this environment partition. We'll may + // need it later in the Euler tour, so bind it back to this single + // value. + env := ePart.env.bind(d.id, ePart.value) + if !ePart.value.all1(env, cont) { + return false + } + } + return true + } +} diff --git a/internal/unify/domain.go b/internal/unify/domain.go new file mode 100644 index 00000000..c59bd621 --- /dev/null +++ b/internal/unify/domain.go @@ -0,0 +1,311 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "maps" + "reflect" + "regexp" + "slices" + "strconv" + "strings" +) + +// A Domain is a non-empty set of values, all of the same kind. +// +// Domain may be a scalar: +// +// - [String] - Represents string-typed values. +// +// Or a composite: +// +// - [Def] - A mapping from fixed keys to [Domain]s. 
+// +// - [Tuple] - A fixed-length sequence of [Domain]s or +// all possible lengths repeating a [Domain]. +// +// Or top or bottom: +// +// - [Top] - Represents all possible values of all kinds. +// +// - nil - Represents no values. +// +// Or a variable: +// +// - [Var] - A value captured in the environment. +type Domain interface { + Exact() bool + + // decode stores this value in a Go value. If this value is not exact, this + // returns a potentially wrapped *inexactError. + decode(reflect.Value) error +} + +type inexactError struct { + valueType string + goType string +} + +func (e *inexactError) Error() string { + return fmt.Sprintf("cannot store inexact %s value in %s", e.valueType, e.goType) +} + +type decodeError struct { + path string + err error +} + +func newDecodeError(path string, err error) *decodeError { + if err, ok := err.(*decodeError); ok { + return &decodeError{path: path + "." + err.path, err: err.err} + } + return &decodeError{path: path, err: err} +} + +func (e *decodeError) Unwrap() error { + return e.err +} + +func (e *decodeError) Error() string { + return fmt.Sprintf("%s: %s", e.path, e.err) +} + +// Top represents all possible values of all possible types. +type Top struct{} + +func (t Top) Exact() bool { return false } + +func (t Top) decode(rv reflect.Value) error { + // We can decode Top into a pointer-typed value as nil. + if rv.Kind() != reflect.Pointer { + return &inexactError{"top", rv.Type().String()} + } + rv.SetZero() + return nil +} + +// A Def is a mapping from field names to [Value]s. Any fields not explicitly +// listed have [Value] [Top]. +type Def struct { + fields map[string]*Value +} + +// NewDef creates a new [Def]. +// +// The fields and values slices must have the same length. 
+func NewDef(fields []string, values []*Value) Def { + if len(fields) != len(values) { + panic("fields and values must have the same length") + } + m := make(map[string]*Value, len(fields)) + for i := range fields { + if _, ok := m[fields[i]]; ok { + panic(fmt.Sprintf("duplicate field %q", fields[i])) + } + m[fields[i]] = values[i] + } + return Def{m} +} + +// Exact returns true if all field Values are exact. +func (d Def) Exact() bool { + for _, v := range d.fields { + if !v.Exact() { + return false + } + } + return true +} + +func (d Def) decode(rv reflect.Value) error { + rv, err := preDecode(rv, reflect.Struct, "Def") + if err != nil { + return err + } + var lowered map[string]string // Lower case -> canonical for d.fields. + rt := rv.Type() + for fi := range rv.NumField() { + fType := rt.Field(fi) + if fType.PkgPath != "" { + continue + } + v := d.fields[fType.Name] + if v == nil { + v = topValue + + // Try a case-insensitive match + canon, ok := d.fields[strings.ToLower(fType.Name)] + if ok { + v = canon + } else { + if lowered == nil { + lowered = make(map[string]string, len(d.fields)) + for k := range d.fields { + l := strings.ToLower(k) + if k != l { + lowered[l] = k + } + } + } + canon, ok := lowered[strings.ToLower(fType.Name)] + if ok { + v = d.fields[canon] + } + } + } + if err := v.Domain.decode(rv.Field(fi)); err != nil { + return newDecodeError(fType.Name, err) + } + } + return nil +} + +func (d Def) keys() []string { + return slices.Sorted(maps.Keys(d.fields)) +} + +func (d Def) All() iter.Seq2[string, *Value] { + // TODO: We call All fairly often. It's probably bad to sort this every + // time. + keys := slices.Sorted(maps.Keys(d.fields)) + return func(yield func(string, *Value) bool) { + for _, k := range keys { + if !yield(k, d.fields[k]) { + return + } + } + } +} + +// A Tuple is a sequence of Values in one of two forms: 1. a fixed-length tuple, +// where each Value can be different or 2. 
a "repeated tuple", which is a Value +// repeated 0 or more times. +type Tuple struct { + vs []*Value + + // repeat, if non-nil, means this Tuple consists of an element repeated 0 or + // more times. If repeat is non-nil, vs must be nil. This is a generator + // function because we don't necessarily want *exactly* the same Value + // repeated. For example, in YAML encoding, a !sum in a repeated tuple needs + // a fresh variable in each instance. + repeat []func(nonDetEnv) (*Value, nonDetEnv) +} + +func NewTuple(vs ...*Value) Tuple { + return Tuple{vs: vs} +} + +func NewRepeat(gens ...func(nonDetEnv) (*Value, nonDetEnv)) Tuple { + return Tuple{repeat: gens} +} + +func (d Tuple) Exact() bool { + if d.repeat != nil { + return false + } + for _, v := range d.vs { + if !v.Exact() { + return false + } + } + return true +} + +func (d Tuple) decode(rv reflect.Value) error { + if d.repeat != nil { + return &inexactError{"repeated tuple", rv.Type().String()} + } + // TODO: We could also do arrays. + rv, err := preDecode(rv, reflect.Slice, "Tuple") + if err != nil { + return err + } + if rv.IsNil() || rv.Cap() < len(d.vs) { + rv.Set(reflect.MakeSlice(rv.Type(), len(d.vs), len(d.vs))) + } else { + rv.SetLen(len(d.vs)) + } + for i, v := range d.vs { + if err := v.Domain.decode(rv.Index(i)); err != nil { + return newDecodeError(fmt.Sprintf("%d", i), err) + } + } + return nil +} + +// A String represents a set of strings. It can represent the intersection of a +// set of regexps, or a single exact string. In general, the domain of a String +// is non-empty, but we do not attempt to prove emptiness of a regexp value. 
+type String struct { + kind stringKind + re []*regexp.Regexp // Intersection of regexps + exact string +} + +type stringKind int + +const ( + stringRegex stringKind = iota + stringExact +) + +func NewStringRegex(exprs ...string) (String, error) { + if len(exprs) == 0 { + exprs = []string{""} + } + v := String{kind: -1} + for _, expr := range exprs { + re, err := regexp.Compile(`\A(?:` + expr + `)\z`) + if err != nil { + return String{}, fmt.Errorf("parsing value: %s", err) + } + + // An exact value narrows the whole domain to exact, so we're done, but + // should keep parsing. + if v.kind == stringExact { + continue + } + + if _, complete := re.LiteralPrefix(); complete { + v = String{kind: stringExact, exact: expr} + } else { + v.kind = stringRegex + v.re = append(v.re, re) + } + } + return v, nil +} + +func NewStringExact(s string) String { + return String{kind: stringExact, exact: s} +} + +// Exact returns whether this Value is known to consist of a single string. +func (d String) Exact() bool { + return d.kind == stringExact +} + +func (d String) decode(rv reflect.Value) error { + if d.kind != stringExact { + return &inexactError{"regex", rv.Type().String()} + } + rv2, err := preDecode(rv, reflect.String, "String") + if err == nil { + rv2.SetString(d.exact) + return nil + } + rv2, err = preDecode(rv, reflect.Int, "String") + if err == nil { + i, err := strconv.Atoi(d.exact) + if err != nil { + return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) + } + rv2.SetInt(int64(i)) + return nil + } + return err +} diff --git a/internal/unify/dot.go b/internal/unify/dot.go new file mode 100644 index 00000000..143fa615 --- /dev/null +++ b/internal/unify/dot.go @@ -0,0 +1,183 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package unify + +import ( + "bytes" + "fmt" + "html" + "io" + "os" + "os/exec" + "strings" +) + +const maxNodes = 30 + +type dotEncoder struct { + w *bytes.Buffer + + idGen int // Node name generation + valLimit int // Limit the number of Values in a subgraph + + idp identPrinter +} + +func newDotEncoder() *dotEncoder { + return &dotEncoder{ + w: new(bytes.Buffer), + } +} + +func (enc *dotEncoder) clear() { + enc.w.Reset() + enc.idGen = 0 +} + +func (enc *dotEncoder) writeTo(w io.Writer) { + fmt.Fprintln(w, "digraph {") + // Use the "new" ranking algorithm, which lets us put nodes from different + // clusters in the same rank. + fmt.Fprintln(w, "newrank=true;") + fmt.Fprintln(w, "node [shape=box, ordering=out];") + + w.Write(enc.w.Bytes()) + fmt.Fprintln(w, "}") +} + +func (enc *dotEncoder) writeSvg(w io.Writer) error { + cmd := exec.Command("dot", "-Tsvg") + in, err := cmd.StdinPipe() + if err != nil { + return err + } + var out bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = os.Stderr + if err := cmd.Start(); err != nil { + return err + } + enc.writeTo(in) + in.Close() + if err := cmd.Wait(); err != nil { + return err + } + // Trim SVG header so the result can be embedded + // + // TODO: In Graphviz 10.0.1, we could use -Tsvg_inline. + svg := out.Bytes() + if i := bytes.Index(svg, []byte("= 0 { + svg = svg[i:] + } + _, err = w.Write(svg) + return err +} + +func (enc *dotEncoder) newID(f string) string { + id := fmt.Sprintf(f, enc.idGen) + enc.idGen++ + return id +} + +func (enc *dotEncoder) node(label, sublabel string) string { + id := enc.newID("n%d") + l := html.EscapeString(label) + if sublabel != "" { + l += fmt.Sprintf("
%s", html.EscapeString(sublabel)) + } + fmt.Fprintf(enc.w, "%s [label=<%s>];\n", id, l) + return id +} + +func (enc *dotEncoder) edge(from, to string, label string, args ...any) { + l := fmt.Sprintf(label, args...) + fmt.Fprintf(enc.w, "%s -> %s [label=%q];\n", from, to, l) +} + +func (enc *dotEncoder) subgraph(v *Value) (vID, cID string) { + enc.valLimit = maxNodes + cID = enc.newID("cluster_%d") + fmt.Fprintf(enc.w, "subgraph %s {\n", cID) + fmt.Fprintf(enc.w, "style=invis;") + vID = enc.value(v) + fmt.Fprintf(enc.w, "}\n") + return +} + +func (enc *dotEncoder) value(v *Value) string { + if enc.valLimit <= 0 { + id := enc.newID("n%d") + fmt.Fprintf(enc.w, "%s [label=\"...\", shape=triangle];\n", id) + return id + } + enc.valLimit-- + + switch vd := v.Domain.(type) { + default: + panic(fmt.Sprintf("unknown domain type %T", vd)) + + case nil: + return enc.node("_|_", "") + + case Top: + return enc.node("_", "") + + // TODO: Like in YAML, figure out if this is just a sum. In dot, we + // could say any unentangled variable is a sum, and if it has more than + // one reference just share the node. 
+ + // case Sum: + // node := enc.node("Sum", "") + // for i, elt := range vd.vs { + // enc.edge(node, enc.value(elt), "%d", i) + // if enc.valLimit <= 0 { + // break + // } + // } + // return node + + case Def: + node := enc.node("Def", "") + for k, v := range vd.All() { + enc.edge(node, enc.value(v), "%s", k) + if enc.valLimit <= 0 { + break + } + } + return node + + case Tuple: + if vd.repeat == nil { + label := "Tuple" + node := enc.node(label, "") + for i, elt := range vd.vs { + enc.edge(node, enc.value(elt), "%d", i) + if enc.valLimit <= 0 { + break + } + } + return node + } else { + // TODO + return enc.node("TODO: Repeat", "") + } + + case String: + switch vd.kind { + case stringExact: + return enc.node(fmt.Sprintf("%q", vd.exact), "") + case stringRegex: + var parts []string + for _, re := range vd.re { + parts = append(parts, fmt.Sprintf("%q", re)) + } + return enc.node(strings.Join(parts, "&"), "") + } + panic("bad String kind") + + case Var: + return enc.node(fmt.Sprintf("Var %s", enc.idp.unique(vd.id)), "") + } +} diff --git a/internal/unify/env.go b/internal/unify/env.go new file mode 100644 index 00000000..618887cd --- /dev/null +++ b/internal/unify/env.go @@ -0,0 +1,500 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "reflect" + "slices" + "strings" +) + +// A nonDetEnv is a non-deterministic mapping from [ident]s to [Value]s. +// +// Logically, this is just a set of deterministic environments, where each +// deterministic environment is a complete mapping from each [ident]s to exactly +// one [Value]. In particular, [ident]s are NOT necessarily independent of each +// other. For example, an environment may have both {x: 1, y: 1} and {x: 2, y: +// 2}, but not {x: 1, y: 2}. +// +// A nonDetEnv is immutable. 
+//
+// Often [ident]s are independent of each other, so the representation optimizes
+// for this by using a cross-product of environment factors, where each factor
+// is a sum of deterministic environments. These operations obey the usual
+// distributional laws, so we can always canonicalize into this form. (It MAY be
+// worthwhile to allow more general expressions of sums and products.)
+//
+// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, in which the
+// variables x and y are dependent, we need a single factor that covers x and y
+// and consists of two terms: {x: 1, y: 1} + {x: 2, y: 2}.
+//
+// If we add a third variable z that can be 1 or 2, independent of x and y, we
+// get four logical environments:
+//
+//	{x: 1, y: 1, z: 1}
+//	{x: 2, y: 2, z: 1}
+//	{x: 1, y: 1, z: 2}
+//	{x: 2, y: 2, z: 2}
+//
+// This could be represented as a single factor that is the sum of these four
+// detEnvs, but because z is independent, it can be a separate factor. Hence,
+// the most compact representation of this environment is:
+//
+//	({x: 1, y: 1} + {x: 2, y: 2}) ⨯ ({z: 1} + {z: 2})
+//
+// That is, two factors, where each is the sum of two terms.
+type nonDetEnv struct {
+	// factors is a list of the multiplicative factors in this environment. The
+	// set of deterministic environments is the cross-product of these factors.
+	// All factors must have disjoint variables.
+	factors []*envSum
+}
+
+// envSum is a sum of deterministic environments, all with the same set of
+// variables.
+type envSum struct {
+	ids   []*ident // TODO: Do we ever use this as a slice? Should it be a map?
+	terms []detEnv
+}
+
+type detEnv struct {
+	vals []*Value // Indexes correspond to envSum.ids
+}
+
+var (
+	// zeroEnvFactor is the "0" value of an [envSum]. It's a factor with no
+	// sum terms. This is easiest to think of as: an empty sum must be the
+	// additive identity, 0.
+	zeroEnvFactor = &envSum{}
+
+	// topEnv is the algebraic one value of a [nonDetEnv].
It has no factors
+	// because the product of no factors is the multiplicative identity.
+	topEnv = nonDetEnv{}
+	// bottomEnv is the algebraic zero value of a [nonDetEnv]. The product of
+	// bottomEnv with x is bottomEnv, and the sum of bottomEnv with y is y.
+	bottomEnv = nonDetEnv{factors: []*envSum{zeroEnvFactor}}
+)
+
+// bind binds id to each of vals in e.
+//
+// It panics if id is already bound in e.
+//
+// Environments are typically initially constructed by starting with [topEnv]
+// and calling bind one or more times.
+func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv {
+	if e.isBottom() {
+		return bottomEnv
+	}
+
+	// TODO: If any of vals are _, should we just not do anything? We're kind of
+	// inconsistent about whether an id missing from e means id is invalid or
+	// means id is _.
+
+	// Check that id isn't present in e.
+	for _, f := range e.factors {
+		if slices.Contains(f.ids, id) {
+			panic("id " + id.name + " already present in environment")
+		}
+	}
+
+	// Create the new sum term.
+	sum := &envSum{ids: []*ident{id}}
+	for _, val := range vals {
+		sum.terms = append(sum.terms, detEnv{vals: []*Value{val}})
+	}
+	// Multiply it in.
+	factors := append(e.factors[:len(e.factors):len(e.factors)], sum)
+	return nonDetEnv{factors}
+}
+
+func (e nonDetEnv) isBottom() bool {
+	if len(e.factors) == 0 {
+		// This is top.
+		return false
+	}
+	return len(e.factors[0].terms) == 0
+}
+
+func (e nonDetEnv) vars() iter.Seq[*ident] {
+	return func(yield func(*ident) bool) {
+		for _, t := range e.factors {
+			for _, id := range t.ids {
+				if !yield(id) {
+					return
+				}
+			}
+		}
+	}
+}
+
+// all enumerates all deterministic environments in e.
+//
+// The result slice is in the same order as the slice returned by
+// [nonDetEnv.vars]. The slice is reused between iterations.
+func (e nonDetEnv) all() iter.Seq[[]*Value] { + return func(yield func([]*Value) bool) { + var vals []*Value + var walk func(int) bool + walk = func(i int) bool { + if i == len(e.factors) { + return yield(vals) + } + start := len(vals) + for _, term := range e.factors[i].terms { + vals = append(vals[:start], term.vals...) + if !walk(i + 1) { + return false + } + } + return true + } + walk(0) + } +} + +// allOrdered is like all, but idOrder controls the order of the values in the +// resulting slice. Any [ident]s in idOrder that are missing from e are set to +// topValue. The values of idOrder must be a bijection with [0, n). +func (e nonDetEnv) allOrdered(idOrder map[*ident]int) iter.Seq[[]*Value] { + valsLen := 0 + for _, idx := range idOrder { + valsLen = max(valsLen, idx+1) + } + + return func(yield func([]*Value) bool) { + vals := make([]*Value, valsLen) + // e may not have all of the IDs in idOrder. Make sure any missing + // values are top. + for i := range vals { + vals[i] = topValue + } + var walk func(int) bool + walk = func(i int) bool { + if i == len(e.factors) { + return yield(vals) + } + for _, term := range e.factors[i].terms { + for j, id := range e.factors[i].ids { + vals[idOrder[id]] = term.vals[j] + } + if !walk(i + 1) { + return false + } + } + return true + } + walk(0) + } +} + +func crossEnvs(envs ...nonDetEnv) nonDetEnv { + // Combine the factors of envs + var factors []*envSum + haveIDs := map[*ident]struct{}{} + for _, e := range envs { + if e.isBottom() { + // The environment is bottom, so the whole product goes to + // bottom. + return bottomEnv + } + // Check that all ids are disjoint. + for _, f := range e.factors { + for _, id := range f.ids { + if _, ok := haveIDs[id]; ok { + panic("conflict on " + id.name) + } + haveIDs[id] = struct{}{} + } + } + // Everything checks out. Multiply the factors. + factors = append(factors, e.factors...) 
+	}
+	return nonDetEnv{factors: factors}
+}
+
+func sumEnvs(envs ...nonDetEnv) nonDetEnv {
+	// nonDetEnv is a product at the top level, so we implement summation using
+	// the distributive law. We also use associativity to keep as many top-level
+	// factors as we can, since those are what keep the environment compact.
+	//
+	//	a * b * c + a * d (where a, b, c, and d are factors)
+	//	(combine common factors)
+	//	= a * (b * c + d)
+	//	(expand factors into their sum terms)
+	//	= a * ((b_1 + b_2 + ...) * (c_1 + c_2 + ...) + d)
+	//	(where b_i and c_i are deterministic environments)
+	//	(FOIL)
+	//	= a * (b_1 * c_1 + b_1 * c_2 + b_2 * c_1 + b_2 * c_2 + d)
+	//	(all factors are now in canonical form)
+	//	= a * e
+	//
+	// The product of two deterministic environments is a deterministic
+	// environment, and the sum of deterministic environments is a factor, so
+	// this process results in the canonical product-of-sums form.
+	//
+	// TODO: This is a bit of a one-way process. We could try to factor the
+	// environment to reduce the number of sums. I'm not sure how to do this
+	// efficiently. It might be possible to guide it by gathering the
+	// distributions of each ID's bindings. E.g., if there are 12 deterministic
+	// environments in a sum and $x is bound to 4 different values, each 3
+	// times, then it *might* be possible to factor out $x into a 4-way sum of
+	// its own.
+
+	factors, toSum := commonFactors(envs)
+
+	if len(toSum) > 0 {
+		// Collect all IDs into a single order.
+		var ids []*ident
+		idOrder := make(map[*ident]int)
+		for _, e := range toSum {
+			for v := range e.vars() {
+				if _, ok := idOrder[v]; !ok {
+					idOrder[v] = len(ids)
+					ids = append(ids, v)
+				}
+			}
+		}
+
+		// Flatten out each term in the sum.
+ var summands []detEnv + for _, env := range toSum { + for vals := range env.allOrdered(idOrder) { + summands = append(summands, detEnv{vals: slices.Clone(vals)}) + } + } + factors = append(factors, &envSum{ids: ids, terms: summands}) + } + + return nonDetEnv{factors: factors} +} + +// commonFactors finds common factors that can be factored out of a summation of +// [nonDetEnv]s. +func commonFactors(envs []nonDetEnv) (common []*envSum, toSum []nonDetEnv) { + // Drop any bottom environments. They don't contribute to the sum and they + // would complicate some logic below. + envs = slices.DeleteFunc(envs, func(e nonDetEnv) bool { + return e.isBottom() + }) + if len(envs) == 0 { + return bottomEnv.factors, nil + } + + // It's very common that the exact same factor will appear across all envs. + // Keep those factored out. + // + // TODO: Is it also common to have vars that are bound to the same value + // across all envs? If so, we could also factor those into common terms. + counts := map[*envSum]int{} + for _, e := range envs { + for _, f := range e.factors { + counts[f]++ + } + } + for _, f := range envs[0].factors { + if counts[f] == len(envs) { + // Common factor + common = append(common, f) + } + } + + // Any other factors need to be multiplied out. + for _, env := range envs { + var newFactors []*envSum + for _, f := range env.factors { + if counts[f] != len(envs) { + newFactors = append(newFactors, f) + } + } + if len(newFactors) > 0 { + toSum = append(toSum, nonDetEnv{factors: newFactors}) + } + } + + return common, toSum +} + +// envPartition is a subset of an env where id is bound to value in all +// deterministic environments. +type envPartition struct { + id *ident + value *Value + env nonDetEnv +} + +func (e nonDetEnv) partitionBy(id *ident) []envPartition { + if e.isBottom() { + // Bottom contains all variables + return []envPartition{{id: id, value: bottomValue, env: e}} + } + + // Find the factor containing id and id's index in that factor. 
+	idFactor, idIndex := -1, -1
+	var newIDs []*ident
+	for factI, fact := range e.factors {
+		idI := slices.Index(fact.ids, id)
+		if idI < 0 {
+			continue
+		} else if idFactor != -1 {
+			panic("multiple factors containing id " + id.name)
+		} else {
+			idFactor, idIndex = factI, idI
+			// Drop id from this factor's IDs
+			newIDs = without(fact.ids, idI)
+		}
+	}
+	if idFactor == -1 {
+		panic("id " + id.name + " not found in environment")
+	}
+
+	// If id is the only variable in its factor, then dropping it is equivalent
+	// to making the factor be the unit value, so we can just drop the factor.
+	// (And if this is the only factor, we'll arrive at [topEnv], which is
+	// exactly what we want!). In this case we can use the same nonDetEnv in all
+	// of the partitions.
+	isUnit := len(newIDs) == 0
+	var unitFactors []*envSum
+	if isUnit {
+		unitFactors = without(e.factors, idFactor)
+	}
+
+	// Create a partition for each distinct value of id.
+	var parts []envPartition
+	partIndex := map[*Value]int{}
+	for _, det := range e.factors[idFactor].terms {
+		val := det.vals[idIndex]
+		i, ok := partIndex[val]
+		if !ok {
+			i = len(parts)
+			var factors []*envSum
+			if isUnit {
+				factors = unitFactors
+			} else {
+				// Copy all other factors
+				factors = slices.Clone(e.factors)
+				factors[idFactor] = &envSum{ids: newIDs}
+			}
+			parts = append(parts, envPartition{id: id, value: val, env: nonDetEnv{factors: factors}})
+			partIndex[val] = i
+		}
+
+		if !isUnit {
+			factor := parts[i].env.factors[idFactor]
+			newVals := without(det.vals, idIndex)
+			factor.terms = append(factor.terms, detEnv{vals: newVals})
+		}
+	}
+	return parts
+}
+
+type ident struct {
+	_    [0]func() // Not comparable (only compare *ident)
+	name string
+}
+
+type Var struct {
+	id *ident
+}
+
+func (d Var) Exact() bool {
+	// These can't appear in concrete Values.
+ panic("Exact called on non-concrete Value") +} + +func (d Var) decode(rv reflect.Value) error { + return &inexactError{"var", rv.Type().String()} +} + +func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { + // TODO: Vars from !sums in the input can have a huge number of values. + // Unifying these could be way more efficient with some indexes over any + // exact values we can pull out, like Def fields that are exact Strings. + // Maybe we try to produce an array of yes/no/maybe matches and then we only + // have to do deeper evaluation of the maybes. We could probably cache this + // on an envTerm. It may also help to special-case Var/Var unification to + // pick which one to index versus enumerate. + + if vd, ok := w.Domain.(Var); ok && d.id == vd.id { + // Unifying $x with $x results in $x. If we descend into this we'll have + // problems because we strip $x out of the environment to keep ourselves + // honest and then can't find it on the other side. + // + // TODO: I'm not positive this is the right fix. + return vd, e, nil + } + + // We need to unify w with the value of d in each possible environment. We + // can save some work by grouping environments by the value of d, since + // there will be a lot of redundancy here. + var nEnvs []nonDetEnv + envParts := e.partitionBy(d.id) + for i, envPart := range envParts { + exit := uf.enterVar(d.id, i) + // Each branch logically gets its own copy of the initial environment + // (narrowed down to just this binding of the variable), and each branch + // may result in different changes to that starting environment. + res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) + exit.exit() + if err != nil { + return nil, nonDetEnv{}, err + } + if res.Domain == nil { + // This branch entirely failed to unify, so it's gone. 
+ continue + } + nEnv := e2.bind(d.id, res) + nEnvs = append(nEnvs, nEnv) + } + + if len(nEnvs) == 0 { + // All branches failed + return nil, bottomEnv, nil + } + + // The effect of this is entirely captured in the environment. We can return + // back the same Bind node. + return d, sumEnvs(nEnvs...), nil +} + +// An identPrinter maps [ident]s to unique string names. +type identPrinter struct { + ids map[*ident]string + idGen map[string]int +} + +func (p *identPrinter) unique(id *ident) string { + if p.ids == nil { + p.ids = make(map[*ident]string) + p.idGen = make(map[string]int) + } + + name, ok := p.ids[id] + if !ok { + gen := p.idGen[id.name] + p.idGen[id.name]++ + if gen == 0 { + name = id.name + } else { + name = fmt.Sprintf("%s#%d", id.name, gen) + } + p.ids[id] = name + } + + return name +} + +func (p *identPrinter) slice(ids []*ident) string { + var strs []string + for _, id := range ids { + strs = append(strs, p.unique(id)) + } + return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) +} + +func without[Elt any](s []Elt, i int) []Elt { + return append(s[:i:i], s[i+1:]...) +} diff --git a/internal/unify/html.go b/internal/unify/html.go new file mode 100644 index 00000000..d2434fe4 --- /dev/null +++ b/internal/unify/html.go @@ -0,0 +1,190 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package unify + +import ( + "fmt" + "html" + "io" + "strings" +) + +func (t *tracer) writeHTML(w io.Writer) { + if !t.saveTree { + panic("writeHTML called without tracer.saveTree") + } + + fmt.Fprintf(w, "", htmlCSS) + for _, root := range t.trees { + dot := newDotEncoder() + html := htmlTracer{w: w, dot: dot} + html.writeTree(root) + } + fmt.Fprintf(w, "\n") +} + +const htmlCSS = ` +.unify { + display: grid; + grid-auto-columns: min-content; + text-align: center; +} + +.header { + grid-row: 1; + font-weight: bold; + padding: 0.25em; + position: sticky; + top: 0; + background: white; +} + +.envFactor { + display: grid; + grid-auto-rows: min-content; + grid-template-columns: subgrid; + text-align: center; +} +` + +type htmlTracer struct { + w io.Writer + dot *dotEncoder + svgs map[*Value]string +} + +func (t *htmlTracer) writeTree(node *traceTree) { + // TODO: This could be really nice. + // + // - Put nodes that were unified on the same rank with {rank=same; a; b} + // + // - On hover, highlight nodes that node was unified with and the result. If + // it's a variable, highlight it in the environment, too. + // + // - On click, show the details of unifying that node. + // + // This could be the only way to navigate, without necessarily needing the + // whole nest of nodes. + + // TODO: It might be possible to write this out on the fly. + + t.emit([]*Value{node.v, node.w}, []string{"v", "w"}, node.envIn) + + // Render children. + for i, child := range node.children { + if i >= 10 { + fmt.Fprintf(t.w, `
...
`) + break + } + fmt.Fprintf(t.w, `
%s`, html.EscapeString(child.label)) + t.writeTree(child) + fmt.Fprintf(t.w, "
\n") + } + + // Render result. + if node.err != nil { + fmt.Fprintf(t.w, "Error: %s\n", html.EscapeString(node.err.Error())) + } else { + t.emit([]*Value{node.res}, []string{"res"}, node.env) + } +} + +func (t *htmlTracer) svg(v *Value) string { + if s, ok := t.svgs[v]; ok { + return s + } + var buf strings.Builder + t.dot.subgraph(v) + t.dot.writeSvg(&buf) + t.dot.clear() + svg := buf.String() + if t.svgs == nil { + t.svgs = make(map[*Value]string) + } + t.svgs[v] = svg + buf.Reset() + return svg +} + +func (t *htmlTracer) emit(vs []*Value, labels []string, env nonDetEnv) { + fmt.Fprintf(t.w, `
`) + for i, v := range vs { + fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) + fmt.Fprintf(t.w, `
%s
`, i+1, t.svg(v)) + } + + t.emitEnv(env, len(vs)) + + fmt.Fprintf(t.w, `
`) +} + +func (t *htmlTracer) emitEnv(env nonDetEnv, colStart int) { + if env.isBottom() { + fmt.Fprintf(t.w, `
_|_
`, colStart+1) + return + } + + colLimit := 10 + col := colStart + for i, f := range env.factors { + if i > 0 { + // Print * between each factor. + fmt.Fprintf(t.w, `
×
`, col+1) + col++ + } + + var idCols []int + for i, id := range f.ids { + var str string + if i == 0 && len(f.ids) > 1 { + str = "(" + } + if colLimit <= 0 { + str += "..." + } else { + str += html.EscapeString(t.dot.idp.unique(id)) + } + if (i == len(f.ids)-1 || colLimit <= 0) && len(f.ids) > 1 { + str += ")" + } + + fmt.Fprintf(t.w, `
%s
`, col+1, str) + idCols = append(idCols, col) + + col++ + if colLimit <= 0 { + break + } + colLimit-- + } + + fmt.Fprintf(t.w, `
`, idCols[0]+1, col+1) + rowLimit := 10 + row := 0 + for _, term := range f.terms { + // TODO: Print + between rows? With some horizontal something to + // make it clear what it applies across? + + for i, val := range term.vals { + fmt.Fprintf(t.w, `
`, row+1, idCols[i]-idCols[0]+1) + if i < len(term.vals)-1 && i == len(idCols)-1 { + fmt.Fprintf(t.w, `...
`) + break + } else if rowLimit <= 0 { + fmt.Fprintf(t.w, `...
`) + } else { + fmt.Fprintf(t.w, `%s`, t.svg(val)) + } + } + + row++ + if rowLimit <= 0 { + break + } + rowLimit-- + } + fmt.Fprintf(t.w, ``) + } +} diff --git a/internal/unify/pos.go b/internal/unify/pos.go new file mode 100644 index 00000000..4f7046a4 --- /dev/null +++ b/internal/unify/pos.go @@ -0,0 +1,33 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" +) + +type Pos struct { + Path string + Line int +} + +func (p Pos) String() string { + var b []byte + b, _ = p.AppendText(b) + return string(b) +} + +func (p Pos) AppendText(b []byte) ([]byte, error) { + if p.Line == 0 { + if p.Path == "" { + return append(b, "?:?"...), nil + } else { + return append(b, p.Path...), nil + } + } else if p.Path == "" { + return fmt.Appendf(b, "?:%d", p.Line), nil + } + return fmt.Appendf(b, "%s:%d", p.Path, p.Line), nil +} diff --git a/internal/unify/testdata/unify.yaml b/internal/unify/testdata/unify.yaml new file mode 100644 index 00000000..131e527c --- /dev/null +++ b/internal/unify/testdata/unify.yaml @@ -0,0 +1,174 @@ +# Basic tests of unification + +# +# Terminals +# + +unify: +- _ +- _ +want: + _ +--- +unify: +- _ +- test +want: + test +--- +unify: +- test +- t?est +want: + test +--- +unify: +- 1 +- 1 +want: + 1 +--- +unify: +- test +- foo +want: + _|_ + +# +# Tuple +# + +--- +unify: +- [a, b] +- [a, b] +want: + [a, b] +--- +unify: +- [a, _] +- [_, b] +want: + [a, b] +--- +unify: +- ["ab?c", "de?f"] +- [ac, def] +want: + [ac, def] + +# +# Repeats +# + +--- +unify: +- !repeat [a] +- [_] +want: + [a] +--- +unify: +- !repeat [a] +- [_, _] +want: + [a, a] +--- +unify: +- !repeat [a] +- [b] +want: + _|_ +--- +unify: +- !repeat [xy*] +- [x, xy, xyy] +want: + [x, xy, xyy] +--- +unify: +- !repeat [xy*] +- !repeat ["xz?y*"] +- [x, xy, xyy] +want: + [x, xy, xyy] +--- +unify: +- !repeat [!sum [a, b]] +- [a, b, a] +all: +- [a, b, a] 
+--- +unify: +- !repeat [!sum [a, b]] +- !repeat [!sum [b, c]] +- [b, b, b] +all: +- [b, b, b] +--- +unify: +- !repeat [!sum [a, b]] +- !repeat [!sum [b, c]] +- [a] +all: [] + +# +# Def +# + +--- +unify: +- {a: a, b: b} +- {a: a, b: b} +want: + {a: a, b: b} +--- +unify: +- {a: a} +- {b: b} +want: + {a: a, b: b} + +# +# Sum +# + +--- +unify: +- !sum [1, 2] +- !sum [2, 3] +all: +- 2 +--- +unify: +- !sum [{label: a, value: abc}, {label: b, value: def}] +- !sum [{value: "ab?c", extra: d}, {value: "def?", extra: g}] +all: +- {extra: d, label: a, value: abc} +- {extra: g, label: b, value: def} +--- +# A sum of repeats must deal with different dynamically-created variables in +# each branch. +unify: +- !sum [!repeat [a], !repeat [b]] +- [a, a, a] +all: +- [a, a, a] +--- +unify: +- !sum [!repeat [a], !repeat [b]] +- [a, a, b] +all: [] +--- +# Exercise sumEnvs with more than one result +unify: +- !sum + - [a|b, c|d] + - [e, g] +- [!sum [a, b, e, f], !sum [c, d, g, h]] +all: +- [a, c] +- [a, d] +- [b, c] +- [b, d] +- [e, g] diff --git a/internal/unify/testdata/vars.yaml b/internal/unify/testdata/vars.yaml new file mode 100644 index 00000000..fe8a57e4 --- /dev/null +++ b/internal/unify/testdata/vars.yaml @@ -0,0 +1,175 @@ +# +# Basic tests +# + +name: "basic string" +unify: +- $x +- test +all: +- test +--- +name: "basic tuple" +unify: +- [$x, $x] +- [test, test] +all: +- [test, test] +--- +name: "three tuples" +unify: +- [$x, $x] +- [test, _] +- [_, test] +all: +- [test, test] +--- +name: "basic def" +unify: +- {a: $x, b: $x} +- {a: test, b: test} +all: +- {a: test, b: test} +--- +name: "three defs" +unify: +- {a: $x, b: $x} +- {a: test} +- {b: test} +all: +- {a: test, b: test} + +# +# Bottom tests +# + +--- +name: "basic bottom" +unify: +- [$x, $x] +- [test, foo] +all: [] +--- +name: "three-way bottom" +unify: +- [$x, $x] +- [test, _] +- [_, foo] +all: [] + +# +# Basic sum tests +# + +--- +name: "basic sum" +unify: +- $x +- !sum [a, b] +all: +- a +- b +--- +name: "sum of 
tuples" +unify: +- [$x] +- !sum [[a], [b]] +all: +- [a] +- [b] +--- +name: "acausal sum" +unify: +- [_, !sum [a, b]] +- [$x, $x] +all: +- [a, a] +- [b, b] + +# +# Transitivity tests +# + +--- +name: "transitivity" +unify: +- [_, _, _, test] +- [$x, $x, _, _] +- [ _, $x, $x, _] +- [ _, _, $x, $x] +all: +- [test, test, test, test] + +# +# Multiple vars +# + +--- +name: "basic uncorrelated vars" +unify: +- - !sum [1, 2] + - !sum [3, 4] +- - $a + - $b +all: +- [1, 3] +- [1, 4] +- [2, 3] +- [2, 4] +--- +name: "uncorrelated vars" +unify: +- - !sum [1, 2] + - !sum [3, 4] + - !sum [1, 2] +- - $a + - $b + - $a +all: +- [1, 3, 1] +- [1, 4, 1] +- [2, 3, 2] +- [2, 4, 2] +--- +name: "entangled vars" +unify: +- - !sum [[1,2],[3,4]] + - !sum [[2,1],[3,4],[4,3]] +- - [$a, $b] + - [$b, $a] +all: +- - [1, 2] + - [2, 1] +- - [3, 4] + - [4, 3] + +# +# End-to-end examples +# + +--- +name: "end-to-end" +unify: +- go: Add + in: + - go: $t + - go: $t +- in: !repeat + - !sum + - go: Int32x4 + base: int + - go: Uint32x4 + base: uint +all: +- go: Add + in: + - base: int + go: Int32x4 + - base: int + go: Int32x4 +- go: Add + in: + - base: uint + go: Uint32x4 + - base: uint + go: Uint32x4 diff --git a/internal/unify/trace.go b/internal/unify/trace.go new file mode 100644 index 00000000..f1a7ea2c --- /dev/null +++ b/internal/unify/trace.go @@ -0,0 +1,168 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "io" + "strings" + + "gopkg.in/yaml.v3" +) + +// debugDotInHTML, if true, includes dot code for all graphs in the HTML. Useful +// for debugging the dot output itself. +const debugDotInHTML = false + +var Debug struct { + // UnifyLog, if non-nil, receives a streaming text trace of unification. + UnifyLog io.Writer + + // HTML, if non-nil, writes an HTML trace of unification to HTML. 
// tracer accumulates a trace of a unification run. A nil *tracer is valid
// and disables all tracing (every method nil-checks its receiver).
type tracer struct {
	logw io.Writer // streaming text log destination, or nil

	enc yamlEncoder // Print consistent idents throughout

	saveTree bool // if set, record tree; required for HTML output

	// path is the stack of labels from enter calls that have not yet been
	// exited; it prefixes every logf line.
	path []string

	node  *traceTree   // current position in the tree being recorded
	trees []*traceTree // roots of all recorded unification trees
}

// traceTree is one node of the recorded unification tree: a single
// Value-vs-Value unification step and its result.
type traceTree struct {
	label string // Identifies this node as a child of parent
	v, w  *Value // Unification inputs
	envIn nonDetEnv
	res   *Value // Unification result
	env   nonDetEnv
	err   error // or error

	parent   *traceTree
	children []*traceTree
}

// tracerExit undoes a matching tracer.enter; the zero value is a no-op.
type tracerExit struct {
	t    *tracer
	len  int        // length to truncate t.path back to
	node *traceTree // node to restore as t.node
}

// enter pushes a formatted label onto the trace path (and, when tree
// recording is on, descends into a new child tree node). The returned
// tracerExit must be used to pop the label again.
func (t *tracer) enter(pat string, vals ...any) tracerExit {
	if t == nil {
		return tracerExit{}
	}

	label := fmt.Sprintf(pat, vals...)

	var p *traceTree
	if t.saveTree {
		p = t.node
		if p != nil {
			t.node = &traceTree{label: label, parent: p}
			p.children = append(p.children, t.node)
		}
	}

	t.path = append(t.path, label)
	return tracerExit{t, len(t.path) - 1, p}
}

// enterVar is enter specialized for descending into one branch of a
// variable's bindings.
func (t *tracer) enterVar(id *ident, branch int) tracerExit {
	if t == nil {
		return tracerExit{}
	}

	// Use the tracer's ident printer
	return t.enter("Var %s br %d", t.enc.idp.unique(id), branch)
}

// exit restores the tracer to its state before the matching enter call.
func (te tracerExit) exit() {
	if te.t == nil {
		return
	}
	te.t.path = te.t.path[:te.len]
	te.t.node = te.node
}
+ indent = strings.Repeat(" ", len(prefix)) + } + return prefix + strings.ReplaceAll(s, "\n", "\n"+indent) +} + +func yamlf(prefix string, node *yaml.Node) string { + b, err := yaml.Marshal(node) + if err != nil { + return fmt.Sprintf("", err) + } + return strings.TrimRight(indentf(prefix, "%s", b), " \n") +} + +func (t *tracer) logf(pat string, vals ...any) { + if t == nil || t.logw == nil { + return + } + prefix := fmt.Sprintf("[%s] ", strings.Join(t.path, "/")) + s := indentf(prefix, pat, vals...) + s = strings.TrimRight(s, " \n") + fmt.Fprintf(t.logw, "%s\n", s) +} + +func (t *tracer) traceUnify(v, w *Value, e nonDetEnv) { + if t == nil { + return + } + + t.logf("Unify\n%s\nwith\n%s\nin\n%s", + yamlf(" ", t.enc.value(v)), + yamlf(" ", t.enc.value(w)), + yamlf(" ", t.enc.env(e))) + + if t.saveTree { + if t.node == nil { + t.node = &traceTree{} + t.trees = append(t.trees, t.node) + } + t.node.v, t.node.w, t.node.envIn = v, w, e + } +} + +func (t *tracer) traceDone(res *Value, e nonDetEnv, err error) { + if t == nil { + return + } + + if err != nil { + t.logf("==> %s", err) + } else { + t.logf("==>\n%s", yamlf(" ", t.enc.closure(Closure{res, e}))) + } + + if t.saveTree { + node := t.node + if node == nil { + panic("popped top of trace stack") + } + node.res, node.err = res, err + node.env = e + } +} diff --git a/internal/unify/unify.go b/internal/unify/unify.go new file mode 100644 index 00000000..6ebed7bd --- /dev/null +++ b/internal/unify/unify.go @@ -0,0 +1,322 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package unify implements unification of structured values. +// +// A [Value] represents a possibly infinite set of concrete values, where a +// value is either a string ([String]), a tuple of values ([Tuple]), or a +// string-keyed map of values called a "def" ([Def]). These sets can be further +// constrained by variables ([Var]). 
A [Value] combined with bindings of +// variables is a [Closure]. +// +// [Unify] finds a [Closure] that satisfies two or more other [Closure]s. This +// can be thought of as intersecting the sets represented by these Closures' +// values, or as the greatest lower bound/infimum of these Closures. If no such +// Closure exists, the result of unification is "bottom", or the empty set. +// +// # Examples +// +// The regular expression "a*" is the infinite set of strings of zero or more +// "a"s. "a*" can be unified with "a" or "aa" or "aaa", and the result is just +// "a", "aa", or "aaa", respectively. However, unifying "a*" with "b" fails +// because there are no values that satisfy both. +// +// Sums express sets directly. For example, !sum [a, b] is the set consisting of +// "a" and "b". Unifying this with !sum [b, c] results in just "b". This also +// makes it easy to demonstrate that unification isn't necessarily a single +// concrete value. For example, unifying !sum [a, b, c] with !sum [b, c, d] +// results in two concrete values: "b" and "c". +// +// The special value _ or "top" represents all possible values. Unifying _ with +// any value x results in x. +// +// Unifying composite values—tuples and defs—unifies their elements. +// +// The value [a*, aa] is an infinite set of tuples. If we unify that with the +// value [aaa, a*], the only possible value that satisfies both is [aaa, aa]. +// Likewise, this is the intersection of the sets described by these two values. +// +// Defs are similar to tuples, but they are indexed by strings and don't have a +// fixed length. For example, {x: a, y: b} is a def with two fields. Any field +// not mentioned in a def is implicitly top. Thus, unifying this with {y: b, z: +// c} results in {x: a, y: b, z: c}. +// +// Variables constrain values. For example, the value [$x, $x] represents all +// tuples whose first and second values are the same, but doesn't otherwise +// constrain that value. 
// Unify computes a Closure that satisfies each input Closure. If no such
// Closure exists, it returns bottom.
//
// Unification is performed pairwise, left to right: the running result is
// unified with each subsequent closure in turn.
func Unify(closures ...Closure) (Closure, error) {
	if len(closures) == 0 {
		// Unifying nothing yields top: the set of all values.
		return Closure{topValue, topEnv}, nil
	}

	// Tracing is shared across all pairwise steps so the whole run appears
	// in one log/HTML trace.
	var trace *tracer
	if Debug.UnifyLog != nil || Debug.HTML != nil {
		trace = &tracer{
			logw:     Debug.UnifyLog,
			saveTree: Debug.HTML != nil,
		}
	}

	unified := closures[0]
	for _, c := range closures[1:] {
		var err error
		uf := newUnifier()
		uf.tracer = trace
		// Combine the two environments before unifying the values under
		// the combined environment.
		e := crossEnvs(unified.env, c.env)
		unified.val, unified.env, err = unified.val.unify(c.val, e, false, uf)
		if Debug.HTML != nil {
			uf.writeHTML(Debug.HTML)
		}
		if err != nil {
			return Closure{}, err
		}
	}

	return unified, nil
}
// unify1 attempts to unify v with w in environment e, dispatching on v's
// domain kind. It handles each (v, w) kind pairing in one direction only;
// if v's kind cannot drive the unification it returns the errDomains
// sentinel, and the caller (Value.unify) retries with the operands
// swapped before reporting a real kind-mismatch error.
//
// swap records whether the caller already swapped the operands, so that
// nested unifications can keep reporting positions in the user's order.
func (v *Value) unify1(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) {
	// TODO: If there's an error, attach position information to it.

	vd, wd := v.Domain, w.Domain

	// Bottom returns bottom, and eliminates all possible environments.
	if vd == nil || wd == nil {
		return nil, bottomEnv, nil
	}

	// Top always returns the other.
	if _, ok := vd.(Top); ok {
		return wd, e, nil
	}

	// Variables
	if vd, ok := vd.(Var); ok {
		return vd.unify(w, e, swap, uf)
	}

	// Composite values
	if vd, ok := vd.(Def); ok {
		if wd, ok := wd.(Def); ok {
			return vd.unify(wd, e, swap, uf)
		}
	}
	if vd, ok := vd.(Tuple); ok {
		if wd, ok := wd.(Tuple); ok {
			return vd.unify(wd, e, swap, uf)
		}
	}

	// Scalar values
	if vd, ok := vd.(String); ok {
		if wd, ok := wd.(String); ok {
			res := vd.unify(wd)
			if res == nil {
				// String unification failed, so no environment survives.
				e = bottomEnv
			}
			return res, e, nil
		}
	}

	// v's kind doesn't know how to unify with w; let the caller try the
	// reverse direction.
	return nil, nonDetEnv{}, errDomains
}
// unify unifies two tuples. Fixed-length tuples unify element-wise;
// repeated (lazily generated) tuples are expanded to the other side's
// length first. If both sides are repeated, expansion is impossible, so
// their generators are concatenated for later unification.
func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) {
	if v.repeat != nil && w.repeat != nil {
		// Since we generate the content of these lazily, there's not much we
		// can do but just stick them on a list to unify later.
		return Tuple{repeat: concat(v.repeat, w.repeat)}, e, nil
	}

	// Expand any repeated tuples. At most one side is repeated here, so
	// the other side's fixed length tells us how many elements to generate.
	tuples := make([]Tuple, 0, 2)
	if v.repeat == nil {
		tuples = append(tuples, v)
	} else {
		v2, e2 := v.doRepeat(e, len(w.vs))
		tuples = append(tuples, v2...)
		e = e2
	}
	if w.repeat == nil {
		tuples = append(tuples, w)
	} else {
		w2, e2 := w.doRepeat(e, len(v.vs))
		tuples = append(tuples, w2...)
		e = e2
	}

	// Now unify all of the tuples (usually this will be just 2 tuples)
	out := tuples[0]
	for _, t := range tuples[1:] {
		if len(out.vs) != len(t.vs) {
			// Length mismatch means no tuple can satisfy both: bottom.
			uf.logf("tuple length mismatch")
			return nil, bottomEnv, nil
		}
		zs := make([]*Value, len(out.vs))
		for i, v1 := range out.vs {
			exit := uf.enter("%d", i)
			z, e2, err := v1.unify(t.vs[i], e, swap, uf)
			exit.exit()
			if err != nil {
				return nil, nonDetEnv{}, err
			} else if z.Domain == nil {
				// One bottom element makes the whole tuple bottom.
				return nil, bottomEnv, nil
			}
			zs[i] = z
			// Thread the narrowed environment through later elements.
			e = e2
		}
		out = Tuple{vs: zs}
	}

	return out, e, nil
}
// concat returns the concatenation of a and b. When a is empty it returns
// b unchanged (no allocation); otherwise it appends b to a copy-on-grow
// view of a whose capacity is clipped so the append can never clobber
// memory shared with other slices aliasing a's backing array.
func concat[T any](a, b []T) []T {
	if len(a) == 0 {
		return b
	}
	// Full slice expression: capacity == length forces append to allocate
	// if it needs room, instead of writing past a's end.
	clipped := a[:len(a):len(a)]
	return append(clipped, b...)
}
+ base := filepath.Base(path) + if base[0] == '_' { + continue + } + if !strings.HasSuffix(base, ".yaml") { + t.Errorf("non-.yaml file in testdata: %s", base) + continue + } + base = strings.TrimSuffix(base, ".yaml") + + t.Run(base, func(t *testing.T) { + testUnify(t, path) + }) + } +} + +func testUnify(t *testing.T, path string) { + f, err := os.Open(path) + if err != nil { + t.Fatal(err) + } + defer f.Close() + + type testCase struct { + Skip bool + Name string + Unify []Closure + Want yaml.Node + All yaml.Node + } + dec := yaml.NewDecoder(f) + + for i := 0; ; i++ { + var tc testCase + err := dec.Decode(&tc) + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + + name := tc.Name + if name == "" { + name = fmt.Sprint(i) + } + + t.Run(name, func(t *testing.T) { + if tc.Skip { + t.Skip("skip: true set in test case") + } + + defer func() { + p := recover() + if p != nil || t.Failed() { + // Redo with a trace + // + // TODO: Use t.Output() in Go 1.25. + var buf bytes.Buffer + Debug.UnifyLog = &buf + func() { + defer func() { + // If the original unify panicked, the second one + // probably will, too. Ignore it and let the first panic + // bubble. + recover() + }() + Unify(tc.Unify...) + }() + Debug.UnifyLog = nil + t.Logf("Trace:\n%s", buf.String()) + } + if p != nil { + panic(p) + } + }() + + // Unify the test cases + // + // TODO: Try reordering the inputs also + c, err := Unify(tc.Unify...) + if err != nil { + // TODO: Tests of errors + t.Fatal(err) + } + + // Encode the result back to YAML so we can check if it's structurally + // equal. + clean := func(val any) *yaml.Node { + var node yaml.Node + node.Encode(val) + for n := range allYamlNodes(&node) { + // Canonicalize the style. There may be other style flags we need to + // muck with. 
+ n.Style &^= yaml.FlowStyle + n.HeadComment = "" + n.LineComment = "" + n.FootComment = "" + } + return &node + } + check := func(gotVal any, wantNode *yaml.Node) { + got, err := yaml.Marshal(clean(gotVal)) + if err != nil { + t.Fatalf("Encoding Value back to yaml failed: %s", err) + } + want, err := yaml.Marshal(clean(wantNode)) + if err != nil { + t.Fatalf("Encoding Want back to yaml failed: %s", err) + } + + if !bytes.Equal(got, want) { + t.Errorf("%s:%d:\nwant:\n%sgot\n%s", f.Name(), wantNode.Line, want, got) + } + } + if tc.Want.Kind != 0 { + check(c.val, &tc.Want) + } + if tc.All.Kind != 0 { + fVal := slices.Collect(c.All()) + check(fVal, &tc.All) + } + }) + } +} diff --git a/internal/unify/value.go b/internal/unify/value.go new file mode 100644 index 00000000..6bf121af --- /dev/null +++ b/internal/unify/value.go @@ -0,0 +1,129 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "fmt" + "iter" + "reflect" +) + +// A Value represents a structured, non-deterministic value consisting of +// strings, tuples of Values, and string-keyed maps of Values. A +// non-deterministic Value will also contain variables, which are resolved via +// an environment as part of a [Closure]. +// +// For debugging, a Value can also track the source position it was read from in +// an input file, and its provenance from other Values. +type Value struct { + Domain Domain + + // A Value has either a pos or parents (or neither). + pos *Pos + parents *[2]*Value +} + +var ( + topValue = &Value{Domain: Top{}} + bottomValue = &Value{Domain: nil} +) + +// NewValue returns a new [Value] with the given domain and no position +// information. +func NewValue(d Domain) *Value { + return &Value{Domain: d} +} + +// NewValuePos returns a new [Value] with the given domain at position p. 
+func NewValuePos(d Domain, p Pos) *Value { + return &Value{Domain: d, pos: &p} +} + +// newValueFrom returns a new [Value] with the given domain that copies the +// position information of p. +func newValueFrom(d Domain, p *Value) *Value { + return &Value{Domain: d, pos: p.pos, parents: p.parents} +} + +func unified(d Domain, p1, p2 *Value) *Value { + return &Value{Domain: d, parents: &[2]*Value{p1, p2}} +} + +func (v *Value) Pos() Pos { + if v.pos == nil { + return Pos{} + } + return *v.pos +} + +func (v *Value) PosString() string { + var b []byte + for root := range v.Provenance() { + if len(b) > 0 { + b = append(b, ' ') + } + b, _ = root.pos.AppendText(b) + } + return string(b) +} + +func (v *Value) Exact() bool { + if v.Domain == nil { + return false + } + return v.Domain.Exact() +} + +// Decode decodes v into a Go value. +// +// v must be exact, except that it can include Top. into must be a pointer. +// [Def]s are decoded into structs. [Tuple]s are decoded into slices. [String]s +// are decoded into strings or ints. Any field can itself be a pointer to one of +// these types. Top can be decoded into a pointer-typed field and will set the +// field to nil. Anything else will allocate a value if necessary. +func (v *Value) Decode(into any) error { + rv := reflect.ValueOf(into) + if rv.Kind() != reflect.Pointer { + return fmt.Errorf("cannot decode into non-pointer %T", into) + } + return v.Domain.decode(rv) +} + +func preDecode(rv reflect.Value, kind reflect.Kind, name string) (reflect.Value, error) { + if rv.Kind() == kind { + return rv, nil + } + if rv.Kind() == reflect.Pointer && rv.Type().Elem().Kind() == kind { + if rv.IsNil() { + rv.Set(reflect.New(rv.Type().Elem())) + } + return rv.Elem(), nil + } + return reflect.Value{}, fmt.Errorf("cannot decode %s into %s", name, rv.Type()) +} + +// Provenance iterates over all of the source Values that have contributed to +// this Value. 
// Provenance iterates over all of the source Values that have contributed to
// this Value.
//
// It walks v and, recursively, the parent pairs recorded by unification,
// yielding only Values that carry a source position (i.e. those read
// directly from an input file). If the consumer stops early, the false
// return from yield is propagated up the recursion to halt the walk.
func (v *Value) Provenance() iter.Seq[*Value] {
	return func(yield func(*Value) bool) {
		var rec func(d *Value) bool
		rec = func(d *Value) bool {
			if d.pos != nil {
				// Only positioned Values are sources worth reporting.
				if !yield(d) {
					return false
				}
			}
			if d.parents != nil {
				for _, p := range d.parents {
					if !rec(p) {
						return false
					}
				}
			}
			return true
		}
		rec(v)
	}
}
+ StringReplacer func(string) string +} + +// UnmarshalYAML unmarshals a YAML node into a Closure. +// +// This is how UnmarshalYAML maps YAML nodes into terminal Values: +// +// - "_" or !top _ is the top value ([Top]). +// +// - "_|_" or !bottom _ is the bottom value. This is an error during +// unmarshaling, but can appear in marshaled values. +// +// - "$" or !var is a variable ([Var]). Everywhere the same name +// appears within a single unmarshal operation, it is mapped to the same +// variable. Different unmarshal operations get different variables, even if +// they have the same string name. +// +// - !regex "x" is a regular expression ([String]), as is any string that +// doesn't match "_", "_|_", or "$...". Regular expressions are implicitly +// anchored at the beginning and end. If the string doesn't contain any +// meta-characters (that is, it's a "literal" regular expression), then it's +// treated as an exact string. +// +// - !string "x", or any int, float, bool, or binary value is an exact string +// ([String]). +// +// - !regex [x, y, ...] is an intersection of regular expressions ([String]). +// +// This is how UnmarshalYAML maps YAML nodes into non-terminal Values: +// +// - Sequence nodes like [x, y, z] are tuples ([Tuple]). +// +// - !repeat [x] is a repeated tuple ([Tuple]), which is 0 or more instances of +// x. There must be exactly one element in the list. +// +// - Mapping nodes like {a: x, b: y} are defs ([Def]). Any fields not listed are +// implicitly top. +// +// - !sum [x, y, z] is a sum of its children. This can be thought of as a union +// of the values x, y, and z, or as a non-deterministic choice between x, y, and +// z. If a variable appears both inside the sum and outside of it, only the +// non-deterministic choice view really works. The unifier does not directly +// implement sums; instead, this is decoded as a fresh variable that's +// simultaneously bound to x, y, and z. 
+func (c *Closure) UnmarshalYAML(node *yaml.Node) error { + return c.unmarshal(node, UnmarshalOpts{}) +} + +// Unmarshal is like [UnmarshalYAML], but accepts options and reads from r. If +// opts.Path is "" and r has a Name() string method, the result of r.Name() is +// used as the path for all [Value]s read from r. +func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error { + if opts.Path == "" { + type named interface{ Name() string } + if n, ok := r.(named); ok { + opts.Path = n.Name() + } + } + + var node yaml.Node + if err := yaml.NewDecoder(r).Decode(&node); err != nil { + return err + } + np := &node + if np.Kind == yaml.DocumentNode { + np = node.Content[0] + } + return c.unmarshal(np, opts) +} + +func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error { + dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident)} + val, err := dec.value(node) + if err != nil { + return err + } + vars := make(map[*ident]*Value) + for _, id := range dec.vars { + vars[id] = topValue + } + *c = Closure{val, dec.env} + return nil +} + +type yamlDecoder struct { + opts UnmarshalOpts + + vars map[string]*ident + nSums int + + env nonDetEnv +} + +func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { + pos := &Pos{Path: dec.opts.Path, Line: node.Line} + + // Resolve alias nodes. + if node.Kind == yaml.AliasNode { + node = node.Alias + } + + mk := func(d Domain) (*Value, error) { + v := &Value{Domain: d, pos: pos} + return v, nil + } + mk2 := func(d Domain, err error) (*Value, error) { + if err != nil { + return nil, err + } + return mk(d) + } + + // is tests the kind and long tag of node. + is := func(kind yaml.Kind, tag string) bool { + return node.Kind == kind && node.LongTag() == tag + } + isExact := func() bool { + if node.Kind != yaml.ScalarNode { + return false + } + // We treat any string-ish YAML node as a string. 
+ switch node.LongTag() { + case "!string", "tag:yaml.org,2002:int", "tag:yaml.org,2002:float", "tag:yaml.org,2002:bool", "tag:yaml.org,2002:binary": + return true + } + return false + } + + // !!str nodes provide a short-hand syntax for several leaf domains that are + // also available under explicit tags. To simplify checking below, we set + // strVal to non-"" only for !!str nodes. + strVal := "" + isStr := is(yaml.ScalarNode, "tag:yaml.org,2002:str") + if isStr { + strVal = node.Value + } + + switch { + case is(yaml.ScalarNode, "!var"): + strVal = "$" + node.Value + fallthrough + case strings.HasPrefix(strVal, "$"): + id, ok := dec.vars[strVal] + if !ok { + // We encode different idents with the same string name by adding a + // #N suffix. Strip that off so it doesn't accumulate. This isn't + // meant to be used in user-written input, though nothing stops that. + name, _, _ := strings.Cut(strVal, "#") + id = &ident{name: name} + dec.vars[strVal] = id + dec.env = dec.env.bind(id, topValue) + } + return mk(Var{id: id}) + + case strVal == "_" || is(yaml.ScalarNode, "!top"): + return mk(Top{}) + + case strVal == "_|_" || is(yaml.ScalarNode, "!bottom"): + return nil, errors.New("found bottom") + + case isExact(): + val := node.Value + if dec.opts.StringReplacer != nil { + val = dec.opts.StringReplacer(val) + } + return mk(NewStringExact(val)) + + case isStr || is(yaml.ScalarNode, "!regex"): + // Any other string we treat as a regex. This will produce an exact + // string anyway if the regex is literal. 
+ val := node.Value + if dec.opts.StringReplacer != nil { + val = dec.opts.StringReplacer(val) + } + return mk2(NewStringRegex(val)) + + case is(yaml.SequenceNode, "!regex"): + var vals []string + if err := node.Decode(&vals); err != nil { + return nil, err + } + return mk2(NewStringRegex(vals...)) + + case is(yaml.MappingNode, "tag:yaml.org,2002:map"): + var fields []string + var vals []*Value + for i := 0; i < len(node.Content); i += 2 { + key := node.Content[i] + if key.Kind != yaml.ScalarNode { + return nil, fmt.Errorf("non-scalar key %q", key.Value) + } + val, err := dec.value(node.Content[i+1]) + if err != nil { + return nil, err + } + fields = append(fields, key.Value) + vals = append(vals, val) + } + return mk(NewDef(fields, vals)) + + case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"): + elts := node.Content + vs := make([]*Value, 0, len(elts)) + for _, elt := range elts { + v, err := dec.value(elt) + if err != nil { + return nil, err + } + vs = append(vs, v) + } + return mk(NewTuple(vs...)) + + case is(yaml.SequenceNode, "!repeat") || is(yaml.SequenceNode, "!repeat-unify"): + // !repeat must have one child. !repeat-unify is used internally for + // delayed unification, and is the same, it's just allowed to have more + // than one child. + if node.LongTag() == "!repeat" && len(node.Content) != 1 { + return nil, fmt.Errorf("!repeat must have exactly one child") + } + + // Decode the children to make sure they're well-formed, but otherwise + // discard that decoding and do it again every time we need a new + // element. + var gen []func(e nonDetEnv) (*Value, nonDetEnv) + origEnv := dec.env + elts := node.Content + for i, elt := range elts { + _, err := dec.value(elt) + if err != nil { + return nil, err + } + // Undo any effects on the environment. We *do* keep any named + // variables that were added to the vars map in case they were + // introduced within the element. 
+ dec.env = origEnv + // Add a generator function + gen = append(gen, func(e nonDetEnv) (*Value, nonDetEnv) { + dec.env = e + // TODO: If this is in a sum, this tends to generate a ton of + // fresh variables that are different on each branch of the + // parent sum. Does it make sense to hold on to the i'th value + // of the tuple after we've generated it? + v, err := dec.value(elts[i]) + if err != nil { + // It worked the first time, so this really shouldn't hapen. + panic("decoding repeat element failed") + } + return v, dec.env + }) + } + return mk(NewRepeat(gen...)) + + case is(yaml.SequenceNode, "!sum"): + vs := make([]*Value, 0, len(node.Content)) + for _, elt := range node.Content { + v, err := dec.value(elt) + if err != nil { + return nil, err + } + vs = append(vs, v) + } + if len(vs) == 1 { + return vs[0], nil + } + + // A sum is implemented as a fresh variable that's simultaneously bound + // to each of the descendants. + id := &ident{name: fmt.Sprintf("sum%d", dec.nSums)} + dec.nSums++ + dec.env = dec.env.bind(id, vs...) + return mk(Var{id: id}) + } + + return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag) +} + +type yamlEncoder struct { + idp identPrinter + e nonDetEnv // We track the environment for !repeat nodes. +} + +// TODO: Switch some Value marshaling to Closure? + +func (c Closure) MarshalYAML() (any, error) { + // TODO: If the environment is trivial, just marshal the value. 
+ enc := &yamlEncoder{} + return enc.closure(c), nil +} + +func (c Closure) String() string { + b, err := yaml.Marshal(c) + if err != nil { + return fmt.Sprintf("marshal failed: %s", err) + } + return string(b) +} + +func (v *Value) MarshalYAML() (any, error) { + enc := &yamlEncoder{} + return enc.value(v), nil +} + +func (v *Value) String() string { + b, err := yaml.Marshal(v) + if err != nil { + return fmt.Sprintf("marshal failed: %s", err) + } + return string(b) +} + +func (enc *yamlEncoder) closure(c Closure) *yaml.Node { + enc.e = c.env + var n yaml.Node + n.Kind = yaml.MappingNode + n.Tag = "!closure" + n.Content = make([]*yaml.Node, 4) + n.Content[0] = new(yaml.Node) + n.Content[0].SetString("env") + n.Content[2] = new(yaml.Node) + n.Content[2].SetString("in") + n.Content[3] = enc.value(c.val) + // Fill in the env after we've written the value in case value encoding + // affects the env. + n.Content[1] = enc.env(enc.e) + enc.e = nonDetEnv{} // Allow GC'ing the env + return &n +} + +func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node { + var n yaml.Node + n.Kind = yaml.SequenceNode + n.Tag = "!env" + for _, term := range e.factors { + var nTerm yaml.Node + n.Content = append(n.Content, &nTerm) + nTerm.Kind = yaml.SequenceNode + for _, det := range term.terms { + var nDet yaml.Node + nTerm.Content = append(nTerm.Content, &nDet) + nDet.Kind = yaml.MappingNode + for i, val := range det.vals { + var nLabel yaml.Node + nLabel.SetString(enc.idp.unique(term.ids[i])) + nDet.Content = append(nDet.Content, &nLabel, enc.value(val)) + } + } + } + return &n +} + +var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`) + +func (enc *yamlEncoder) value(v *Value) *yaml.Node { + var n yaml.Node + switch d := v.Domain.(type) { + case nil: + // Not allowed by unmarshaler, but useful for understanding when + // something goes horribly wrong. + // + // TODO: We might be able to track useful provenance for this, which + // would really help with debugging unexpected bottoms. 
+ n.SetString("_|_") + return &n + + case Top: + n.SetString("_") + return &n + + case Def: + n.Kind = yaml.MappingNode + for k, elt := range d.All() { + var kn yaml.Node + kn.SetString(k) + n.Content = append(n.Content, &kn, enc.value(elt)) + } + n.HeadComment = v.PosString() + return &n + + case Tuple: + n.Kind = yaml.SequenceNode + if d.repeat == nil { + for _, elt := range d.vs { + n.Content = append(n.Content, enc.value(elt)) + } + } else { + if len(d.repeat) == 1 { + n.Tag = "!repeat" + } else { + n.Tag = "!repeat-unify" + } + // TODO: I'm not positive this will round-trip everything correctly. + for _, gen := range d.repeat { + v, e := gen(enc.e) + enc.e = e + n.Content = append(n.Content, enc.value(v)) + } + } + return &n + + case String: + switch d.kind { + case stringExact: + // Make this into a "nice" !!int node if I can. + if yamlIntRe.MatchString(d.exact) { + n.SetString(d.exact) + n.Tag = "tag:yaml.org,2002:int" + return &n + } + n.SetString(regexp.QuoteMeta(d.exact)) + return &n + case stringRegex: + o := make([]string, 0, 1) + for _, re := range d.re { + s := re.String() + s = strings.TrimSuffix(strings.TrimPrefix(s, `\A(?:`), `)\z`) + o = append(o, s) + } + if len(o) == 1 { + n.SetString(o[0]) + return &n + } + n.Encode(o) + n.Tag = "!regex" + return &n + } + panic("bad String kind") + + case Var: + // TODO: If Var only appears once in the whole Value and is independent + // in the environment (part of a term that is only over Var), then emit + // this as a !sum instead. + if false { + var vs []*Value // TODO: Get values of this var. 
+ if len(vs) == 1 { + return enc.value(vs[0]) + } + n.Kind = yaml.SequenceNode + n.Tag = "!sum" + for _, elt := range vs { + n.Content = append(n.Content, enc.value(elt)) + } + return &n + } + n.SetString(enc.idp.unique(d.id)) + if !strings.HasPrefix(d.id.name, "$") { + n.Tag = "!var" + } + return &n + } + panic(fmt.Sprintf("unknown domain type %T", v.Domain)) +} diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go new file mode 100644 index 00000000..af73001d --- /dev/null +++ b/internal/unify/yaml_test.go @@ -0,0 +1,91 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unify + +import ( + "bytes" + "fmt" + "iter" + + "gopkg.in/yaml.v3" +) + +func mustParse(expr string) Closure { + var c Closure + if err := yaml.Unmarshal([]byte(expr), &c); err != nil { + panic(err) + } + return c +} + +func printYaml(val any) { + b, err := yaml.Marshal(val) + if err != nil { + panic(err) + } + var node yaml.Node + if err := yaml.Unmarshal(b, &node); err != nil { + panic(err) + } + + // Map lines to start offsets. We'll use this to figure out when nodes are + // "small" and should use inline style. + lines := []int{-1, 0} + for pos := 0; pos < len(b); { + next := bytes.IndexByte(b[pos:], '\n') + if next == -1 { + break + } + pos += next + 1 + lines = append(lines, pos) + } + lines = append(lines, len(b)) + + // Strip comments and switch small nodes to inline style + cleanYaml(&node, lines, len(b)) + + b, err = yaml.Marshal(&node) + if err != nil { + panic(err) + } + fmt.Println(string(b)) +} + +func cleanYaml(node *yaml.Node, lines []int, endPos int) { + node.HeadComment = "" + node.FootComment = "" + node.LineComment = "" + + for i, n2 := range node.Content { + end2 := endPos + if i < len(node.Content)-1 { + end2 = lines[node.Content[i+1].Line] + } + cleanYaml(n2, lines, end2) + } + + // Use inline style? 
+ switch node.Kind { + case yaml.MappingNode, yaml.SequenceNode: + if endPos-lines[node.Line] < 40 { + node.Style = yaml.FlowStyle + } + } +} + +func allYamlNodes(n *yaml.Node) iter.Seq[*yaml.Node] { + return func(yield func(*yaml.Node) bool) { + if !yield(n) { + return + } + for _, n2 := range n.Content { + for n3 := range allYamlNodes(n2) { + if !yield(n3) { + return + } + } + } + } +} From bd330f759f85522c5d585854ef9dcc217a26b712 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sun, 20 Apr 2025 17:09:40 -0400 Subject: [PATCH 052/200] internal/simdgen: initial work on Go<->SIMD generator This can parse XED data into a unifier structure, and unify it with hand-written definitions of SIMD-to-Go mappings. Change-Id: Ie89e328845cde5752ddb3013ebfccc167e85b0bf Reviewed-on: https://go-review.googlesource.com/c/arch/+/667035 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/asm.yaml.toy | 92 +++++++++++++ internal/simdgen/categories.yaml | 7 + internal/simdgen/go.yaml | 19 +++ internal/simdgen/godefs.go | 98 ++++++++++++++ internal/simdgen/main.go | 217 ++++++++++++++++++++++++++++++ internal/simdgen/types.yaml | 41 ++++++ internal/simdgen/xed.go | 221 +++++++++++++++++++++++++++++++ 7 files changed, 695 insertions(+) create mode 100644 internal/simdgen/asm.yaml.toy create mode 100644 internal/simdgen/categories.yaml create mode 100644 internal/simdgen/go.yaml create mode 100644 internal/simdgen/godefs.go create mode 100644 internal/simdgen/main.go create mode 100644 internal/simdgen/types.yaml create mode 100644 internal/simdgen/xed.go diff --git a/internal/simdgen/asm.yaml.toy b/internal/simdgen/asm.yaml.toy new file mode 100644 index 00000000..76970868 --- /dev/null +++ b/internal/simdgen/asm.yaml.toy @@ -0,0 +1,92 @@ +# Hand-written toy input like -xedPath would generate. +# This input can be substituted for -xedPath. 
+!sum +- asm: ADDPS + goarch: amd64 + feature: "SSE2" + in: + - asmPos: 0 + base: float + bits: 32 + w: 128 + - asmPos: 1 + base: float + bits: 32 + w: 128 + out: + - asmPos: 0 + base: float + bits: 32 + w: 128 + +- asm: ADDPD + goarch: amd64 + feature: "SSE2" + in: + - asmPos: 0 + base: float + bits: 64 + w: 128 + - asmPos: 1 + base: float + bits: 64 + w: 128 + out: + - asmPos: 0 + base: float + bits: 64 + w: 128 + +- asm: PADDB + goarch: amd64 + feature: "SSE2" + in: + - asmPos: 0 + base: int|uint + bits: 32 + w: 128 + - asmPos: 1 + base: int|uint + bits: 32 + w: 128 + out: + - asmPos: 0 + base: int|uint + bits: 32 + w: 128 + +- asm: VPADDB + goarch: amd64 + feature: "AVX" + in: + - asmPos: 1 + base: int|uint + bits: 8 + w: 128 + - asmPos: 2 + base: int|uint + bits: 8 + w: 128 + out: + - asmPos: 0 + base: int|uint + bits: 8 + w: 128 + +- asm: VPADDB + goarch: amd64 + feature: "AVX2" + in: + - asmPos: 1 + base: int|uint + bits: 8 + w: 256 + - asmPos: 2 + base: int|uint + bits: 8 + w: 256 + out: + - asmPos: 0 + base: int|uint + bits: 8 + w: 256 diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml new file mode 100644 index 00000000..b7be71ce --- /dev/null +++ b/internal/simdgen/categories.yaml @@ -0,0 +1,7 @@ +!sum +- go: Add + category: binary +- go: AddSaturated + category: binary +- go: Sub + category: binary diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml new file mode 100644 index 00000000..efa51303 --- /dev/null +++ b/internal/simdgen/go.yaml @@ -0,0 +1,19 @@ +!sum +# For binary operations, we constrain their two inputs and one output to the +# same Go type using a variable. 
+- go: Add + asm: "V?PADD$xi|V?ADDP$xf" + in: + - go: $t + - go: $t + out: + - go: $t + +- go: Sub + goarch: amd64 + asm: "V?PSUB$xi|V?SUBP$xf" + in: + - go: $t + - go: $t + out: + - go: $t diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go new file mode 100644 index 00000000..6a6ff4f5 --- /dev/null +++ b/internal/simdgen/godefs.go @@ -0,0 +1,98 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "io" + "log" + + "golang.org/x/arch/internal/unify" +) + +type Operation struct { + Go string // Go method name + Category *string // General operation category (optional) + + GoArch string // GOARCH for this definition + Asm string // Assembly mnemonic + + In []Operand // Arguments + Out []Operand // Results +} + +type Operand struct { + Go string // Go type of this operand + AsmPos int // Position of this operand in the assembly instruction + + Base string // Base Go type ("int", "uint", "float") + Bits int // Element bit width + W int // Total vector bit width +} + +func writeGoDefs(w io.Writer, cl unify.Closure) { + // TODO: Merge operations with the same signature but multiple + // implementations (e.g., SSE vs AVX) + + // TODO: This code is embarrassing, but I'm very tired. 
+ + var op Operation + for def := range cl.All() { + if !def.Exact() { + continue + } + if err := def.Decode(&op); err != nil { + log.Println(err.Error()) + continue + } + + fmt.Fprintf(w, "func (x %s) %s(", op.In[0].Go, op.Go) + for i, arg := range op.In[1:] { + if i > 0 { + fmt.Fprint(w, ", ") + } + fmt.Fprintf(w, "%c %s", 'y'+i, arg.Go) + } + fmt.Fprintf(w, ") (") + for i, res := range op.Out { + if i > 0 { + fmt.Fprint(w, ", ") + } + fmt.Fprintf(w, "%c %s", 'o'+i, res.Go) + } + fmt.Fprintf(w, ") {\n") + + asmPosToArg := make(map[int]byte) + asmPosToRes := make(map[int]byte) + for i, arg := range op.In { + asmPosToArg[arg.AsmPos] = 'x' + byte(i) + } + for i, res := range op.Out { + asmPosToRes[res.AsmPos] = 'o' + byte(i) + } + fmt.Fprintf(w, "\t// %s", op.Asm) + for i := 0; ; i++ { + arg, okArg := asmPosToArg[i] + if okArg { + fmt.Fprintf(w, " %c", arg) + } + res, okRes := asmPosToRes[i] + if okRes { + if okArg { + fmt.Fprintf(w, "/") + } else { + fmt.Fprintf(w, " ") + } + fmt.Fprintf(w, "%c", res) + } + if !okArg && !okRes { + break + } + } + fmt.Fprintf(w, "\n") + + fmt.Fprintf(w, "}\n") + } +} diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go new file mode 100644 index 00000000..91aa07ce --- /dev/null +++ b/internal/simdgen/main.go @@ -0,0 +1,217 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// simdgen is an experiment in generating Go <-> asm SIMD mappings. +// +// Usage: simdgen [-xedPath=path] [-q=query] input.yaml... +// +// If -xedPath is provided, one of the inputs is a sum of op-code definitions +// generated from the Intel XED data at path. +// +// If input YAML files are provided, each file is read as an input value. See +// [unify.Closure.UnmarshalYAML] or "go doc unify.Closure.UnmarshalYAML" for the +// format of these files. +// +// TODO: Example definitions and values. 
+// +// The command unifies across all of the inputs and prints all possible results +// of this unification. +// +// If the -q flag is provided, its string value is parsed as a value and treated +// as another input to unification. This is intended as a way to "query" the +// result, typically by narrowing it down to a small subset of results. +// +// Typical usage: +// +// go run . -xedPath $XEDPATH *.yaml +// +// To see just the definitions generated from XED, run: +// +// go run . -xedPath $XEDPATH +// +// (This works because if there's only one input, there's nothing to unify it +// with, so the result is simply itself.) +// +// To see just the definitions for VPADDQ: +// +// go run . -xedPath $XEDPATH -q '{asm: VPADDQ}' +package main + +// Big TODOs: +// +// - This can produce duplicates, which can also lead to less efficient +// environment merging. Add hashing and use it for deduplication. Be careful +// about how this shows up in debug traces, since it could make things +// confusing if we don't show it happening. +// +// - Do I need Closure, Value, and Domain? It feels like I should only need two +// types. + +import ( + "cmp" + "flag" + "fmt" + "log" + "maps" + "os" + "slices" + "strings" + + "golang.org/x/arch/internal/unify" + "gopkg.in/yaml.v3" +) + +var ( + xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") + flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") + flagO = flag.String("o", "yaml", "output type: yaml, godefs") + + flagDebugXED = flag.Bool("debug-xed", false, "show XED instructions") + flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") + flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") +) + +var yamlSubs = strings.NewReplacer( + "$xi", "[BWDQ]", // x86 integer suffixes + "$xf", "[SD]", // x86 float suffixes +) + +func main() { + flag.Parse() + + var inputs []unify.Closure + + // Load XED into a defs set. 
+ if *xedPath != "" { + xedDefs := loadXED(*xedPath) + inputs = append(inputs, unify.NewSum(xedDefs...)) + } + + // Load query. + if *flagQ != "" { + r := strings.NewReader(*flagQ) + var def unify.Closure + if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: "", StringReplacer: yamlSubs.Replace}); err != nil { + log.Fatalf("parsing -q: %s", err) + } + inputs = append(inputs, def) + } + + // Load defs files. + must := make(map[*unify.Value]struct{}) + for _, path := range flag.Args() { + defs, err := loadValue(path) + if err != nil { + log.Fatal(err) + } + inputs = append(inputs, defs) + + if path == "go.yaml" { + // These must all be used in the final result + for def := range defs.Summands() { + must[def] = struct{}{} + } + } + } + + // Prepare for unification + if *flagDebugUnify { + unify.Debug.UnifyLog = os.Stderr + } + if *flagDebugHTML != "" { + f, err := os.Create(*flagDebugHTML) + if err != nil { + log.Fatal(err) + } + unify.Debug.HTML = f + defer f.Close() + } + + // Unify! + unified, err := unify.Unify(inputs...) + if err != nil { + log.Fatal(err) + } + + // Print results. + switch *flagO { + case "yaml": + // Produce a result that looks like encoding a slice, but stream it. + var val1 [1]*unify.Value + for val := range unified.All() { + val1[0] = val + // We have to make a new encoder each time or it'll print a document + // separator between each object. + enc := yaml.NewEncoder(os.Stdout) + if err := enc.Encode(val1); err != nil { + log.Fatal(err) + } + enc.Close() + } + case "godefs": + writeGoDefs(os.Stdout, unified) + } + + // Validate results. + // + // Don't validate if this is a command-line query because that tends to + // eliminate lots of required defs and is used in cases where maybe defs + // aren't enumerable anyway. 
+ if *flagQ == "" && len(must) > 0 { + validate(unified, must) + } +} + +func loadValue(path string) (unify.Closure, error) { + f, err := os.Open(path) + if err != nil { + return unify.Closure{}, err + } + defer f.Close() + + var c unify.Closure + if err := c.Unmarshal(f, unify.UnmarshalOpts{StringReplacer: yamlSubs.Replace}); err != nil { + return unify.Closure{}, fmt.Errorf("%s: %v", path, err) + } + return c, nil +} + +func validate(cl unify.Closure, required map[*unify.Value]struct{}) { + // Validate that: + // 1. All final defs are exact + // 2. All required defs are used + for def := range cl.All() { + if _, ok := def.Domain.(unify.Def); !ok { + fmt.Fprintf(os.Stderr, "%s: expected Def, got %T\n", def.PosString(), def.Domain) + continue + } + + if !def.Exact() { + fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value:\n", def.PosString()) + fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t")) + } + + for root := range def.Provenance() { + delete(required, root) + } + } + // Report unused defs + unused := slices.SortedFunc(maps.Keys(required), + func(a, b *unify.Value) int { + return cmp.Or( + cmp.Compare(a.Pos().Path, b.Pos().Path), + cmp.Compare(a.Pos().Line, b.Pos().Line), + ) + }) + for _, def := range unused { + // TODO: Can we say anything more actionable? This is always a problem + // with unification: if it fails, it's very hard to point a finger at + // any particular reason. We could go back and try unifying this again + // with each subset of the inputs (starting with individual inputs) to + // at least say "it doesn't unify with anything in x.yaml". That's a lot + // of work, but if we have trouble debugging unification failure it may + // be worth it. 
+ fmt.Fprintf(os.Stderr, "%s: def required, but did not unify\n", def.PosString()) + } +} diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml new file mode 100644 index 00000000..9397888c --- /dev/null +++ b/internal/simdgen/types.yaml @@ -0,0 +1,41 @@ +# This file defines the possible types of each operand and result. +# +# In general, we're able to narrow this down on some attributes directly from +# the machine instruction descriptions, but the Go mappings need to further +# constrain them and how they relate. For example, on x86 we can't distinguish +# int and uint, though we can distinguish these from float. + +in: !repeat +- !sum &types + - {go: Int8x16, base: "int", bits: 8, w: 128} + - {go: Uint8x16, base: "uint", bits: 8, w: 128} + - {go: Int16x8, base: "int", bits: 16, w: 128} + - {go: Uint16x8, base: "uint", bits: 16, w: 128} + - {go: Int32x4, base: "int", bits: 32, w: 128} + - {go: Uint32x4, base: "uint", bits: 32, w: 128} + - {go: Int64x2, base: "int", bits: 64, w: 128} + - {go: Uint64x2, base: "uint", bits: 64, w: 128} + - {go: Float32x4, base: "float", bits: 32, w: 128} + - {go: Float64x2, base: "float", bits: 64, w: 128} + - {go: Int8x32, base: "int", bits: 8, w: 256} + - {go: Uint8x32, base: "uint", bits: 8, w: 256} + - {go: Int16x16, base: "int", bits: 16, w: 256} + - {go: Uint16x16, base: "uint", bits: 16, w: 256} + - {go: Int32x8, base: "int", bits: 32, w: 256} + - {go: Uint32x8, base: "uint", bits: 32, w: 256} + - {go: Int64x4, base: "int", bits: 64, w: 256} + - {go: Uint64x4, base: "uint", bits: 64, w: 256} + - {go: Float32x8, base: "float", bits: 32, w: 256} + - {go: Float64x4, base: "float", bits: 64, w: 256} + - {go: Int8x64, base: "int", bits: 8, w: 512} + - {go: Uint8x64, base: "uint", bits: 8, w: 512} + - {go: Int16x32, base: "int", bits: 16, w: 512} + - {go: Uint16x32, base: "uint", bits: 16, w: 512} + - {go: Int32x16, base: "int", bits: 32, w: 512} + - {go: Uint32x16, base: "uint", bits: 32, w: 512} + - {go: Int64x8, 
base: "int", bits: 64, w: 512}
+ - {go: Uint64x8, base: "uint", bits: 64, w: 512}
+ - {go: Float32x16, base: "float", bits: 32, w: 512}
+ - {go: Float64x8, base: "float", bits: 64, w: 512}
+out: !repeat
+- *types
diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go
new file mode 100644
index 00000000..e500d713
--- /dev/null
+++ b/internal/simdgen/xed.go
@@ -0,0 +1,221 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "fmt"
+ "log"
+ "regexp"
+ "strconv"
+ "strings"
+
+ "golang.org/x/arch/internal/unify"
+ "golang.org/x/arch/x86/xeddata"
+ "gopkg.in/yaml.v3"
+)
+
+// TODO: Doc. Returns Values with Def domains.
+func loadXED(xedPath string) []*unify.Value {
+ // TODO: Obviously a bunch more to do here.
+
+ db, err := xeddata.NewDatabase(xedPath)
+ if err != nil {
+ log.Fatalf("open database: %v", err)
+ }
+
+ var defs []*unify.Value
+ err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) {
+ inst.Pattern = xeddata.ExpandStates(db, inst.Pattern)
+
+ switch {
+ case inst.RealOpcode == "N":
+ return // Skip unstable instructions
+ case !(strings.HasPrefix(inst.Extension, "SSE") || strings.HasPrefix(inst.Extension, "AVX")):
+ // We're only interested in SSE and AVX instructions. 
+ return // Skip non-AVX or SSE instructions + } + + if *flagDebugXED { + fmt.Printf("%s:\n%+v\n", inst.Pos, inst) + } + + ins, outs := decodeOperands(db, strings.Fields(inst.Operands)) + // TODO: "feature" + fields := []string{"goarch", "asm", "in", "out"} + values := []*unify.Value{ + unify.NewValue(unify.NewStringExact("amd64")), + unify.NewValue(unify.NewStringExact(inst.Opcode())), + unify.NewValue(ins), + unify.NewValue(outs), + } + pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} + defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos)) + if *flagDebugXED { + y, _ := yaml.Marshal(defs[len(defs)-1]) + fmt.Printf("==>\n%s\n", y) + } + }) + if err != nil { + log.Fatalf("walk insts: %v", err) + } + return defs +} + +func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple) { + var inVals, outVals []*unify.Value + for asmPos, o := range operands { + op, err := xeddata.NewOperand(db, o) + if err != nil { + log.Fatalf("parsing operand %q: %v", o, err) + } + if *flagDebugXED { + fmt.Printf(" %+v\n", op) + } + + // TODO: We should have a fixed set of fields once this gets more cleaned up. 
+ var fields []string + var values []*unify.Value + add := func(f string, v *unify.Value) { + fields = append(fields, f) + values = append(values, v) + } + + add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) + + var r, w bool + switch op.Action { + case "r": + r = true + case "w": + w = true + case "rw": + r, w = true, true + default: + continue + } + + lhs := op.NameLHS() + if strings.HasPrefix(lhs, "MEM") { + add("mem", unify.NewValue(unify.NewStringExact("true"))) + add("w", unify.NewValue(unify.NewStringExact("TODO"))) + add("base", unify.NewValue(unify.NewStringExact("TODO"))) + } else if strings.HasPrefix(lhs, "REG") { + if op.Width == "mskw" { + add("mask", unify.NewValue(unify.NewStringExact("true"))) + add("w", unify.NewValue(unify.NewStringExact("TODO"))) + add("base", unify.NewValue(unify.NewStringExact("TODO"))) + } else { + width, ok := decodeReg(op) + if !ok { + return + } + baseRe, bits, ok := decodeBits(op) + if !ok { + return + } + baseDomain, err := unify.NewStringRegex(baseRe) + if err != nil { + panic("parsing baseRe: " + err.Error()) + } + add("bits", unify.NewValue(unify.NewStringExact(fmt.Sprint(bits)))) + add("w", unify.NewValue(unify.NewStringExact(fmt.Sprint(width)))) + add("base", unify.NewValue(baseDomain)) + } + } else { + // TODO: Immediates + add("UNKNOWN", unify.NewValue(unify.NewStringExact(o))) + } + // dq => 128 bits (XMM) + // qq => 256 bits (YMM) + // mskw => K + // z[iuf?](8|16|32|...) => 512 bits (ZMM) + // + // Are these always XMM/YMM/ZMM or can other irregular things + // with large widths use these same codes? + // + // The only zi* is zi32. I don't understand the difference between + // zi32 and zu32 or why there are a bunch of zu* but only one zi. + // + // The xtype tells you the element type. i8, i16, i32, i64, etc. + // + // Things like AVX2 VPAND have an xtype of u256. + // I think we have to map that to all widths. + // There's no u512 (presumably those are all masked, so elem width matters). 
+ // These are all Category: LOGICAL. Maybe we use that info? + + if r { + inVal := unify.NewValue(unify.NewDef(fields, values)) + inVals = append(inVals, inVal) + } + if w { + outVal := unify.NewValue(unify.NewDef(fields, values)) + outVals = append(outVals, outVal) + } + } + + return unify.NewTuple(inVals...), unify.NewTuple(outVals...) +} + +func decodeReg(op *xeddata.Operand) (w int, ok bool) { + if !strings.HasPrefix(op.NameLHS(), "REG") { + return 0, false + } + // TODO: We shouldn't be relying on the macro naming conventions. We should + // use all-dec-patterns.txt, but xeddata doesn't support that table right now. + rhs := op.NameRHS() + if !strings.HasSuffix(rhs, "()") { + return 0, false + } + switch { + case strings.HasPrefix(rhs, "XMM_"): + return 128, true + case strings.HasPrefix(rhs, "YMM_"): + return 256, true + case strings.HasPrefix(rhs, "ZMM_"): + return 512, true + } + return 0, false +} + +var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) + +func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) { + // Handle some weird ones. + switch op.Xtype { + // 8-bit float formats as defined by Open Compute Project "OCP 8-bit + // Floating Point Specification (OFP8)". + case "bf8", // E5M2 float + "hf8": // E4M3 float + return "", 0, false // TODO + case "bf16": // bfloat16 float + return "", 0, false // TODO + case "2f16": + // Complex consisting of 2 float16s. Doesn't exist in Go, but we can say + // what it would be. + return "complex", 32, true + case "2i8", "2I8": + // These just use the lower INT8 in each 16 bit field. + // As far as I can tell, "2I8" is a typo. + return "int", 8, true + } + + // The rest follow a simple pattern. 
+ m := xtypeRe.FindStringSubmatch(op.Xtype) + if m == nil { + // TODO: Report unrecognized xtype + return "", 0, false + } + bits, _ = strconv.Atoi(m[2]) + switch m[1] { + case "i", "u": + // XED is rather inconsistent about what's signed, unsigned, or doesn't + // matter, so merge them together and let the Go definitions narrow as + // appropriate. Maybe there's a better way to do this. + baseRe = "int|uint" + case "f": + baseRe = "float" + } + return baseRe, bits, true +} From ca84b2ccdbe04587d9f0bddda2d3b22e4ee9f31b Mon Sep 17 00:00:00 2001 From: Koichi Shiraishi Date: Sat, 26 Apr 2025 04:48:13 +0900 Subject: [PATCH 053/200] go.mod: format require section Change-Id: Ia54ecb1cd5f05c2502acd13f1cf3db5f66070d15 Reviewed-on: https://go-review.googlesource.com/c/arch/+/668335 Reviewed-by: Carlos Amedee LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: Sean Liao --- go.mod | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 72642f75..bcca36b6 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module golang.org/x/arch go 1.23.0 -require rsc.io/pdf v0.1.1 - -require gopkg.in/yaml.v3 v3.0.1 +require ( + gopkg.in/yaml.v3 v3.0.1 + rsc.io/pdf v0.1.1 +) From 097aeb1e7accfa1038f6f6b6f31a1a6493e63d99 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 19 May 2025 20:09:00 -0400 Subject: [PATCH 054/200] internal/ximdgen: drop $xi/$xf shorthands These were string shorthands for x86 integer and float suffixes, respectively, but in practice they were easy to confuse with unification variables (also $) and just required knowing more stuff in order to understand the unification rules. Drop them and just spell it out. 
Change-Id: I38a0446e428a92c9e89be4e90d1beff16e48e714 Reviewed-on: https://go-review.googlesource.com/c/arch/+/674177 Auto-Submit: Austin Clements Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/go.yaml | 4 ++-- internal/simdgen/main.go | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index efa51303..a0665947 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -2,7 +2,7 @@ # For binary operations, we constrain their two inputs and one output to the # same Go type using a variable. - go: Add - asm: "V?PADD$xi|V?ADDP$xf" + asm: "V?PADD[BWDQ]|V?ADDP[SD]" in: - go: $t - go: $t @@ -11,7 +11,7 @@ - go: Sub goarch: amd64 - asm: "V?PSUB$xi|V?SUBP$xf" + asm: "V?PSUB[BWDQ]|V?SUBP[SD]" in: - go: $t - go: $t diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 91aa07ce..82c31c06 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -72,11 +72,6 @@ var ( flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") ) -var yamlSubs = strings.NewReplacer( - "$xi", "[BWDQ]", // x86 integer suffixes - "$xf", "[SD]", // x86 float suffixes -) - func main() { flag.Parse() @@ -92,7 +87,7 @@ func main() { if *flagQ != "" { r := strings.NewReader(*flagQ) var def unify.Closure - if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: "", StringReplacer: yamlSubs.Replace}); err != nil { + if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: ""}); err != nil { log.Fatalf("parsing -q: %s", err) } inputs = append(inputs, def) @@ -171,7 +166,7 @@ func loadValue(path string) (unify.Closure, error) { defer f.Close() var c unify.Closure - if err := c.Unmarshal(f, unify.UnmarshalOpts{StringReplacer: yamlSubs.Replace}); err != nil { + if err := c.Unmarshal(f, unify.UnmarshalOpts{}); err != nil { return unify.Closure{}, fmt.Errorf("%s: %v", path, err) } return c, nil From 4fbd317a2cd482e27515e5ae3e271b2a10918c13 Mon Sep 
17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Apr 2025 20:33:28 -0400 Subject: [PATCH 055/200] internal/simdgen: support masks and immediates This significantly reworks the XED loader and tweaks the operand representation to support more than just vector registers. In particular, each operand now has a "class" string that determines the meaning of several other fields. We add AVX-512 == and < to demonstrate both masks and immediates. Change-Id: I6d025dbcb66e5914472b60697b3a7e4cc6174d78 Reviewed-on: https://go-review.googlesource.com/c/arch/+/667435 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/asm.yaml.toy | 75 +++--- internal/simdgen/go.yaml | 204 ++++++++++++++++ internal/simdgen/godefs.go | 87 +++++-- internal/simdgen/types.yaml | 73 +++--- internal/simdgen/xed.go | 421 +++++++++++++++++++++++++++------- x86/xeddata/operand.go | 4 +- 6 files changed, 691 insertions(+), 173 deletions(-) diff --git a/internal/simdgen/asm.yaml.toy b/internal/simdgen/asm.yaml.toy index 76970868..7885c776 100644 --- a/internal/simdgen/asm.yaml.toy +++ b/internal/simdgen/asm.yaml.toy @@ -6,87 +6,102 @@ feature: "SSE2" in: - asmPos: 0 + class: vreg base: float - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asmPos: 1 + class: vreg base: float - bits: 32 - w: 128 + elemBits: 32 + bits: 128 out: - asmPos: 0 + class: vreg base: float - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asm: ADDPD goarch: amd64 feature: "SSE2" in: - asmPos: 0 + class: vreg base: float - bits: 64 - w: 128 + elemBits: 64 + bits: 128 - asmPos: 1 + class: vreg base: float - bits: 64 - w: 128 + elemBits: 64 + bits: 128 out: - asmPos: 0 + class: vreg base: float - bits: 64 - w: 128 + elemBits: 64 + bits: 128 - asm: PADDB goarch: amd64 feature: "SSE2" in: - asmPos: 0 + class: vreg base: int|uint - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asmPos: 1 + class: vreg base: int|uint - bits: 32 - w: 128 + elemBits: 32 + bits: 128 out: - asmPos: 0 + 
class: vreg base: int|uint - bits: 32 - w: 128 + elemBits: 32 + bits: 128 - asm: VPADDB goarch: amd64 feature: "AVX" in: - asmPos: 1 + class: vreg base: int|uint - bits: 8 - w: 128 + elemBits: 8 + bits: 128 - asmPos: 2 + class: vreg base: int|uint - bits: 8 - w: 128 + elemBits: 8 + bits: 128 out: - asmPos: 0 + class: vreg base: int|uint - bits: 8 - w: 128 + elemBits: 8 + bits: 128 - asm: VPADDB goarch: amd64 feature: "AVX2" in: - asmPos: 1 + class: vreg base: int|uint - bits: 8 - w: 256 + elemBits: 8 + bits: 256 - asmPos: 2 + class: vreg base: int|uint - bits: 8 - w: 256 + elemBits: 8 + bits: 256 out: - asmPos: 0 + class: vreg base: int|uint - bits: 8 - w: 256 + elemBits: 8 + bits: 256 diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index a0665947..6a6ca1eb 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -17,3 +17,207 @@ - go: $t out: - go: $t + +# +# AVX-512 Comparisons +# + +# TODO(austin): I'm not happy with how much copy-pasting this requires. We could +# do a functional abstraction, but that feels bolted on. Unification is *almost* +# capable of collapsing all of this. +# +# One thing that might work is having a !let node that lets you extend the +# nonDetEnv with explicit values: +# +# !let +# - {$go: Equal, $imm: 0, $mask: K0} +# - {$go: EqualMasked, $imm: 0, $mask: _} +# - {$go: Less, $imm: 1, $mask: K0} +# - {$go: LessMasked, $imm: 1, $mask: _} +# - !let +# - {$asm: "VPCMP[BWDQ]", $base: int} +# - {$asm: "VPCMPU[BWDQ]", $base: uint} +# - go: $go +# asm: $asm +# in: +# - const: $mask +# - base: $base +# go: $t +# - base: $base +# go: $t +# - class: immediate +# const: $imm +# out: +# - class: mask +# +# That's not bad, but it's very hierachical. CUE has a "mixin" approach to this. 
+# +# - !unify +# # All AVX-512 comparisons have the same basic operand shape +# - {in: [_, {go: $t}, {go: $t}, _], out: [{class: mask}]} +# # There are signed and unsigned variants +# - !sum +# - {asm: "VPCMP[BWDQ]", in: [_, {base: int}, {base: int}, _]} +# - {asm: "VPCMPU[BWDQ]", in: [_, {base: uint}, {base: uint}, _]} +# # Finally, list out the operations. +# - !let +# - $equal: {in: [_, _, _, {class: immedate, const: 0}]} +# $less: {in: [_, _, _, {class: immedate, const: 1}]} +# $masked: _ +# $unmasked: {in: [const: K0, _, _, _]} +# - !sum +# - !unify [go: Equal, $equal, $unmasked] +# - !unify [go: EqualMasked, $equal, $masked] +# - !unify [go: Less, $less, $unmasked] +# - !unify [go: LessMasked, $less, $masked] +# +# Maybe !let is just a feature of !sum that introduces an environment factor for +# all following branches? That would let me do the above in-line with the big +# top-level !sum: +# +# - !sum +# ... +# - !let # Adds a factor that is the sum of the following terms: +# - {$go: Equal, $imm: 0, $mask: K0} +# - {$go: EqualMasked, $imm: 0, $mask: _} +# - {$go: Less, $imm: 1, $mask: K0} +# - {$go: LessMasked, $imm: 1, $mask: _} +# - !let # Adds another factor: +# - {$asm: "VPCMP[BWDQ]", $base: int} +# - {$asm: "VPCMPU[BWDQ]", $base: uint} +# - go: $go +# asm: $asm +# in: +# - const: $mask +# - base: $base +# go: $t +# - base: $base +# go: $t +# - class: immediate +# const: $imm +# out: +# - class: mask +# +# I may need to choose names more carefully in that case. This is a general +# problem with names being file-global. (This is less of a problem with the +# mixin style because those names tend to be more specific anyway.) Or maybe it +# makes sense for each !let to introduce fresh idents, even if the string names +# are the same? 
+ +- go: Equal + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - const: K0 + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: EqualMasked + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - _ + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: Equal + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - const: K0 + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: EqualMasked + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - _ + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 0 + out: + - class: mask + +- go: Less + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - const: K0 + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 1 + out: + - class: mask + +- go: LessMasked + goarch: amd64 + asm: "VPCMP[BWDQ]" # Signed comparison + in: + - _ + - base: int + go: $t + - base: int + go: $t + - class: immediate + const: 1 + out: + - class: mask + +- go: Less + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - const: K0 + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 1 + out: + - class: mask + +- go: LessMasked + goarch: amd64 + asm: "VPCMPU[BWDQ]" # Unsigned comparison + in: + - _ + - base: uint + go: $t + - base: uint + go: $t + - class: immediate + const: 1 + out: + - class: mask + +# TODO: +# 2: OP := LE; +# 4: OP := NEQ; +# 5: OP := NLT; +# 6: OP := NLE; diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 6a6ff4f5..037c11fa 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "log" + "slices" "golang.org/x/arch/internal/unify" ) @@ -24,14 +25,31 @@ type Operation struct { } type Operand struct { - Go string // Go type of this operand - 
AsmPos int // Position of this operand in the assembly instruction + Class string - Base string // Base Go type ("int", "uint", "float") - Bits int // Element bit width - W int // Total vector bit width + Go *string // Go type of this operand + AsmPos int // Position of this operand in the assembly instruction + + Base *string // Base Go type ("int", "uint", "float") + ElemBits *int // Element bit width + Bits int // Total vector bit width + + Const *string // Optional constant value +} + +func (o Operand) Compare(p Operand) int { + // Put mask operands after others + if o.Class != "mask" && p.Class == "mask" { + return -1 + } + if o.Class == "mask" && p.Class != "mask" { + return 1 + } + return 0 } +var argNames = []string{"x", "y", "z", "w"} + func writeGoDefs(w io.Writer, cl unify.Closure) { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) @@ -45,39 +63,64 @@ func writeGoDefs(w io.Writer, cl unify.Closure) { } if err := def.Decode(&op); err != nil { log.Println(err.Error()) + log.Println(def) continue } - fmt.Fprintf(w, "func (x %s) %s(", op.In[0].Go, op.Go) - for i, arg := range op.In[1:] { - if i > 0 { - fmt.Fprint(w, ", ") + in := slices.Clone(op.In) + slices.SortStableFunc(in, Operand.Compare) + out := slices.Clone(op.Out) + slices.SortStableFunc(out, Operand.Compare) + + type argExtra struct { + *Operand + varName string + } + asmPosToArg := make(map[int]argExtra) + asmPosToRes := make(map[int]argExtra) + argNames := argNames + + fmt.Fprintf(w, "func (%s %s) %s(", argNames[0], *in[0].Go, op.Go) + asmPosToArg[in[0].AsmPos] = argExtra{&in[0], argNames[0]} + argNames = argNames[1:] + i := 0 + for _, arg := range in[1:] { + varName := "" + + // Drop operands with constant values + if arg.Const == nil { + if i > 0 { + fmt.Fprint(w, ", ") + } + i++ + varName = argNames[0] + fmt.Fprintf(w, "%s %s", varName, *arg.Go) + argNames = argNames[1:] } - fmt.Fprintf(w, "%c %s", 'y'+i, arg.Go) + asmPosToArg[arg.AsmPos] = 
argExtra{&arg, varName} } fmt.Fprintf(w, ") (") - for i, res := range op.Out { + for i, res := range out { if i > 0 { fmt.Fprint(w, ", ") } - fmt.Fprintf(w, "%c %s", 'o'+i, res.Go) + varName := string('o' + byte(i)) + fmt.Fprintf(w, "%s %s", varName, *res.Go) + asmPosToRes[res.AsmPos] = argExtra{&res, varName} } fmt.Fprintf(w, ") {\n") - asmPosToArg := make(map[int]byte) - asmPosToRes := make(map[int]byte) - for i, arg := range op.In { - asmPosToArg[arg.AsmPos] = 'x' + byte(i) - } - for i, res := range op.Out { - asmPosToRes[res.AsmPos] = 'o' + byte(i) - } fmt.Fprintf(w, "\t// %s", op.Asm) for i := 0; ; i++ { arg, okArg := asmPosToArg[i] if okArg { - fmt.Fprintf(w, " %c", arg) + if arg.Const != nil { + fmt.Fprintf(w, " %s", *arg.Const) + } else { + fmt.Fprintf(w, " %s", arg.varName) + } } + res, okRes := asmPosToRes[i] if okRes { if okArg { @@ -85,7 +128,7 @@ func writeGoDefs(w io.Writer, cl unify.Closure) { } else { fmt.Fprintf(w, " ") } - fmt.Fprintf(w, "%c", res) + fmt.Fprintf(w, "%s", res.varName) } if !okArg && !okRes { break diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 9397888c..9092224e 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -7,35 +7,48 @@ in: !repeat - !sum &types - - {go: Int8x16, base: "int", bits: 8, w: 128} - - {go: Uint8x16, base: "uint", bits: 8, w: 128} - - {go: Int16x8, base: "int", bits: 16, w: 128} - - {go: Uint16x8, base: "uint", bits: 16, w: 128} - - {go: Int32x4, base: "int", bits: 32, w: 128} - - {go: Uint32x4, base: "uint", bits: 32, w: 128} - - {go: Int64x2, base: "int", bits: 64, w: 128} - - {go: Uint64x2, base: "uint", bits: 64, w: 128} - - {go: Float32x4, base: "float", bits: 32, w: 128} - - {go: Float64x2, base: "float", bits: 64, w: 128} - - {go: Int8x32, base: "int", bits: 8, w: 256} - - {go: Uint8x32, base: "uint", bits: 8, w: 256} - - {go: Int16x16, base: "int", bits: 16, w: 256} - - {go: Uint16x16, base: "uint", bits: 16, w: 256} - - {go: Int32x8, base: "int", 
bits: 32, w: 256} - - {go: Uint32x8, base: "uint", bits: 32, w: 256} - - {go: Int64x4, base: "int", bits: 64, w: 256} - - {go: Uint64x4, base: "uint", bits: 64, w: 256} - - {go: Float32x8, base: "float", bits: 32, w: 256} - - {go: Float64x4, base: "float", bits: 64, w: 256} - - {go: Int8x64, base: "int", bits: 8, w: 512} - - {go: Uint8x64, base: "uint", bits: 8, w: 512} - - {go: Int16x32, base: "int", bits: 16, w: 512} - - {go: Uint16x32, base: "uint", bits: 16, w: 512} - - {go: Int32x16, base: "int", bits: 32, w: 512} - - {go: Uint32x16, base: "uint", bits: 32, w: 512} - - {go: Int64x8, base: "int", bits: 64, w: 512} - - {go: Uint64x8, base: "uint", bits: 64, w: 512} - - {go: Float32x16, base: "float", bits: 32, w: 512} - - {go: Float64x8, base: "float", bits: 64, w: 512} + - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128} + - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128} + - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128} + - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128} + - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128} + - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128} + - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256} + - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256} + - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256} + - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256} + - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256} + - {class: vreg, go: 
Uint64x4, base: "uint", elemBits: 64, bits: 256} + - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256} + - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256} + - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512} + - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512} + - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512} + - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512} + - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512} + - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512} + - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512} + - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512} + - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512} + - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512} + - {class: mask, go: Mask8x16, elemBits: 8, bits: 128} + - {class: mask, go: Mask16x8, elemBits: 16, bits: 128} + - {class: mask, go: Mask32x4, elemBits: 32, bits: 128} + - {class: mask, go: Mask64x2, elemBits: 64, bits: 128} + - {class: mask, go: Mask8x32, elemBits: 8, bits: 256} + - {class: mask, go: Mask16x16, elemBits: 16, bits: 256} + - {class: mask, go: Mask32x8, elemBits: 32, bits: 256} + - {class: mask, go: Mask64x4, elemBits: 64, bits: 256} + - {class: mask, go: Mask8x64, elemBits: 8, bits: 512} + - {class: mask, go: Mask16x32, elemBits: 16, bits: 512} + - {class: mask, go: Mask32x16, elemBits: 32, bits: 512} + - {class: mask, go: Mask64x8, elemBits: 64, bits: 512} + - {class: immediate} # TODO out: !repeat - *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index e500d713..292411cb 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -41,7 +41,11 @@ func loadXED(xedPath string) []*unify.Value { fmt.Printf("%s:\n%+v\n", inst.Pos, inst) } - ins, outs := decodeOperands(db, strings.Fields(inst.Operands)) + ins, outs, err := 
decodeOperands(db, strings.Fields(inst.Operands)) + if err != nil { + log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) + return + } // TODO: "feature" fields := []string{"goarch", "asm", "in", "out"} values := []*unify.Value{ @@ -63,102 +67,296 @@ func loadXED(xedPath string) []*unify.Value { return defs } -func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple) { - var inVals, outVals []*unify.Value - for asmPos, o := range operands { - op, err := xeddata.NewOperand(db, o) +type operandCommon struct { + action operandAction +} + +// operandAction defines whether this operand is read and/or written. +// +// TODO: Should this live in [xeddata.Operand]? +type operandAction struct { + r bool // Read + w bool // Written + cr bool // Read is conditional (implies r==true) + cw bool // Write is conditional (implies w==true) +} + +type operandMem struct { + operandCommon + // TODO +} + +type vecShape struct { + elemBits int // Element size in bits + bits int // Register width in bits (total vector bits) +} + +type operandVReg struct { // Vector register + operandCommon + vecShape + elemBaseType scalarBaseType +} + +// operandMask is a vector mask. +// +// Regardless of the actual mask representation, the [vecShape] of this operand +// corresponds to the "bit for bit" type of mask. That is, elemBits gives the +// element width covered by each mask element, and bits/elemBits gives the total +// number of mask elements. (bits gives the total number of bits as if this were +// a bit-for-bit mask, which may be meaningless on its own.) 
+type operandMask struct { + operandCommon + vecShape +} + +type operandImm struct { + operandCommon + bits int // Immediate size in bits +} + +type operand interface { + common() operandCommon + toValue() (fields []string, vals []*unify.Value) +} + +func strVal(s any) *unify.Value { + return unify.NewValue(unify.NewStringExact(fmt.Sprint(s))) +} + +func (o operandCommon) common() operandCommon { + return o +} + +func (o operandMem) toValue() (fields []string, vals []*unify.Value) { + // TODO: w, base + return []string{"class"}, []*unify.Value{strVal("memory")} +} + +func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { + baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) + if err != nil { + panic("parsing baseRe: " + err.Error()) + } + return []string{"class", "elemBits", "bits", "base"}, []*unify.Value{ + strVal("vreg"), + strVal(o.elemBits), + strVal(o.bits), + unify.NewValue(baseDomain)} +} + +func (o operandMask) toValue() (fields []string, vals []*unify.Value) { + return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} +} + +func (o operandImm) toValue() (fields []string, vals []*unify.Value) { + return []string{"class", "bits"}, []*unify.Value{strVal("immediate"), strVal(o.bits)} +} + +var actionEncoding = map[string]operandAction{ + "r": {r: true}, + "cr": {r: true, cr: true}, + "w": {w: true}, + "cw": {w: true, cw: true}, + "rw": {r: true, w: true}, + "crw": {r: true, w: true, cr: true}, + "rcw": {r: true, w: true, cw: true}, +} + +func decodeOperand(db *xeddata.Database, operand string) (operand, error) { + op, err := xeddata.NewOperand(db, operand) + if err != nil { + log.Fatalf("parsing operand %q: %v", operand, err) + } + if *flagDebugXED { + fmt.Printf(" %+v\n", op) + } + + // TODO: See xed_decoded_inst_operand_action. This might need to be more + // complicated. 
+ action, ok := actionEncoding[op.Action] + if !ok { + return nil, fmt.Errorf("unknown action %q", op.Action) + } + common := operandCommon{action: action} + + lhs := op.NameLHS() + if strings.HasPrefix(lhs, "MEM") { + // TODO: Width, base type + return operandMem{ + operandCommon: common, + }, nil + } else if strings.HasPrefix(lhs, "REG") { + if op.Width == "mskw" { + // The mask operand doesn't specify a width. We have to infer it. + return operandMask{ + operandCommon: common, + }, nil + } else { + regBits, ok := decodeReg(op) + if !ok { + return nil, fmt.Errorf("failed to decode register %q", operand) + } + baseType, elemBits, ok := decodeType(op) + if !ok { + return nil, fmt.Errorf("failed to decode register width %q", operand) + } + shape := vecShape{elemBits: elemBits, bits: regBits} + return operandVReg{ + operandCommon: common, + vecShape: shape, + elemBaseType: baseType, + }, nil + } + } else if strings.HasPrefix(lhs, "IMM") { + _, bits, ok := decodeType(op) + if !ok { + return nil, fmt.Errorf("failed to decode register width %q", operand) + } + return operandImm{ + operandCommon: common, + bits: bits, + }, nil + } + + // TODO: BASE and SEG + return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand) +} + +func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple, err error) { + fail := func(err error) (unify.Tuple, unify.Tuple, error) { + return unify.Tuple{}, unify.Tuple{}, err + } + + // Decode all of the operands. + var ops []operand + for _, o := range operands { + op, err := decodeOperand(db, o) if err != nil { - log.Fatalf("parsing operand %q: %v", o, err) + return unify.Tuple{}, unify.Tuple{}, err } - if *flagDebugXED { - fmt.Printf(" %+v\n", op) - } - - // TODO: We should have a fixed set of fields once this gets more cleaned up. 
- var fields []string - var values []*unify.Value - add := func(f string, v *unify.Value) { - fields = append(fields, f) - values = append(values, v) - } - - add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) - - var r, w bool - switch op.Action { - case "r": - r = true - case "w": - w = true - case "rw": - r, w = true, true - default: - continue - } - - lhs := op.NameLHS() - if strings.HasPrefix(lhs, "MEM") { - add("mem", unify.NewValue(unify.NewStringExact("true"))) - add("w", unify.NewValue(unify.NewStringExact("TODO"))) - add("base", unify.NewValue(unify.NewStringExact("TODO"))) - } else if strings.HasPrefix(lhs, "REG") { - if op.Width == "mskw" { - add("mask", unify.NewValue(unify.NewStringExact("true"))) - add("w", unify.NewValue(unify.NewStringExact("TODO"))) - add("base", unify.NewValue(unify.NewStringExact("TODO"))) - } else { - width, ok := decodeReg(op) - if !ok { - return + ops = append(ops, op) + } + + // XED doesn't encode the size of mask operands. If there are mask operands, + // try to infer their sizes from other operands. + // + // This is a heuristic and it falls apart in some cases: + // + // - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate + // mask size. + // + // - VINSERT*, VPSLL*, VPSRA*, and VPSRL* and some others naturally have + // mixed input sizes and the XED doesn't indicate which operands the mask + // applies to. + // + // - VPDP* and VP4DP* have really complex mixed operand patterns. + // + // I think for these we may just have to hand-write a table of which + // operands each mask applies to. 
+ inferMask := func(r, w bool) error { + var masks []int + var rSizes, wSizes, sizes []vecShape + for i, op := range ops { + action := op.common().action + if _, ok := op.(operandMask); ok { + if action.r && action.w { + return fmt.Errorf("unexpected rw mask") + } + if action.r == r || action.w == w { + masks = append(masks, i) } - baseRe, bits, ok := decodeBits(op) - if !ok { - return + } else if reg, ok := op.(operandVReg); ok { + if action.r { + rSizes = append(rSizes, reg.vecShape) } - baseDomain, err := unify.NewStringRegex(baseRe) - if err != nil { - panic("parsing baseRe: " + err.Error()) + if action.w { + wSizes = append(wSizes, reg.vecShape) } - add("bits", unify.NewValue(unify.NewStringExact(fmt.Sprint(bits)))) - add("w", unify.NewValue(unify.NewStringExact(fmt.Sprint(width)))) - add("base", unify.NewValue(baseDomain)) } - } else { - // TODO: Immediates - add("UNKNOWN", unify.NewValue(unify.NewStringExact(o))) - } - // dq => 128 bits (XMM) - // qq => 256 bits (YMM) - // mskw => K - // z[iuf?](8|16|32|...) => 512 bits (ZMM) - // - // Are these always XMM/YMM/ZMM or can other irregular things - // with large widths use these same codes? - // - // The only zi* is zi32. I don't understand the difference between - // zi32 and zu32 or why there are a bunch of zu* but only one zi. - // - // The xtype tells you the element type. i8, i16, i32, i64, etc. - // - // Things like AVX2 VPAND have an xtype of u256. - // I think we have to map that to all widths. - // There's no u512 (presumably those are all masked, so elem width matters). - // These are all Category: LOGICAL. Maybe we use that info? 
+ } + if len(masks) == 0 { + return nil + } if r { + sizes = rSizes + if len(sizes) == 0 { + sizes = wSizes + } + } + if w { + sizes = wSizes + if len(sizes) == 0 { + sizes = rSizes + } + } + + if len(sizes) == 0 { + return fmt.Errorf("cannot infer mask size: no register operands") + } + shape, ok := singular(sizes) + if !ok { + return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) + } + for _, i := range masks { + m := ops[i].(operandMask) + m.vecShape = shape + ops[i] = m + } + return nil + } + if err := inferMask(true, false); err != nil { + return fail(err) + } + if err := inferMask(false, true); err != nil { + return fail(err) + } + + var inVals, outVals []*unify.Value + for asmPos, op := range ops { + fields, values := op.toValue() + + fields = append(fields, "asmPos") + values = append(values, unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) + + action := op.common().action + if action.r { inVal := unify.NewValue(unify.NewDef(fields, values)) inVals = append(inVals, inVal) } - if w { + if action.w { outVal := unify.NewValue(unify.NewDef(fields, values)) outVals = append(outVals, outVal) } } - return unify.NewTuple(inVals...), unify.NewTuple(outVals...) + return unify.NewTuple(inVals...), unify.NewTuple(outVals...), nil +} + +func singular[T comparable](xs []T) (T, bool) { + if len(xs) == 0 { + return *new(T), false + } + for _, x := range xs[1:] { + if x != xs[0] { + return *new(T), false + } + } + return xs[0], true } func decodeReg(op *xeddata.Operand) (w int, ok bool) { + // op.Width tells us the total width, e.g.,: + // + // dq => 128 bits (XMM) + // qq => 256 bits (YMM) + // mskw => K + // z[iuf?](8|16|32|...) => 512 bits (ZMM) + // + // But the encoding is really weird and it's not clear if these *always* + // mean XMM/YMM/ZMM or if other irregular things can use these large widths. + // Hence, we dig into the register sets themselves. 
+ if !strings.HasPrefix(op.NameLHS(), "REG") { return 0, false } @@ -181,31 +379,75 @@ func decodeReg(op *xeddata.Operand) (w int, ok bool) { var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) -func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) { +// scalarBaseType describes the base type of a scalar element. This is a Go +// type, but without the bit width suffix (with the exception of +// scalarBaseIntOrUint). +type scalarBaseType int + +const ( + scalarBaseInt scalarBaseType = iota + scalarBaseUint + scalarBaseIntOrUint // Signed or unsigned is unspecified + scalarBaseFloat + scalarBaseComplex + scalarBaseBFloat + scalarBaseHFloat +) + +func (s scalarBaseType) regex() string { + switch s { + case scalarBaseInt: + return "int" + case scalarBaseUint: + return "uint" + case scalarBaseIntOrUint: + return "int|uint" + case scalarBaseFloat: + return "float" + case scalarBaseComplex: + return "complex" + case scalarBaseBFloat: + return "BFloat" + case scalarBaseHFloat: + return "HFloat" + } + panic(fmt.Sprintf("unknown scalar base type %d", s)) +} + +func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) { + // The xtype tells you the element type. i8, i16, i32, i64, f32, etc. + // + // TODO: Things like AVX2 VPAND have an xtype of u256 because they're + // element-width agnostic. Do I map that to all widths, or just omit the + // element width and let unification flesh it out? There's no u512 + // (presumably those are all masked, so elem width matters). These are all + // Category: LOGICAL, so maybe we could use that info? + // Handle some weird ones. switch op.Xtype { // 8-bit float formats as defined by Open Compute Project "OCP 8-bit // Floating Point Specification (OFP8)". 
- case "bf8", // E5M2 float - "hf8": // E4M3 float - return "", 0, false // TODO + case "bf8": // E5M2 float + return scalarBaseBFloat, 8, true + case "hf8": // E4M3 float + return scalarBaseHFloat, 8, true case "bf16": // bfloat16 float - return "", 0, false // TODO + return scalarBaseBFloat, 16, true case "2f16": // Complex consisting of 2 float16s. Doesn't exist in Go, but we can say // what it would be. - return "complex", 32, true + return scalarBaseComplex, 32, true case "2i8", "2I8": // These just use the lower INT8 in each 16 bit field. // As far as I can tell, "2I8" is a typo. - return "int", 8, true + return scalarBaseInt, 8, true } // The rest follow a simple pattern. m := xtypeRe.FindStringSubmatch(op.Xtype) if m == nil { // TODO: Report unrecognized xtype - return "", 0, false + return 0, 0, false } bits, _ = strconv.Atoi(m[2]) switch m[1] { @@ -213,9 +455,10 @@ func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) { // XED is rather inconsistent about what's signed, unsigned, or doesn't // matter, so merge them together and let the Go definitions narrow as // appropriate. Maybe there's a better way to do this. - baseRe = "int|uint" + return scalarBaseIntOrUint, bits, true case "f": - baseRe = "float" + return scalarBaseFloat, bits, true + default: + panic("unreachable") } - return baseRe, bits, true } diff --git a/x86/xeddata/operand.go b/x86/xeddata/operand.go index 8de99d67..33c5610f 100644 --- a/x86/xeddata/operand.go +++ b/x86/xeddata/operand.go @@ -44,8 +44,8 @@ type Operand struct { // Action describes argument types. // - // Possible values: "r", "w", "rw", "cr", "cw", "crw". - // Optional "c" prefix represents conditional access. + // Possible values: "r", "w", "rw", "cr", "cw", "crw", "rcw". + // The "c" prefix before "r" or "w" represents conditional read or write. Action string // Width descriptor. 
It can express simple width like "w" (word, 16bit) From 302262805d806c94f67ec5ee3585f3ddbb1caa84 Mon Sep 17 00:00:00 2001 From: Vishwanatha HD Date: Wed, 9 Apr 2025 12:17:20 +0000 Subject: [PATCH 056/200] s390x/s390xasm: fix plan9 disassembly regressions on s390x Regressions were seen in plan9 disassembled syntax on s390x machines. Raising a CL to fix all of them. Below are the plan9 syntax regressions noticed: 1) LARL was printed instead of MOVD. 2) Operands for LGDR and LCDBR were printed in a reverse order. 3) MADBR was printed instead of FMADD. 4) VFM was printed instead of WFMDB. Also the mask fields were unnecessarily getting printed. 5) VFS was printed instead of WFSDB. Also the mask fields were unnecessarily getting printed. 6) JMP was printed instead of BR. 7) JMP R14 was printed instead of RET. 8) BRC was printed instead of BVS. Change-Id: I9166f8ab51ad827bfeeed24a219ceb9b8c41c470 Reviewed-on: https://go-review.googlesource.com/c/arch/+/663756 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Auto-Submit: Keith Randall --- s390x/s390xasm/decode_test.go | 9 -------- s390x/s390xasm/plan9.go | 35 ++++++++++++++++++++---------- s390x/s390xasm/testdata/decode.txt | 8 +++++++ 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/s390x/s390xasm/decode_test.go b/s390x/s390xasm/decode_test.go index 29bce8e8..047ebeed 100644 --- a/s390x/s390xasm/decode_test.go +++ b/s390x/s390xasm/decode_test.go @@ -30,15 +30,6 @@ func TestDecode(t *testing.T) { } } -// Provide a fake symbol to verify PCrel argument decoding. -func symlookup(pc uint64) (string, uint64) { - foopc := uint64(0x100000) - if pc >= foopc && pc < foopc+0x10 { - return "foo", foopc - } - return "", 0 -} - func decode(data []byte, t *testing.T, filename string) { all := string(data) // Simulate PC based on number of instructions found in the test file. 
diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go index 95464294..482433b4 100644 --- a/s390x/s390xasm/plan9.go +++ b/s390x/s390xasm/plan9.go @@ -142,7 +142,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case LRVH: op = "MOVHBR" } - case LA, LAY: + case LA, LAY, LARL: args[0], args[1] = args[1], args[0] op = "MOVD" @@ -349,6 +349,17 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin case SLBGR: op = "SUBE" args[0], args[1] = args[1], args[0] + case MADBR: + op = "FMADD" + args[0], args[1], args[2] = args[1], args[2], args[0] + case VFM: + op = "WFMDB" + args[0], args[1], args[2] = args[1], args[2], args[0] + args = args[0:3] + case VFS: + op = "WFSDB" + args[0], args[1], args[2] = args[2], args[1], args[0] + args = args[0:3] case MSGFR, MHI, MSFI, MSGFI: switch inst.Op { case MSGFR, MHI, MSFI: @@ -500,16 +511,16 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin if err != nil { return fmt.Sprintf("GoSyntax: error in converting Atoi:%s", err) } - opStr, check := branchOnConditionOp(mask, inst.Op) + opStr := branchOnConditionOp(mask, inst.Op) if opStr != "" { op = opStr } if op == "SYNC" || op == "NOPH" { return op } - if check { - args[0] = args[1] - args = args[:1] + if op == "RET" { + args = args[:0] + return op } case LOCGR: mask, err := strconv.Atoi(args[2][1:]) @@ -1036,6 +1047,9 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin // branch on relative mnemonic. 
func branch_relative_op(mask int, opconst Op) (op string, check bool) { switch mask & 0xf { + case 1: + op = "BVS" + check = true case 2: op = "BGT" check = true @@ -1061,7 +1075,7 @@ func branch_relative_op(mask int, opconst Op) (op string, check bool) { op = "BLEU" check = true case 15: - op = "JMP" // BR + op = "BR" check = true } return op, check @@ -1069,17 +1083,16 @@ func branch_relative_op(mask int, opconst Op) (op string, check bool) { // This function returns corresponding extended mnemonic for the given // brach on condition mnemonic. -func branchOnConditionOp(mask int, opconst Op) (op string, check bool) { +func branchOnConditionOp(mask int, opconst Op) (op string) { switch mask & 0xf { case 0: op = "NOPH" case 14: op = "SYNC" case 15: - op = "JMP" - check = true + op = "RET" } - return op, check + return op } // This function returns corresponding plan9 mnemonic for the native bitwise mnemonic. @@ -1260,7 +1273,7 @@ func reverseOperandOrder(op Op) bool { switch op { case LOCR, MLGR: return true - case LTEBR, LTDBR: + case LTEBR, LTDBR, LCDBR, LGDR: return true case VLEIB, VLEIH, VLEIF, VLEIG, VPDI: return true diff --git a/s390x/s390xasm/testdata/decode.txt b/s390x/s390xasm/testdata/decode.txt index f04715b2..29a5e699 100644 --- a/s390x/s390xasm/testdata/decode.txt +++ b/s390x/s390xasm/testdata/decode.txt @@ -14,6 +14,7 @@ b9e24098| plan9 MOVDLT R8, R9 b9e270ba| plan9 MOVDNE R10, R11 b9f23012| plan9 LOCR $3, R2, R1 + b3130020| plan9 LCDBR F0, F2 b9e27065| plan9 MOVDNE R5, R6 e310f0000004| plan9 MOVD (R15), R1 e320f0000014| plan9 MOVW (R15), R2 @@ -122,6 +123,13 @@ c017fffffffe| plan9 XORW $-2, R1 b93a0008| plan9 KDSA R0, R8 b9296024| plan9 KMA R2, R6, R4 b92d6024| plan9 KMCTR R2, R6, R4 + b31e0042| plan9 FMADD F4, F2, F0 +e748a00830e7| plan9 WFMDB V8, V10, V4 +e743000830e2| plan9 WFSDB V0, V3, V4 + b3cd0026| plan9 LGDR F6, R2 + a7f4008c| plan9 BR 70(PC) + a7140005| plan9 BVS 2(PC) + 07fe| plan9 RET e743400000f3| plan9 VAB V3, V4 e743600000f3| plan9 VAB 
V3, V6, V4 e743400010f3| plan9 VAH V3, V4 From b0f513ddd30597a7a8c8ec3e32df653a6b016054 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 21 Apr 2025 19:48:03 +0000 Subject: [PATCH 057/200] internal/simdgen: complete the godef code gen This generator supports generating the godefs of SIMD instructions with some shape constraints, please check gen_utility.go for the details. Change-Id: I87151740e161919c67c3f20c0258de6611e4955e Reviewed-on: https://go-review.googlesource.com/c/arch/+/667155 LUCI-TryBot-Result: Go LUCI Auto-Submit: Junyang Shao Reviewed-by: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/.gitignore | 1 + internal/simdgen/categories.yaml | 119 +++- internal/simdgen/gen_simdGenericOps.go | 57 ++ internal/simdgen/gen_simdIntrinsics.go | 109 ++++ internal/simdgen/gen_simdMachineOps.go | 130 ++++ internal/simdgen/gen_simdTypes.go | 227 +++++++ internal/simdgen/gen_simdrules.go | 146 +++++ internal/simdgen/gen_simdssa.go | 210 +++++++ internal/simdgen/gen_utility.go | 581 ++++++++++++++++++ internal/simdgen/go.yaml | 558 +++++++++++------ internal/simdgen/godefs.go | 170 +++-- internal/simdgen/main.go | 61 +- internal/simdgen/ops/AddSub/categories.yaml | 29 + internal/simdgen/ops/AddSub/go.yaml | 101 +++ .../simdgen/ops/BitwiseLogic/categories.yaml | 31 + internal/simdgen/ops/BitwiseLogic/go.yaml | 149 +++++ internal/simdgen/ops/Compares/categories.yaml | 19 + internal/simdgen/ops/Compares/go.yaml | 57 ++ .../simdgen/ops/FPonlyArith/categories.yaml | 8 + internal/simdgen/ops/FPonlyArith/go.yaml | 18 + internal/simdgen/ops/Mul/categories.yaml | 35 ++ internal/simdgen/ops/Mul/go.yaml | 116 ++++ internal/simdgen/ops/main.go | 75 +++ internal/simdgen/types.yaml | 86 +-- internal/simdgen/xed.go | 52 +- 25 files changed, 2817 insertions(+), 328 deletions(-) create mode 100644 internal/simdgen/.gitignore create mode 100644 internal/simdgen/gen_simdGenericOps.go create mode 100644 internal/simdgen/gen_simdIntrinsics.go create mode 100644 
internal/simdgen/gen_simdMachineOps.go create mode 100644 internal/simdgen/gen_simdTypes.go create mode 100644 internal/simdgen/gen_simdrules.go create mode 100644 internal/simdgen/gen_simdssa.go create mode 100644 internal/simdgen/gen_utility.go create mode 100644 internal/simdgen/ops/AddSub/categories.yaml create mode 100644 internal/simdgen/ops/AddSub/go.yaml create mode 100644 internal/simdgen/ops/BitwiseLogic/categories.yaml create mode 100644 internal/simdgen/ops/BitwiseLogic/go.yaml create mode 100644 internal/simdgen/ops/Compares/categories.yaml create mode 100644 internal/simdgen/ops/Compares/go.yaml create mode 100644 internal/simdgen/ops/FPonlyArith/categories.yaml create mode 100644 internal/simdgen/ops/FPonlyArith/go.yaml create mode 100644 internal/simdgen/ops/Mul/categories.yaml create mode 100644 internal/simdgen/ops/Mul/go.yaml create mode 100644 internal/simdgen/ops/main.go diff --git a/internal/simdgen/.gitignore b/internal/simdgen/.gitignore new file mode 100644 index 00000000..1cc9ae43 --- /dev/null +++ b/internal/simdgen/.gitignore @@ -0,0 +1 @@ +testdata/* diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index b7be71ce..1f2fb056 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,7 +1,118 @@ !sum - go: Add - category: binary -- go: AddSaturated - category: binary + commutative: "true" + extension: "AVX.*" +- go: SaturatedAdd + commutative: "true" + extension: "AVX.*" +- go: MaskedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" - go: Sub - category: binary + commutative: "true" + extension: "AVX.*" +- go: SaturatedSub + commutative: "true" + extension: "AVX.*" +- go: MaskedSub + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedSub + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: And + commutative: "true" + extension: "AVX.*" 
+- go: MaskedAnd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Or + commutative: "true" + extension: "AVX.*" +- go: MaskedOr + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: AndNot + commutative: "true" + extension: "AVX.*" +- go: MaskedAndNot + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Xor + commutative: "true" + extension: "AVX.*" +- go: MaskedXor + masked: "true" + commutative: "true" + extension: "AVX.*" +# We also have PTEST and VPTERNLOG, those should be hidden from the users +# and only appear in rewrite rules. +- go: Equal + constImm: 0 + commutative: "true" + extension: "AVX.*" +- go: Greater + constImm: 6 + commutative: "false" + extension: "AVX.*" +- go: MaskedEqual + constImm: 0 + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedGreater + constImm: 6 + masked: "true" + commutative: "false" + extension: "AVX.*" +- go: Div + commutative: "true" + extension: "AVX.*" +- go: MaskedDiv + commutative: "true" + masked: "true" + extension: "AVX.*" +- go: Mul + commutative: "true" + extension: "AVX.*" +- go: MulEvenWiden + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" +- go: MulHigh + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MulLow + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" +- go: MaskedMul + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedMulEvenWiden + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at 
index i/2" +- go: MaskedMulHigh + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MaskedMulLow + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go new file mode 100644 index 00000000..2b0fa008 --- /dev/null +++ b/internal/simdgen/gen_simdGenericOps.go @@ -0,0 +1,57 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "sort" +) + +const simdGenericOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +package main + +func simdGenericOps() []opData { + return []opData{ +{{- range . }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, +{{- end }} + } +} +` + +// writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go +// within the specified directory. 
+func writeSIMDGenericOps(directory string, ops []Operation) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go", simdGenericOpsTmpl) + if err != nil { + return err + } + defer file.Close() + type genericOpsData struct { + sortKey string + OpName string + OpInLen int + Comm string + } + opsData := make([]genericOpsData, 0) + for _, op := range ops { + _, _, _, _, _, gOp, err := op.shape() + if err != nil { + return err + } + genericNames := gOp.Go + *gOp.In[0].Go + opsData = append(opsData, genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative}) + } + sort.Slice(opsData, func(i, j int) bool { + return opsData[i].sortKey < opsData[j].sortKey + }) + + err = t.Execute(file, opsData) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go new file mode 100644 index 00000000..5d4a27f2 --- /dev/null +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -0,0 +1,109 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" +) + +const simdIntrinsicsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+package ssagen + +import ( + "cmd/compile/internal/ir" + "cmd/compile/internal/ssa" + "cmd/compile/internal/types" + "cmd/internal/sys" +) + +func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { +{{- range .OpsLen1}} + addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} +{{- range .OpsLen2}} + addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} +{{- range .OpsLen3}} + addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} + +{{- range .VectorConversions }} + addF("internal/simd", "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) +{{- end}} + +{{- range $size, $ts := .TypeMap }} +{{- range $t := $ts }} + addF("internal/simd", "Load{{$t.Name}}", simdLoad(), sys.AMD64) + addF("internal/simd", "{{$t.Name}}.Store", simdStore(), sys.AMD64) +{{- end}} +{{- end}} +{{- range .Masks }} + addF("internal/simd", "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF("internal/simd", "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF("internal/simd", "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF("internal/simd", "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) +{{- end}} +} + +func opLen1(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(op, t, args[0]) + } +} + +func opLen2(op ssa.Op, t *types.Type) func(s *state, n 
*ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(op, t, args[0], args[1]) + } +} + +func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue3(op, t, args[0], args[1], args[2]) + } +} + +func simdLoad() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpLoad, n.Type(), args[0], s.mem()) + } +} + +func simdStore() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.store(args[0].Type, args[1], args[0]) + return nil + } +} +` + +// writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go +// within the specified directory. +func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go", simdIntrinsicsTmpl) + if err != nil { + return err + } + defer file.Close() + opsLen1, opsLen2, opsLen3, err := genericOpsByLen(ops) + if err != nil { + return err + } + + type templateData struct { + OpsLen1 []Operation + OpsLen2 []Operation + OpsLen3 []Operation + TypeMap simdTypeMap + VectorConversions []simdTypePair + Masks []simdType + } + err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, typeMap, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go new file mode 100644 index 00000000..0deec9c6 --- /dev/null +++ b/internal/simdgen/gen_simdMachineOps.go @@ -0,0 +1,130 @@ +// Copyright 2025 The Go 
Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "sort" +) + +const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +package main + +func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp2m1fp1, fp2m1m1 regInfo) []opData { + return []opData{ +{{- range .OpsData }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}"}, +{{- end }} +{{- range .OpsDataImm }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "Int8", commutative: {{.Comm}}, typ: "{{.Type}}"}, +{{- end }} + } +} +` + +// writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go +// within the specified directory. +func writeSIMDMachineOps(directory string, ops []Operation) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go", simdMachineOpsTmpl) + if err != nil { + return err + } + defer file.Close() + type opData struct { + sortKey string + OpName string + Asm string + OpInLen int + RegInfo string + Comm string + Type string + } + type machineOpsData struct { + OpsData []opData + OpsDataImm []opData + } + seen := map[string]struct{}{} + regInfoSet := map[string]bool{"fp1fp1": true, "fp2fp1": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true} + opsData := make([]opData, 0) + opsDataImm := make([]opData, 0) + for _, op := range ops { + shapeIn, shapeOut, maskType, _, _, gOp, err := op.shape() + if err != nil { + return err + } + asm := gOp.Asm + if maskType == OneMask { + asm += "Masked" + } + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy + // one 
here with a name suffix "Merging". The rewrite rules will need them. + if _, ok := seen[asm]; ok { + continue + } + seen[asm] = struct{}{} + var regInfo string + // Process input reg shapes. + var vRegInCnt, kMaskInCnt, vRegOutCnt, kMaskOutCnt int + for _, in := range gOp.In { + if in.Class == "vreg" { + vRegInCnt++ + } else if in.Class == "mask" { + kMaskInCnt++ + } + } + for _, out := range gOp.Out { + // If class overwrite is happening, that's not really a mask but a vreg. + if out.Class == "vreg" || out.OverwriteClass != nil { + vRegOutCnt++ + } else if out.Class == "mask" { + kMaskOutCnt++ + } + } + var vRegInS, kMaskInS, vRegOutS, kMaskOutS string + if vRegInCnt > 0 { + vRegInS = fmt.Sprintf("fp%d", vRegInCnt) + } + if kMaskInCnt > 0 { + kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) + } + if vRegOutCnt > 0 { + vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) + } + if kMaskOutCnt > 0 { + kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + } + regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) + if _, ok := regInfoSet[regInfo]; !ok { + return fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo) + } + var outType string + if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { + // If class overwrite is happening, that's not really a mask but a vreg. 
+ outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) + } else if shapeOut == OneKmaskOut { + outType = "Mask" + } else { + return fmt.Errorf("simdgen does not recognize this output shape: %+v", shapeOut) + } + if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + } else { + opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + } + } + sort.Slice(opsData, func(i, j int) bool { + return opsData[i].sortKey < opsData[j].sortKey + }) + sort.Slice(opsDataImm, func(i, j int) bool { + return opsDataImm[i].sortKey < opsDataImm[j].sortKey + }) + err = t.Execute(file, machineOpsData{opsData, opsDataImm}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go new file mode 100644 index 00000000..14395010 --- /dev/null +++ b/internal/simdgen/gen_simdTypes.go @@ -0,0 +1,227 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "strings" +) + +type simdType struct { + Name string // The go type name of this simd type, for example Int32x4. + Lanes int // The number of elements in this vector/mask. + Base string // The element's type, like for Int32x4 it will be int32. + Fields string // The struct fields, it should be right formatted. + Type string // Either "mask" or "vreg" + VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" + ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32. 
+ Size int // The size of the type +} + +type simdTypeMap map[int][]simdType + +type simdTypePair struct { + Tsrc simdType + Tdst simdType +} + +const simdTypesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. + +package simd + +{{- range $size, $ts := .TypeMap }} + +// v{{$size}} is a tag type that tells the compiler that this is really {{$size}}-bit SIMD +type v{{$size}} struct { + _{{$size}} struct{} +} + +{{- range $i, $tsrc := $ts }} + +// {{$tsrc.Name}} is a {{$size}}-bit SIMD vector of {{$tsrc.Lanes}} {{$tsrc.Base}} +type {{$tsrc.Name}} struct { +{{$tsrc.Fields}} +} + +// Len returns the number of elements in a {{$tsrc.Name}} +func (x {{$tsrc.Name}}) Len() int { return {{$tsrc.Lanes}} } + +// Load{{$tsrc.Name}} loads a {{$tsrc.Name}} from an array +// +//go:noescape +func Load{{$tsrc.Name}}(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) {{$tsrc.Name}} + +// Store stores a {{$tsrc.Name}} to an array +// +//go:noescape +func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) + +{{- end}} +{{- end}} +` + +const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+ +package simd + +{{- range .OpsLen1}} + +// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen2}} + +// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen3}} + +// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .VectorConversions }} + +// {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} +func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) + +{{- end}} +{{- range .Masks }} + +// converts from {{.Name}} to {{.VectorCounterpart}} +func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) + +// converts from {{.VectorCounterpart}} to {{.Name}} +func (from {{.VectorCounterpart}}) As{{.Name}}() (to {{.Name}}) + +func (x {{.Name}}) And(y {{.Name}}) {{.Name}} + +func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} + +{{- end}} +` + +// parseSIMDTypes groups go simd types by their vector sizes, and +// returns a map whose key is the vector size, value is the simd type. +func parseSIMDTypes(ops []Operation) simdTypeMap { + // TODO: maybe instead of going over ops, let's try go over types.yaml. + ret := map[int][]simdType{} + seen := map[string]struct{}{} + processArg := func(arg Operand) { + if arg.Class == "immediate" { + // Immediates are not encoded as vector types. 
+ return + } + if _, ok := seen[*arg.Go]; ok { + return + } + seen[*arg.Go] = struct{}{} + lanes := *arg.Bits / *arg.ElemBits + base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits) + tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes) + tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits) + valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base) + fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS) + if arg.Class == "mask" { + vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") + reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) + // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. + if _, ok := seen[vectorCounterpart]; !ok { + seen[vectorCounterpart] = struct{}{} + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) + } + } else { + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) + } + } + for _, op := range ops { + for _, arg := range op.In { + processArg(arg) + } + for _, arg := range op.Out { + processArg(arg) + } + } + return ret +} + +func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { + v := []simdTypePair{} + for _, ts := range typeMap { + for i, tsrc := range ts { + for j, tdst := range ts { + if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" { + v = append(v, simdTypePair{tsrc, tdst}) + } + } + } + } + return v +} + +func masksFromTypeMap(typeMap simdTypeMap) []simdType { + m := []simdType{} + for _, ts := range typeMap { + for _, tsrc := range ts { + if tsrc.Type == "mask" { + m = append(m, tsrc) + } + } + } + return m +} + +// writeSIMDTypes generates the simd vector type and writes it to types_amd64.go +// within the 
specified directory. +func writeSIMDTypes(directory string, typeMap simdTypeMap) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/types_amd64.go", simdTypesTmpl) + if err != nil { + return err + } + defer file.Close() + + type templateData struct { + TypeMap simdTypeMap + } + + err = t.Execute(file, templateData{typeMap}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} + +// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go +// within the specified directory. +func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/stubs_amd64.go", simdStubsTmpl) + if err != nil { + return err + } + defer file.Close() + opsLen1, opsLen2, opsLen3, err := genericOpsByLen(ops) + if err != nil { + return err + } + + type templateData struct { + OpsLen1 []Operation + OpsLen2 []Operation + OpsLen3 []Operation + VectorConversions []simdTypePair + Masks []simdType + } + + err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) + if err != nil { + return fmt.Errorf("failed to execute template : %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go new file mode 100644 index 00000000..c368c770 --- /dev/null +++ b/internal/simdgen/gen_simdrules.go @@ -0,0 +1,146 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "sort" +) + +const simdrulesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+ +// The AVX instruction encodings orders vector register from right to left, for example: +// VSUBPS X Y Z means Z=Y-X +// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a +// left to right order. +// TODO: we should offload the logic to simdssa.go, instead of here. +// +// Masks are always at the end, immediates always at the beginning. + +{{- range .Ops }} +{{if eq (len .In) 1}}({{.Go}}{{(index .In 0).Go}} x) => ({{.Asm}} x){{end}}{{if eq (len .In) 2}}({{.Go}}{{(index .In 0).Go}} x y) => ({{.Asm}} y x){{end}} +{{- end }} +{{- range .OpsImm }} +({{.Go}}{{(index .In 1).Go}} x y) => ({{.Asm}} [{{(index .In 0).Const}}] y x) +{{- end }} +{{- range .OpsMask}} +({{.Go}}{{(index .In 0).Go}} x y z) => ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z)) +{{- end }} +{{- range .OpsImmMask}} +({{.Go}}{{(index .In 1).Go}} x y z) => ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z)) +{{- end }} +{{- range .OpsMaskOut}} +({{.Go}}{{(index .In 0).Go}} x y) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x)) +{{- end }} +{{- range .OpsImmInMaskOut}} +({{.Go}}{{(index .In 1).Go}} x y) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x)) +{{- end }} +{{- range .OpsMaskInMaskOut}} +({{.Go}}{{(index .In 0).Go}} x y z) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z))) +{{- end }} +{{- range .OpsImmMaskInMaskOut}} +({{.Go}}{{(index .In 1).Go}} x y z) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z))) +{{- end }} +` + +// writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules +// within the specified 
directory. +func writeSIMDRules(directory string, ops []Operation) error { + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules", simdrulesTmpl) + if err != nil { + return err + } + defer file.Close() + Ops := make([]Operation, 0) + OpsImm := make([]Operation, 0) + OpsMask := make([]Operation, 0) + OpsImmMask := make([]Operation, 0) + OpsMaskOut := make([]Operation, 0) + OpsImmInMaskOut := make([]Operation, 0) + OpsMaskInMaskOut := make([]Operation, 0) + OpsImmMaskInMaskOut := make([]Operation, 0) + + for _, op := range ops { + opInShape, opOutShape, maskType, _, op, _, err := op.shape() + if err != nil { + return err + } + if maskType == OneMask { + op.Asm += "Masked" + } + op.Asm = fmt.Sprintf("%s%d", op.Asm, *op.Out[0].Bits) + // If class overwrite is happening, that's not really a mask but a vreg. + if opOutShape == OneVregOut || op.Out[0].OverwriteClass != nil { + switch opInShape { + case PureVregIn: + Ops = append(Ops, op) + case OneKmaskIn: + OpsMask = append(OpsMask, op) + case OneConstImmIn: + OpsImm = append(OpsImm, op) + case OneKmaskConstImmIn: + OpsImmMask = append(OpsImmMask, op) + case PureKmaskIn: + return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + } + } else { + // OneKmaskOut case + switch opInShape { + case PureVregIn: + OpsMaskOut = append(OpsMaskOut, op) + case OneKmaskIn: + OpsMaskInMaskOut = append(OpsMaskInMaskOut, op) + case OneConstImmIn: + OpsImmInMaskOut = append(OpsImmInMaskOut, op) + case OneKmaskConstImmIn: + OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, op) + case PureKmaskIn: + return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + } + } + } + sortKey := func(op *Operation) string { + return *op.In[0].Go + op.Go + } + sortBySortKey := func(ops []Operation) { + sort.Slice(ops, func(i, j int) bool { + return sortKey(&ops[i]) < 
sortKey(&ops[j]) + }) + } + sortBySortKey(Ops) + sortBySortKey(OpsImm) + sortBySortKey(OpsMask) + sortBySortKey(OpsImmMask) + sortBySortKey(OpsMaskOut) + sortBySortKey(OpsImmInMaskOut) + sortBySortKey(OpsMaskInMaskOut) + sortBySortKey(OpsImmMaskInMaskOut) + + type templateData struct { + Ops []Operation + OpsImm []Operation + OpsMask []Operation + OpsImmMask []Operation + OpsMaskOut []Operation + OpsImmInMaskOut []Operation + OpsMaskInMaskOut []Operation + OpsImmMaskInMaskOut []Operation + } + + err = t.Execute(file, templateData{ + Ops, + OpsImm, + OpsMask, + OpsImmMask, + OpsMaskOut, + OpsImmInMaskOut, + OpsMaskInMaskOut, + OpsImmMaskInMaskOut}) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go new file mode 100644 index 00000000..10222bc7 --- /dev/null +++ b/internal/simdgen/gen_simdssa.go @@ -0,0 +1,210 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "strings" +) + +const simdssaTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+ +package amd64 + +import ( + "cmd/compile/internal/ssa" + "cmd/compile/internal/ssagen" + "cmd/internal/obj" + "cmd/internal/obj/x86" +) + +func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { + p := s.Prog(v.Op.Asm()) + // First arg + switch v.Op {{"{"}}{{if gt (len .ImmFirst) 0}} + // Imm + case {{.ImmFirst}}: + imm := v.AuxInt + if imm < 0 || imm > 255 { + v.Fatalf("Invalid source selection immediate") + } + p.From.Offset = imm + p.From.Type = obj.TYPE_CONST +{{end}}{{if gt (len .VregFirst) 0}} + // vreg + case {{.VregFirst}}: + p.From.Type = obj.TYPE_REG + p.From.Reg = simdReg(v.Args[0]) +{{end}} + default: + // At least one arg is required. + return false + } + + // Second arg + switch v.Op {{"{"}}{{if gt (len .VregSecond) 0}} + // vreg + case {{.VregSecond}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(simdReg(v.Args[0])) + } else { + p.AddRestSourceReg(simdReg(v.Args[1])) + }{{end}} + } + + // Third arg + switch v.Op {{"{"}}{{if gt (len .VregThird) 0}} + // vreg + case {{.VregThird}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(simdReg(v.Args[1])) + } else { + p.AddRestSourceReg(simdReg(v.Args[2])) + } +{{end}}{{if gt (len .MaskThird) 0}} + // k mask + case {{.MaskThird}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(v.Args[1].Reg()) + } else { + p.AddRestSourceReg(v.Args[2].Reg()) + }{{end}} + } + + // Fourth arg + switch v.Op {{"{"}}{{if gt (len .MaskFourth) 0}} + case {{.MaskFourth}}: + if p.From.Type == obj.TYPE_CONST { + p.AddRestSourceReg(v.Args[2].Reg()) + } else { + p.AddRestSourceReg(v.Args[3].Reg()) + }{{end}} + } + + // Output + switch v.Op {{"{"}}{{if gt (len .VregOut) 0}} + case {{.VregOut}}: + p.To.Type = obj.TYPE_REG + p.To.Reg = simdReg(v) +{{end}}{{if gt (len .MaskOut) 0}} + case {{.MaskOut}}: + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() +{{end}} + default: + // One result is required. + return false + } +{{if gt (len .ZeroingMask) 0}} + // Masked operation are always compiled with zeroing. 
+ switch v.Op { + case {{.ZeroingMask}}: + x86.ParseSuffix(p, "Z") + } +{{end}} + return true +} +` + +// writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go +// within the specified directory. +func writeSIMDSSA(directory string, ops []Operation) error { + var ImmFirst []string + var VregFirst []string + var VregSecond []string + var MaskThird []string + var VregThird []string + var MaskFourth []string + var VregOut []string + var MaskOut []string + var ZeroingMask []string + + seen := map[string]struct{}{} + for _, op := range ops { + asm := op.Asm + shapeIn, shapeOut, maskType, _, _, gOp, err := op.shape() + if err != nil { + return err + } + if maskType == 2 { + asm += "Masked" + } + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + if _, ok := seen[asm]; ok { + continue + } + seen[asm] = struct{}{} + caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) + if shapeIn == PureVregIn || shapeIn == PureKmaskIn { + // Masks and vreg are handled together by simdReg() + VregFirst = append(VregFirst, caseStr) + if len(gOp.In) > 1 { + VregSecond = append(VregSecond, caseStr) + } + } else if shapeIn == OneKmaskIn { + VregFirst = append(VregFirst, caseStr) + VregSecond = append(VregSecond, caseStr) + MaskThird = append(MaskThird, caseStr) + if gOp.Zeroing == nil { + ZeroingMask = append(ZeroingMask, caseStr) + } + } else if shapeIn == OneConstImmIn { + ImmFirst = append(ImmFirst, caseStr) + VregSecond = append(VregSecond, caseStr) + VregThird = append(VregThird, caseStr) + } else { + // OneKmaskConstImmIn case + ImmFirst = append(ImmFirst, caseStr) + VregSecond = append(VregSecond, caseStr) + VregThird = append(VregThird, caseStr) + MaskFourth = append(MaskFourth, caseStr) + if gOp.Zeroing == nil { + ZeroingMask = append(ZeroingMask, caseStr) + } + } + if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { + // If class overwrite is happening, that's not really a mask but a vreg. 
+ VregOut = append(VregOut, caseStr) + } else { + // OneKmaskOut case + MaskOut = append(MaskOut, caseStr) + } + } + + data := struct { + ImmFirst string + VregFirst string + VregSecond string + MaskThird string + VregThird string + MaskFourth string + VregOut string + MaskOut string + ZeroingMask string + }{ + strings.Join(ImmFirst, ", "), + strings.Join(VregFirst, ", "), + strings.Join(VregSecond, ", "), + strings.Join(MaskThird, ", "), + strings.Join(VregThird, ", "), + strings.Join(MaskFourth, ", "), + strings.Join(VregOut, ", "), + strings.Join(MaskOut, ", "), + strings.Join(ZeroingMask, ", "), + } + + file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", simdssaTmpl) + if err != nil { + return err + } + defer file.Close() + + err = t.Execute(file, data) + if err != nil { + return fmt.Errorf("failed to execute template: %w", err) + } + + return nil +} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go new file mode 100644 index 00000000..1e822980 --- /dev/null +++ b/internal/simdgen/gen_utility.go @@ -0,0 +1,581 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "fmt" + "log" + "os" + "path/filepath" + "reflect" + "slices" + "sort" + "strings" + "text/template" + "unicode" +) + +func openFileAndPrepareTemplate(goroot string, file string, temp string) (*os.File, *template.Template, error) { + fp := filepath.Join(goroot, file) + dir := filepath.Dir(fp) + err := os.MkdirAll(dir, 0755) + if err != nil { + return nil, nil, fmt.Errorf("failed to create directory %s: %w", dir, err) + } + f, err := os.Create(fp) + if err != nil { + return nil, nil, fmt.Errorf("failed to create file %s: %w", fp, err) + } + t, err := template.New(fp).Parse(temp) + if err != nil { + f.Close() + return nil, nil, fmt.Errorf("failed to parse template: %w", err) + } + return f, t, nil +} + +const ( + InvalidIn int = iota + PureVregIn + OneKmaskIn + OneConstImmIn + OneKmaskConstImmIn + PureKmaskIn +) + +const ( + InvalidOut int = iota + NoOut + OneVregOut + OneKmaskOut +) + +const ( + InvalidMask int = iota + NoMask + OneMask + OneConstMask + AllMasks +) + +// opShape returns the an int denoting the shape of the operation: +// +// shapeIn: +// InvalidIn: unknown, with err set to the error message +// PureVregIn: pure vreg operation +// OneKmaskIn: operation with one k mask input (TODO: verify if it's always opmask predicate) +// OneConstImmIn: operation with one const imm input +// OneKmaskConstImmIn: operation with one k mask input and one const imm input +// PureKmaskIn: it's a K mask instruction (which can use K0) +// +// shapeOut: +// InvalidOut: unknown, with err set to the error message +// NoOut: no outputs, this is invalid now. +// OneVregOut: one vreg output +// OneKmaskOut: one mask output +// +// maskType: +// InvalidMask: unknown, with err set to the error message +// NoMask: no mask +// OneMask: with mask (K1 to K7) +// OneConstMask: with const mask K0 +// AllMasks: it's a K mask instruction +// +// opNoImm is op with its inputs excluding the const imm. 
+// opNoConstMask is op with its inputs excluding the const mask. +// opNoConstImmMask is op with its inputs excluding the const imm and mask. +func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Operation, opNoConstMask Operation, opNoConstImmMask Operation, err error) { + if len(op.Out) > 1 { + err = fmt.Errorf("simdgen only supports 1 output: %s", op) + return + } + if len(op.Out) == 1 { + if op.Out[0].Class == "vreg" { + shapeOut = OneVregOut + } else if op.Out[0].Class == "mask" { + shapeOut = OneKmaskOut + } else { + err = fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op) + return + } + } else { + shapeOut = NoOut + // TODO: are these only Load/Stores? + // We manually supported two Load and Store, are those enough? + err = fmt.Errorf("simdgen only supports 1 output: %s", op) + return + } + hasImm := false + maskCount := 0 + iConstMask := -1 + hasVreg := false + for i, in := range op.In { + if in.Class == "immediate" { + // A manual check on XED data found that AMD64 SIMD instructions at most + // have 1 immediates. So we don't need to check this here. + if in.Const == nil { + err = fmt.Errorf("simdgen doesn't support non-const immediates: %s", op) + return + } + if *in.Bits != 8 { + err = fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op) + return + } + hasImm = true + } else if in.Class == "mask" { + if in.Const != nil { + if *in.Const == "K0" { + if iConstMask != -1 { + err = fmt.Errorf("simdgen only supports one const mask in inputs: %s", op) + return + } + iConstMask = i + // Const mask should be invisible in ssa and prog, so we don't treat it as a mask. + // More specifically in prog, it's optional: when missing the assembler will default it to K0). + // TODO: verify the above assumption is safe. 
+ } else { + err = fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op) + } + } else { + maskCount++ + } + } else { + hasVreg = true + } + } + opNoConstImm = *op + opNoConstMask = *op + opNoConstImmMask = *op + removeConstMask := func(o *Operation) { + o.In = append(o.In[:iConstMask], o.In[iConstMask+1:]...) + } + if iConstMask != -1 { + removeConstMask(&opNoConstMask) + removeConstMask(&opNoConstImmMask) + } + removeConstImm := func(o *Operation) { + o.In = o.In[1:] + } + if hasImm { + removeConstImm(&opNoConstImm) + removeConstImm(&opNoConstImmMask) + } + if maskCount == 0 { + if iConstMask == -1 { + maskType = NoMask + } else { + maskType = OneConstMask + } + } else { + maskType = OneMask + } + checkPureMask := func() bool { + if hasImm { + err = fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op) + return true + } + if iConstMask != -1 { + err = fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op) + return true + } + if hasVreg { + err = fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op) + return true + } + return false + } + if !hasImm && maskCount == 0 { + shapeIn = PureVregIn + } else if !hasImm && maskCount > 0 { + if maskCount == 1 { + shapeIn = OneKmaskIn + } else { + if checkPureMask() { + return + } + shapeIn = PureKmaskIn + maskType = AllMasks + } + } else if hasImm && maskCount == 0 { + shapeIn = OneConstImmIn + } else { + if maskCount == 1 { + shapeIn = OneKmaskConstImmIn + } else { + checkPureMask() + return + } + } + // Exclude some shape combination that are not yet supported in simdssa.go + if shapeIn == PureVregIn { + if len(opNoConstImmMask.In) > 2 { + err = fmt.Errorf("simdgen doesn't support more than 2 vreg args: %s", op) + return + } + } + if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + if len(opNoConstImmMask.In) != 3 { + err = fmt.Errorf("simdgen only supports mask operations with 2 vreg args: %s", op) + return 
+ } + } + if shapeIn == OneConstImmIn { + if len(opNoConstImmMask.In) != 2 { + err = fmt.Errorf("simdgen only supports immediate operations with 2 vreg args: %s", op) + return + } + } + if shapeIn == PureKmaskIn { + if len(opNoConstImmMask.In) != 2 { + err = fmt.Errorf("simdgen only supports pure k mask operations with 2 vreg args: %s", op) + return + } + } + return +} + +// sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. +// TODO: verify that this is a safe assumption of the prog strcture. +// from my observation looks like in asm, imms are always the first, masks are always the last, with +// vreg in betwee... +func (op *Operation) sortOperand() { + priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} + sort.SliceStable(op.In, func(i, j int) bool { + return priority[op.In[i].Class]-priority[op.In[j].Class] > 0 + }) +} + +// genericOpsByLen returns the lists of generic ops aggregated by input length. +func genericOpsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3 []Operation, e error) { + opsLen1 = make([]Operation, 0) + opsLen2 = make([]Operation, 0) + opsLen3 = make([]Operation, 0) + for _, op := range ops { + _, shapeOut, _, _, _, gOp, err := op.shape() + if err != nil { + e = err + return + } + // Put the go ssa type in Class field, simd intrinsics need it. 
+ if shapeOut == OneVregOut || shapeOut == OneKmaskOut { + gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) + } + if len(gOp.In) == 1 { + opsLen1 = append(opsLen1, gOp) + } else if len(gOp.In) == 2 { + opsLen2 = append(opsLen2, gOp) + } else if len(gOp.In) == 3 { + opsLen3 = append(opsLen3, gOp) + } + } + sortKey := func(op *Operation) string { + return *op.In[0].Go + op.Go + } + sortBySortKey := func(ops []Operation) { + sort.Slice(ops, func(i, j int) bool { + return sortKey(&ops[i]) < sortKey(&ops[j]) + }) + } + sortBySortKey(opsLen1) + sortBySortKey(opsLen2) + sortBySortKey(opsLen3) + return +} + +// dedup is deduping operations in the full structure level. +func dedup(ops []Operation) (deduped []Operation) { + for _, op := range ops { + seen := false + for _, dop := range deduped { + if reflect.DeepEqual(op, dop) { + seen = true + break + } + } + if !seen { + deduped = append(deduped, op) + } + } + return +} + +// splitMask splits operations with a single mask vreg input to be masked and unmasked(const: K0). +// It also remove the "Masked" keyword from the name. +func splitMask(ops []Operation) ([]Operation, error) { + splited := []Operation{} + for _, op := range ops { + splited = append(splited, op) + if op.Masked == nil || *op.Masked != "true" { + continue + } + shapeIn, _, _, _, _, _, err := op.shape() + if err != nil { + return nil, err + } + if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + op2 := op + op2.In = slices.Clone(op.In) + constMask := "K0" + // The ops should be sorted when calling this function, the mask is in the end. 
+ op2.In[len(op2.In)-1].Const = &constMask + if !strings.HasPrefix(op2.Go, "Masked") { + return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) + } + op2.Go = strings.ReplaceAll(op2.Go, "Masked", "") + splited = append(splited, op2) + } else { + return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) + } + } + return splited, nil +} + +// dedupGodef is deduping operations in [Op.Go]+[*Op.In[0].Go] level. +// By deduping, it means picking the least advanced architecture that satisfy the requirement: +// AVX512 will be least preferred. +// If FlagNoDedup is set, it will report the duplicates to the console. +func dedupGodef(ops []Operation) ([]Operation, error) { + seen := map[string][]Operation{} + for _, op := range ops { + _, _, _, _, _, gOp, err := op.shape() + if err != nil { + return nil, err + } + genericNames := gOp.Go + *gOp.In[0].Go + seen[genericNames] = append(seen[genericNames], op) + } + if *FlagReportDup { + for gName, dup := range seen { + if len(dup) > 1 { + log.Printf("Duplicate for %s:\n", gName) + for _, op := range dup { + log.Printf("%s\n", op) + } + } + } + return ops, nil + } + isAVX512 := func(op Operation) bool { + return strings.Contains(op.Extension, "AVX512") + } + deduped := []Operation{} + for _, dup := range seen { + if len(dup) > 1 { + sort.Slice(dup, func(i, j int) bool { + // Put non-AVX512 candidates at the beginning + if !isAVX512(dup[i]) && isAVX512(dup[j]) { + return true + } + // TODO: make the sorting logic finer-grained. + return false + }) + } + deduped = append(deduped, dup[0]) + } + return deduped, nil +} + +// Copy op.ConstImm to op.In[0].Const +// This is a hack to reduce the size of defs we need for const imm operations. 
+func copyConstImm(ops []Operation) error { + for _, op := range ops { + if op.ConstImm == nil { + continue + } + shapeIn, _, _, _, _, _, err := op.shape() + if err != nil { + return err + } + if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + op.In[0].Const = op.ConstImm + } + // Otherwise, just not port it - e.g. {VPCMP[BWDQ] imm=0} and {VPCMPEQ[BWDQ]} are + // the same operations "Equal", [dedupgodef] should be able to distinguish them. + } + return nil +} + +// overwrite corrects some errors due to: +// - The XED data is wrong +// - Go's SIMD API requirement, for example AVX2 compares should also produce masks. +// This rewrite has strict constraints, please see the error message. +// These constraints are also explointed in [writeSIMDRules], [writeSIMDMachineOps] +// and [writeSIMDSSA], please be careful when updating these constraints. +func overwrite(ops []Operation) error { + capitalizeFirst := func(s string) string { + if s == "" { + return "" + } + // Convert the string to a slice of runes to handle multi-byte characters correctly. 
+ r := []rune(s) + r[0] = unicode.ToUpper(r[0]) + return string(r) + } + hasClassOverwrite := false + overwrite := func(op []Operand, idx int) error { + if op[idx].OverwriteClass != nil { + if op[idx].OverwriteBase == nil { + return fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx]) + } + oBase := *op[idx].OverwriteBase + oClass := *op[idx].OverwriteClass + if oClass != "mask" { + return fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx]) + } + if oBase != "uint" { + return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to uint: %s", op[idx]) + } + if op[idx].Class != "vreg" { + return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx]) + } + if *op[idx].Base != "uint" && *op[idx].Base != "int" { + return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Base] from int|uint: %s", op[idx]) + } + hasClassOverwrite = true + *op[idx].Base = oBase + op[idx].Class = oClass + *op[idx].Go = fmt.Sprintf("Mask%dx%d", *op[idx].ElemBits, *op[idx].Lanes) + } else if op[idx].OverwriteBase != nil { + oBase := *op[idx].OverwriteBase + *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) + *op[idx].Base = oBase + } + return nil + } + for i := range ops { + hasClassOverwrite = false + for j := range ops[i].In { + if err := overwrite(ops[i].In, j); err != nil { + return err + } + if hasClassOverwrite { + return fmt.Errorf("simdgen does not support [OverwriteClass] in inputs: %s", ops[i]) + } + } + for j := range ops[i].Out { + if err := overwrite(ops[i].Out, j); err != nil { + return err + } + } + if hasClassOverwrite { + for _, in := range ops[i].In { + if in.Class == "mask" { + return fmt.Errorf("simdgen only supports [OverwriteClass] for operations without mask inputs") + } + } + } + } + return nil +} + +func (o Operation) String() string { + var sb strings.Builder + 
sb.WriteString("Operation {\n") + sb.WriteString(fmt.Sprintf(" Go: %s\n", o.Go)) + sb.WriteString(fmt.Sprintf(" GoArch: %s\n", o.GoArch)) + sb.WriteString(fmt.Sprintf(" Asm: %s\n", o.Asm)) + + sb.WriteString(" In: [\n") + for _, op := range o.In { + sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + } + sb.WriteString(" ]\n") + + sb.WriteString(" Out: [\n") + for _, op := range o.Out { + sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + } + sb.WriteString(" ]\n") + + sb.WriteString(fmt.Sprintf(" Commutative: %s\n", o.Commutative)) + sb.WriteString(fmt.Sprintf(" Extension: %s\n", o.Extension)) + + if o.Zeroing != nil { + sb.WriteString(fmt.Sprintf(" Zeroing: %s\n", *o.Zeroing)) + } else { + sb.WriteString(" Zeroing: \n") + } + + if o.Documentation != nil { + sb.WriteString(fmt.Sprintf(" Documentation: %s\n", *o.Documentation)) + } else { + sb.WriteString(" Documentation: \n") + } + + if o.ConstImm != nil { + sb.WriteString(fmt.Sprintf(" ConstImm: %s\n", *o.ConstImm)) + } else { + sb.WriteString(" ConstImm: \n") + } + + if o.Masked != nil { + sb.WriteString(fmt.Sprintf(" Masked: %s\n", *o.Masked)) + } else { + sb.WriteString(" Masked: \n") + } + + sb.WriteString("}\n") + return sb.String() +} + +// String returns a string representation of the Operand. 
+func (op Operand) String() string { + var sb strings.Builder + sb.WriteString("Operand {\n") + sb.WriteString(fmt.Sprintf(" Class: %s\n", op.Class)) + + if op.Go != nil { + sb.WriteString(fmt.Sprintf(" Go: %s\n", *op.Go)) + } else { + sb.WriteString(" Go: \n") + } + + sb.WriteString(fmt.Sprintf(" AsmPos: %d\n", op.AsmPos)) + + if op.Base != nil { + sb.WriteString(fmt.Sprintf(" Base: %s\n", *op.Base)) + } else { + sb.WriteString(" Base: \n") + } + + if op.ElemBits != nil { + sb.WriteString(fmt.Sprintf(" ElemBits: %d\n", *op.ElemBits)) + } else { + sb.WriteString(" ElemBits: \n") + } + + if op.Bits != nil { + sb.WriteString(fmt.Sprintf(" Bits: %d\n", *op.Bits)) + } else { + sb.WriteString(" Bits: \n") + } + + if op.Const != nil { + sb.WriteString(fmt.Sprintf(" Const: %s\n", *op.Const)) + } else { + sb.WriteString(" Const: \n") + } + + if op.Lanes != nil { + sb.WriteString(fmt.Sprintf(" Lanes: %d\n", *op.Lanes)) + } else { + sb.WriteString(" Lanes: \n") + } + + if op.OverwriteClass != nil { + sb.WriteString(fmt.Sprintf(" OverwriteClass: %s\n", *op.OverwriteClass)) + } else { + sb.WriteString(" OverwriteClass: \n") + } + + if op.OverwriteBase != nil { + sb.WriteString(fmt.Sprintf(" OverwriteBase: %s\n", *op.OverwriteBase)) + } else { + sb.WriteString(" OverwriteBase: \n") + } + + sb.WriteString(" }\n") + return sb.String() +} diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 6a6ca1eb..0ec1ee0d 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,223 +1,437 @@ !sum -# For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. 
+# Add - go: Add - asm: "V?PADD[BWDQ]|V?ADDP[SD]" + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - &any + go: $t + - *any + out: + - *any +- go: MaskedAdd + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Add +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &int + go: $t + base: int + - *int + out: + - *int +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &uint + go: $t + base: uint + - *uint + out: + - *uint +- go: MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" in: - - go: $t - - go: $t + - class: mask + - *uint + - *uint out: - - go: $t + - *uint +# Sub - go: Sub - goarch: amd64 - asm: "V?PSUB[BWDQ]|V?SUBP[SD]" + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - *any + - *any + out: + - *any +- go: MaskedSub + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Sub +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - *int + - *int + out: + - *int +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" in: - - go: $t - - go: $t + - *uint + - *uint out: - - go: $t + - *uint +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *uint + - *uint + out: + - *uint +# In the XED data, *all* floating point bitwise logic operation has their +# operand type marked as uint. We are not trying to understand why Intel +# decided that they want FP bit-wise logic operations, but this irregularity +# has to be dealed with in separate rules with some overwrites. -# -# AVX-512 Comparisons -# +# Int/Uint operations. +# Non-masked for 128/256-bit vectors +# For binary operations, we constrain their two inputs and one output to the +# same Go type using a variable. This will map to instructions before AVX512. 
+- go: And + asm: "VPAND" + in: + - &any + go: $t + - *any + out: + - *any +# Masked +# Looks like VPAND$xi works only for 2 shapes for integer: +# Dword and Qword. +# TODO: should we wildcard other smaller elemBits to VPANDQ or +# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. +- go: MaskedAnd + asm: "VPAND[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any -# TODO(austin): I'm not happy with how much copy-pasting this requires. We could -# do a functional abstraction, but that feels bolted on. Unification is *almost* -# capable of collapsing all of this. -# -# One thing that might work is having a !let node that lets you extend the -# nonDetEnv with explicit values: -# -# !let -# - {$go: Equal, $imm: 0, $mask: K0} -# - {$go: EqualMasked, $imm: 0, $mask: _} -# - {$go: Less, $imm: 1, $mask: K0} -# - {$go: LessMasked, $imm: 1, $mask: _} -# - !let -# - {$asm: "VPCMP[BWDQ]", $base: int} -# - {$asm: "VPCMPU[BWDQ]", $base: uint} -# - go: $go -# asm: $asm -# in: -# - const: $mask -# - base: $base -# go: $t -# - base: $base -# go: $t -# - class: immediate -# const: $imm -# out: -# - class: mask -# -# That's not bad, but it's very hierachical. CUE has a "mixin" approach to this. -# -# - !unify -# # All AVX-512 comparisons have the same basic operand shape -# - {in: [_, {go: $t}, {go: $t}, _], out: [{class: mask}]} -# # There are signed and unsigned variants -# - !sum -# - {asm: "VPCMP[BWDQ]", in: [_, {base: int}, {base: int}, _]} -# - {asm: "VPCMPU[BWDQ]", in: [_, {base: uint}, {base: uint}, _]} -# # Finally, list out the operations. 
-# - !let -# - $equal: {in: [_, _, _, {class: immedate, const: 0}]} -# $less: {in: [_, _, _, {class: immedate, const: 1}]} -# $masked: _ -# $unmasked: {in: [const: K0, _, _, _]} -# - !sum -# - !unify [go: Equal, $equal, $unmasked] -# - !unify [go: EqualMasked, $equal, $masked] -# - !unify [go: Less, $less, $unmasked] -# - !unify [go: LessMasked, $less, $masked] -# -# Maybe !let is just a feature of !sum that introduces an environment factor for -# all following branches? That would let me do the above in-line with the big -# top-level !sum: -# -# - !sum -# ... -# - !let # Adds a factor that is the sum of the following terms: -# - {$go: Equal, $imm: 0, $mask: K0} -# - {$go: EqualMasked, $imm: 0, $mask: _} -# - {$go: Less, $imm: 1, $mask: K0} -# - {$go: LessMasked, $imm: 1, $mask: _} -# - !let # Adds another factor: -# - {$asm: "VPCMP[BWDQ]", $base: int} -# - {$asm: "VPCMPU[BWDQ]", $base: uint} -# - go: $go -# asm: $asm -# in: -# - const: $mask -# - base: $base -# go: $t -# - base: $base -# go: $t -# - class: immediate -# const: $imm -# out: -# - class: mask -# -# I may need to choose names more carefully in that case. This is a general -# problem with names being file-global. (This is less of a problem with the -# mixin style because those names tend to be more specific anyway.) Or maybe it -# makes sense for each !let to introduce fresh idents, even if the string names -# are the same? 
+- go: AndNot + asm: "VPANDN" + in: + - *any + - *any + out: + - *any +- go: MaskedAndNot + asm: "VPANDN[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any -- go: Equal - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Or + asm: "VPOR" in: - - const: K0 - - base: int - go: $t - - base: int - go: $t - - class: immediate - const: 0 + - *any + - *any out: + - *any +- go: MaskedOr + asm: "VPOR[DQ]" + in: - class: mask + - *any + - *any + out: + - *any -- go: EqualMasked - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Xor + asm: "VPXOR" in: - - _ - - base: int - go: $t - - base: int - go: $t - - class: immediate - const: 0 + - *any + - *any out: + - *any +- go: MaskedXor + asm: "VPXOR[DQ]" + in: - class: mask + - *any + - *any + out: + - *any -- go: Equal - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +# FP operations. +# Set the [base] to be "int" to not include duplicates(excluding "uint"). +# [base] is not used when [overwriteBase] is present. 
+- go: And + asm: "VANDP[SD]" in: - - const: K0 - - base: uint + - &intToFloat go: $t - - base: uint - go: $t - - class: immediate - const: 0 + base: int + overwriteBase: float + - *intToFloat out: + - *intToFloat +- go: MaskedAnd + asm: "VANDP[SD]" + in: - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat -- go: EqualMasked - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +- go: AndNot + asm: "VANDNP[SD]" in: - - _ - - base: uint - go: $t - - base: uint - go: $t - - class: immediate - const: 0 + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedAndNot + asm: "VANDNP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: Or + asm: "VORP[SD]" + in: + - *intToFloat + - *intToFloat out: + - *intToFloat +- go: MaskedOr + asm: "VORP[SD]" + in: - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat -- go: Less - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Xor + asm: "VXORP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedXor + asm: "VXORP[SD]" in: - - const: K0 - - base: int + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: Equal + asm: "V?PCMPEQ[BWDQ]" + in: &int2 + - &int go: $t - - base: int + base: int # Looks like PCMP is on signed integers - but for equals does it really matters? + - *int + out: + - &anyvregToMask + go: $t # We still need the output to be the same shape as inputs. + overwriteBase: uint + overwriteClass: mask +- go: Greater + asm: "V?PCMPGT[BWDQ]" + in: *int2 + out: + - *anyvregToMask +- go: MaskedEqual + asm: "V?PCMPEQ[BWDQ]" + in: &maskint2 + - class: mask + - *int + - *int + out: + - class: mask +- go: MaskedGreater + asm: "V?PCMPGT[BWDQ]" + in: *maskint2 + out: + - class: mask +# The const imm predicated compares after AVX512, please see categories.yaml +# for const imm specification. 
+- go: Masked(Equal|Greater) + asm: "VPCMP[BWDQ]" + in: + - class: mask + - &int go: $t + base: int + - *int - class: immediate - const: 1 + const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask - -- go: LessMasked - goarch: amd64 - asm: "VPCMP[BWDQ]" # Signed comparison +- go: Masked(Equal|Greater) + asm: "VPCMPU[BWDQ]" in: - - _ - - base: int - go: $t - - base: int + - class: mask + - &uint go: $t + base: uint + - *uint - class: immediate - const: 1 + const: 0 out: - class: mask - -- go: Less - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +- go: Div + asm: "V?DIVP[SD]" in: - - const: K0 - - base: uint + - &fp go: $t - - base: uint + base: float + - *fp + out: + - *fp +- go: MaskedDiv + asm: "V?DIVP[SD]" + in: + - class: mask + - *fp + - *fp + out: + - *fp +# "Normal" multiplication is only available for floats. +# This only covers the single and double precision. +- go: Mul + asm: "VMULP[SD]" + in: + - &fp go: $t - - class: immediate - const: 1 + base: float + - *fp out: + - *fp +- go: MaskedMul + asm: "VMULP[SD]" + in: - class: mask + - *fp + - *fp + out: + - *fp -- go: LessMasked - goarch: amd64 - asm: "VPCMPU[BWDQ]" # Unsigned comparison +# Integer multiplications. + +# MulEvenWiden +# Dword only. +- go: MulEvenWiden + asm: "VPMULDQ" in: - - _ - - base: uint + - &int go: $t - - base: uint + base: int + - *int + out: + - &int2 + go: $t2 + base: int +- go: MulEvenWiden + asm: "VPMULUDQ" + in: + - &uint go: $t - - class: immediate - const: 1 + base: uint + - *uint + out: + - &uint2 + go: $t2 + base: uint +- go: MaskedMulEvenWiden + asm: "VPMULDQ" + in: + - class: mask + - *int + - *int out: + - *int2 +- go: MaskedMulEvenWiden + asm: "VPMULUDQ" + in: - class: mask + - *uint + - *uint + out: + - *uint2 -# TODO: -# 2: OP := LE; -# 4: OP := NEQ; -# 5: OP := NLT; -# 6: OP := NLE; +# MulHigh +# Word only. 
+# Non-masked +- go: MulHigh + asm: "VPMULHW" + in: + - *int + - *int + out: + - *int2 +- go: MulHigh + asm: "VPMULHUW" + in: + - *uint + - *uint + out: + - *uint2 +- go: MaskedMulHigh + asm: "VPMULHW" + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: MaskedMulHigh + asm: "VPMULHUW" + in: + - class: mask + - *uint + - *uint + out: + - *uint2 + +# MulLow +# Signed int only. +# Non-masked +- go: MulLow + asm: "VPMULL[WDQ]" + in: + - *int + - *int + out: + - *int2 +- go: MaskedMulLow + asm: "VPMULL[WDQ]" + in: + - class: mask + - *int + - *int + out: + - *int2 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 037c11fa..2a611c9e 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -5,59 +5,57 @@ package main import ( - "fmt" - "io" "log" - "slices" "golang.org/x/arch/internal/unify" ) type Operation struct { - Go string // Go method name - Category *string // General operation category (optional) + Go string // Go method name GoArch string // GOARCH for this definition Asm string // Assembly mnemonic - In []Operand // Arguments - Out []Operand // Results + In []Operand // Arguments + Out []Operand // Results + Commutative string // Commutativity + Extension string // Extension + Zeroing *string // Zeroing is a flag for asm prefix "Z", if non-nil it will always be "false" + Documentation *string // Documentation will be appended to the stubs comments. + // ConstMask is a hack to reduce the size of defs the user writes for const-immediate + // If present, it will be copied to [In[0].Const]. + ConstImm *string + // Masked indicates that this is a masked operation, this field has to be set for masked operations + // otherwise simdgen won't recognize it in [splitMask]. 
+ Masked *string } type Operand struct { - Class string + Class string // One of "mask", "immediate", "vreg" and "mem" Go *string // Go type of this operand AsmPos int // Position of this operand in the assembly instruction Base *string // Base Go type ("int", "uint", "float") ElemBits *int // Element bit width - Bits int // Total vector bit width + Bits *int // Total vector bit width Const *string // Optional constant value + Lanes *int // Lanes should equal Bits/ElemBits + // If non-nil, it means the [Class] field is overwritten here, right now this is used to + // overwrite the results of AVX2 compares to masks. + OverwriteClass *string + // If non-nil, it means the [Base] field is overwritten here. This field exist solely + // because Intel's XED data is inconsistent. e.g. VANDNP[SD] marks its operand int. + OverwriteBase *string } -func (o Operand) Compare(p Operand) int { - // Put mask operands after others - if o.Class != "mask" && p.Class == "mask" { - return -1 - } - if o.Class == "mask" && p.Class != "mask" { - return 1 - } - return 0 -} - -var argNames = []string{"x", "y", "z", "w"} - -func writeGoDefs(w io.Writer, cl unify.Closure) { +func writeGoDefs(path string, cl unify.Closure) error { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) - - // TODO: This code is embarrassing, but I'm very tired. 
- - var op Operation + var ops []Operation for def := range cl.All() { + var op Operation if !def.Exact() { continue } @@ -66,76 +64,58 @@ func writeGoDefs(w io.Writer, cl unify.Closure) { log.Println(def) continue } - - in := slices.Clone(op.In) - slices.SortStableFunc(in, Operand.Compare) - out := slices.Clone(op.Out) - slices.SortStableFunc(out, Operand.Compare) - - type argExtra struct { - *Operand - varName string - } - asmPosToArg := make(map[int]argExtra) - asmPosToRes := make(map[int]argExtra) - argNames := argNames - - fmt.Fprintf(w, "func (%s %s) %s(", argNames[0], *in[0].Go, op.Go) - asmPosToArg[in[0].AsmPos] = argExtra{&in[0], argNames[0]} - argNames = argNames[1:] - i := 0 - for _, arg := range in[1:] { - varName := "" - - // Drop operands with constant values - if arg.Const == nil { - if i > 0 { - fmt.Fprint(w, ", ") - } - i++ - varName = argNames[0] - fmt.Fprintf(w, "%s %s", varName, *arg.Go) - argNames = argNames[1:] - } - asmPosToArg[arg.AsmPos] = argExtra{&arg, varName} + // TODO: verify that this is safe. + op.sortOperand() + ops = append(ops, op) + } + // The parsed XED data might contain duplicates, like + // 512 bits VPADDP. 
+ deduped := dedup(ops) + log.Printf("dedup len: %d\n", len(ops)) + var err error + if err = overwrite(deduped); err != nil { + return err + } + log.Printf("dedup len: %d\n", len(deduped)) + if !*FlagNoSplitMask { + if deduped, err = splitMask(deduped); err != nil { + return err } - fmt.Fprintf(w, ") (") - for i, res := range out { - if i > 0 { - fmt.Fprint(w, ", ") - } - varName := string('o' + byte(i)) - fmt.Fprintf(w, "%s %s", varName, *res.Go) - asmPosToRes[res.AsmPos] = argExtra{&res, varName} + } + log.Printf("dedup len: %d\n", len(deduped)) + if !*FlagNoDedup { + if deduped, err = dedupGodef(deduped); err != nil { + return err } - fmt.Fprintf(w, ") {\n") - - fmt.Fprintf(w, "\t// %s", op.Asm) - for i := 0; ; i++ { - arg, okArg := asmPosToArg[i] - if okArg { - if arg.Const != nil { - fmt.Fprintf(w, " %s", *arg.Const) - } else { - fmt.Fprintf(w, " %s", arg.varName) - } - } - - res, okRes := asmPosToRes[i] - if okRes { - if okArg { - fmt.Fprintf(w, "/") - } else { - fmt.Fprintf(w, " ") - } - fmt.Fprintf(w, "%s", res.varName) - } - if !okArg && !okRes { - break - } + } + log.Printf("dedup len: %d\n", len(deduped)) + if !*FlagNoConstImmPorting { + if err = copyConstImm(deduped); err != nil { + return err } - fmt.Fprintf(w, "\n") - - fmt.Fprintf(w, "}\n") } + log.Printf("dedup len: %d\n", len(deduped)) + typeMap := parseSIMDTypes(deduped) + if err = writeSIMDTypes(path, typeMap); err != nil { + return err + } + if err = writeSIMDStubs(path, deduped, typeMap); err != nil { + return err + } + if err = writeSIMDIntrinsics(path, deduped, typeMap); err != nil { + return err + } + if err = writeSIMDGenericOps(path, deduped); err != nil { + return err + } + if err = writeSIMDMachineOps(path, deduped); err != nil { + return err + } + if err = writeSIMDRules(path, deduped); err != nil { + return err + } + if err = writeSIMDSSA(path, deduped); err != nil { + return err + } + return nil } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 
82c31c06..84d8a92f 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -36,6 +36,42 @@ // To see just the definitions for VPADDQ: // // go run . -xedPath $XEDPATH -q '{asm: VPADDQ}' +// +// simdgen can also generate Go definitions of SIMD mappings: +// To generate go files to the go root, run: +// +// go run . -xedPath $XEDPATH -godefroot $/PATH/TO/go go.yaml categories.yaml types.yaml +// +// types.yaml is already written, it specifies the shapes of vectors. +// categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED +// data, you can find an example in ops/AddSub/. +// +// To produce an aggregation of go.yaml and categoris.yaml from ./ops/ to ./, run: +// +// go generate +// +// When generating Go definitions, simdgen do 3 "magic"s: +// - It splits masked operations(with op's [Masked] field set) to const and non const: +// - One is a normal masked operation, the original +// - The other has its mask operand's [Const] fields set to "K0". +// - This way the user does not need to provide a separate "K0"-masked operation def. +// +// - It deduplicates intrinsic names that have duplicates: +// - If there are two operations that shares the same signature, one is AVX512 the other +// is before AVX512, the other will be selected. +// - This happens often when some operations are defined both before AVX512 and after. +// This way the user does not need to provide a separate "K0" operation for the +// AVX512 counterpart. +// +// - It copies the op's [ConstImm] field to its immediate operand's [Const] field. +// - This way the user does not need to provide verbose op definition while only +// the const immediate field is different. This is useful to reduce verbosity of +// compares with imm control predicates. +// +// These 3 magics could be disabled by enabling -nosplitmask, -nodedup or +// -noconstimmporting flags. +// +// simdgen right now only supports amd64, -arch=$OTHERARCH will trigger a fatal error. 
package main // Big TODOs: @@ -48,6 +84,8 @@ package main // - Do I need Closure, Value, and Domain? It feels like I should only need two // types. +//go:generate go run ./ops/. + import ( "cmp" "flag" @@ -55,6 +93,7 @@ import ( "log" "maps" "os" + "path/filepath" "slices" "strings" @@ -63,13 +102,19 @@ import ( ) var ( - xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") - flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") - flagO = flag.String("o", "yaml", "output type: yaml, godefs") + xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") + flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") + flagO = flag.String("o", "yaml", "output type: yaml, godefs") + flagGoDefRoot = flag.String("godefroot", ".", "the path to the directory containing the generated godefs") + FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") + FlagNoSplitMask = flag.Bool("nosplitmask", false, "disable splitting the masks to const and non const") + FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") + FlagArch = flag.String("arch", "amd64", "the target architecture") flagDebugXED = flag.Bool("debug-xed", false, "show XED instructions") flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") + FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") ) func main() { @@ -77,6 +122,10 @@ func main() { var inputs []unify.Closure + if *FlagArch != "amd64" { + log.Fatalf("simdgen only supports amd64") + } + // Load XED into a defs set. 
if *xedPath != "" { xedDefs := loadXED(*xedPath) @@ -102,7 +151,7 @@ func main() { } inputs = append(inputs, defs) - if path == "go.yaml" { + if filepath.Base(path) == "go.yaml" { // These must all be used in the final result for def := range defs.Summands() { must[def] = struct{}{} @@ -145,7 +194,9 @@ func main() { enc.Close() } case "godefs": - writeGoDefs(os.Stdout, unified) + if err := writeGoDefs(*flagGoDefRoot, unified); err != nil { + log.Fatalf("Failed writing godefs: %+v", err) + } } // Validate results. diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml new file mode 100644 index 00000000..8da031f7 --- /dev/null +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -0,0 +1,29 @@ +!sum +- go: Add + commutative: "true" + extension: "AVX.*" +- go: SaturatedAdd + commutative: "true" + extension: "AVX.*" +- go: MaskedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedAdd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Sub + commutative: "true" + extension: "AVX.*" +- go: SaturatedSub + commutative: "true" + extension: "AVX.*" +- go: MaskedSub + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedSaturatedSub + masked: "true" + commutative: "true" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml new file mode 100644 index 00000000..9e8dc57d --- /dev/null +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -0,0 +1,101 @@ +!sum +# Add +- go: Add + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - &any + go: $t + - *any + out: + - *any +- go: MaskedAdd + asm: "VPADD[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Add +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &int + go: $t + base: int + - *int + out: + - *int +- go: SaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - &uint + go: $t + base: uint + - *uint + out: + - *uint +- go: 
MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedAdd + asm: "VPADDS[BWDQ]" + in: + - class: mask + - *uint + - *uint + out: + - *uint + +# Sub +- go: Sub + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - *any + - *any + out: + - *any +- go: MaskedSub + asm: "VPSUB[BWDQ]|VADDP[SD]" + in: + - class: mask + - *any + - *any + out: + - *any +# Saturated Sub +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - *int + - *int + out: + - *int +- go: SaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - *uint + - *uint + out: + - *uint +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *int + - *int + out: + - *int +- go: MaskedSaturatedSub + asm: "VPSUBS[BWDQ]" + in: + - class: mask + - *uint + - *uint + out: + - *uint diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml new file mode 100644 index 00000000..bc4eda74 --- /dev/null +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -0,0 +1,31 @@ +!sum +- go: And + commutative: "true" + extension: "AVX.*" +- go: MaskedAnd + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Or + commutative: "true" + extension: "AVX.*" +- go: MaskedOr + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: AndNot + commutative: "true" + extension: "AVX.*" +- go: MaskedAndNot + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: Xor + commutative: "true" + extension: "AVX.*" +- go: MaskedXor + masked: "true" + commutative: "true" + extension: "AVX.*" +# We also have PTEST and VPTERNLOG, those should be hidden from the users +# and only appear in rewrite rules. 
\ No newline at end of file diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml new file mode 100644 index 00000000..7e7adf7a --- /dev/null +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -0,0 +1,149 @@ +!sum +# In the XED data, *all* floating point bitwise logic operation has their +# operand type marked as uint. We are not trying to understand why Intel +# decided that they want FP bit-wise logic operations, but this irregularity +# has to be dealed with in separate rules with some overwrites. + +# Int/Uint operations. +# Non-masked for 128/256-bit vectors +# For binary operations, we constrain their two inputs and one output to the +# same Go type using a variable. This will map to instructions before AVX512. +- go: And + asm: "VPAND" + in: + - &any + go: $t + - *any + out: + - *any +# Masked +# Looks like VPAND$xi works only for 2 shapes for integer: +# Dword and Qword. +# TODO: should we wildcard other smaller elemBits to VPANDQ or +# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. +- go: MaskedAnd + asm: "VPAND[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +- go: AndNot + asm: "VPANDN" + in: + - *any + - *any + out: + - *any +- go: MaskedAndNot + asm: "VPANDN[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +- go: Or + asm: "VPOR" + in: + - *any + - *any + out: + - *any +- go: MaskedOr + asm: "VPOR[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +- go: Xor + asm: "VPXOR" + in: + - *any + - *any + out: + - *any +- go: MaskedXor + asm: "VPXOR[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# FP operations. +# Set the [base] to be "int" to not include duplicates(excluding "uint"). +# [base] is not used when [overwriteBase] is present. 
+- go: And + asm: "VANDP[SD]" + in: + - &intToFloat + go: $t + base: int + overwriteBase: float + - *intToFloat + out: + - *intToFloat +- go: MaskedAnd + asm: "VANDP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: AndNot + asm: "VANDNP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedAndNot + asm: "VANDNP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: Or + asm: "VORP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedOr + asm: "VORP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat + +- go: Xor + asm: "VXORP[SD]" + in: + - *intToFloat + - *intToFloat + out: + - *intToFloat +- go: MaskedXor + asm: "VXORP[SD]" + in: + - class: mask + - *intToFloat + - *intToFloat + out: + - *intToFloat \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml new file mode 100644 index 00000000..cac97d4e --- /dev/null +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -0,0 +1,19 @@ +!sum +- go: Equal + constImm: 0 + commutative: "true" + extension: "AVX.*" +- go: Greater + constImm: 6 + commutative: "false" + extension: "AVX.*" +- go: MaskedEqual + constImm: 0 + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedGreater + constImm: 6 + masked: "true" + commutative: "false" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml new file mode 100644 index 00000000..f0a8cab1 --- /dev/null +++ b/internal/simdgen/ops/Compares/go.yaml @@ -0,0 +1,57 @@ +!sum +- go: Equal + asm: "V?PCMPEQ[BWDQ]" + in: &int2 + - &int + go: $t + base: int # Looks like PCMP is on signed integers - but for equals does it really matters? + - *int + out: + - &anyvregToMask + go: $t # We still need the output to be the same shape as inputs. 
+ overwriteBase: uint + overwriteClass: mask +- go: Greater + asm: "V?PCMPGT[BWDQ]" + in: *int2 + out: + - *anyvregToMask +- go: MaskedEqual + asm: "V?PCMPEQ[BWDQ]" + in: &maskint2 + - class: mask + - *int + - *int + out: + - class: mask +- go: MaskedGreater + asm: "V?PCMPGT[BWDQ]" + in: *maskint2 + out: + - class: mask +# The const imm predicated compares after AVX512, please see categories.yaml +# for const imm specification. +- go: Masked(Equal|Greater) + asm: "VPCMP[BWDQ]" + in: + - class: mask + - &int + go: $t + base: int + - *int + - class: immediate + const: 0 # Just a placeholder, will be overwritten by const imm porting. + out: + - class: mask +- go: Masked(Equal|Greater) + asm: "VPCMPU[BWDQ]" + in: + - class: mask + - &uint + go: $t + base: uint + - *uint + - class: immediate + const: 0 + out: + - class: mask diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml new file mode 100644 index 00000000..9166f1fa --- /dev/null +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -0,0 +1,8 @@ +!sum +- go: Div + commutative: "true" + extension: "AVX.*" +- go: MaskedDiv + commutative: "true" + masked: "true" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml new file mode 100644 index 00000000..4c74d253 --- /dev/null +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -0,0 +1,18 @@ +!sum +- go: Div + asm: "V?DIVP[SD]" + in: + - &fp + go: $t + base: float + - *fp + out: + - *fp +- go: MaskedDiv + asm: "V?DIVP[SD]" + in: + - class: mask + - *fp + - *fp + out: + - *fp \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml new file mode 100644 index 00000000..0ef6cf57 --- /dev/null +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -0,0 +1,35 @@ +!sum +- go: Mul + commutative: "true" + extension: "AVX.*" +- go: MulEvenWiden + commutative: "true" 
+ extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" +- go: MulHigh + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MulLow + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" +- go: MaskedMul + masked: "true" + commutative: "true" + extension: "AVX.*" +- go: MaskedMulEvenWiden + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" +- go: MaskedMulHigh + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" +- go: MaskedMulLow + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml new file mode 100644 index 00000000..a75f4188 --- /dev/null +++ b/internal/simdgen/ops/Mul/go.yaml @@ -0,0 +1,116 @@ +!sum +# "Normal" multiplication is only available for floats. +# This only covers the single and double precision. +- go: Mul + asm: "VMULP[SD]" + in: + - &fp + go: $t + base: float + - *fp + out: + - *fp +- go: MaskedMul + asm: "VMULP[SD]" + in: + - class: mask + - *fp + - *fp + out: + - *fp + +# Integer multiplications. + +# MulEvenWiden +# Dword only. 
+- go: MulEvenWiden + asm: "VPMULDQ" + in: + - &int + go: $t + base: int + - *int + out: + - &int2 + go: $t2 + base: int +- go: MulEvenWiden + asm: "VPMULUDQ" + in: + - &uint + go: $t + base: uint + - *uint + out: + - &uint2 + go: $t2 + base: uint +- go: MaskedMulEvenWiden + asm: "VPMULDQ" + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: MaskedMulEvenWiden + asm: "VPMULUDQ" + in: + - class: mask + - *uint + - *uint + out: + - *uint2 + +# MulHigh +# Word only. +# Non-masked +- go: MulHigh + asm: "VPMULHW" + in: + - *int + - *int + out: + - *int2 +- go: MulHigh + asm: "VPMULHUW" + in: + - *uint + - *uint + out: + - *uint2 +- go: MaskedMulHigh + asm: "VPMULHW" + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: MaskedMulHigh + asm: "VPMULHUW" + in: + - class: mask + - *uint + - *uint + out: + - *uint2 + +# MulLow +# Signed int only. +# Non-masked +- go: MulLow + asm: "VPMULL[WDQ]" + in: + - *int + - *int + out: + - *int2 +- go: MaskedMulLow + asm: "VPMULL[WDQ]" + in: + - class: mask + - *int + - *int + out: + - *int2 \ No newline at end of file diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go new file mode 100644 index 00000000..7e462bf7 --- /dev/null +++ b/internal/simdgen/ops/main.go @@ -0,0 +1,75 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "path/filepath" +) + +const baseDir = "ops" // The main directory containing A, B, C, etc. 
+ +func main() { + if err := mergeYamlFiles("categories.yaml"); err != nil { + fmt.Printf("Error processing categories.yaml: %v\n", err) + os.Exit(1) + } + if err := mergeYamlFiles("go.yaml"); err != nil { + fmt.Printf("Error processing go.yaml: %v\n", err) + os.Exit(1) + } +} + +func mergeYamlFiles(targetFileName string) error { + outputFile, err := os.Create(targetFileName) + if err != nil { + return fmt.Errorf("failed to create output file %s: %w", targetFileName, err) + } + defer outputFile.Close() + + writer := bufio.NewWriter(outputFile) + _, err = writer.WriteString("!sum\n") + if err != nil { + return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) + } + + entries, err := os.ReadDir(baseDir) + if err != nil { + return fmt.Errorf("failed to read base directory %s: %w", baseDir, err) + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + subdirPath := filepath.Join(baseDir, entry.Name()) + sourceFilePath := filepath.Join(subdirPath, targetFileName) + + sourceFile, err := os.Open(sourceFilePath) + if err != nil { + if os.IsNotExist(err) { + fmt.Printf("Skipping: %s not found in %s\n", targetFileName, subdirPath) + continue + } + return fmt.Errorf("failed to open source file %s: %w", sourceFilePath, err) + } + defer sourceFile.Close() + + scanner := bufio.NewScanner(sourceFile) + // Skip first line + scanner.Scan() + // Append the rest of the lines to the output file + for scanner.Scan() { + line := scanner.Text() + _, err = writer.WriteString(line + "\n") + if err != nil { + return fmt.Errorf("failed to write line from %s to %s: %w", sourceFilePath, targetFileName, err) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading lines from %s: %w", sourceFilePath, err) + } + } + return writer.Flush() +} diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 9092224e..c8b3660e 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -7,48 +7,48 @@ in: 
!repeat - !sum &types - - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128} - - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128} - - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128} - - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128} - - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128} - - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128} - - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256} - - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256} - - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256} - - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256} - - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256} - - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256} - - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256} - - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512} - - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512} - - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512} - - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512} - - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512} - - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512} - - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512} - - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512} - - 
{class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512} - - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512} - - {class: mask, go: Mask8x16, elemBits: 8, bits: 128} - - {class: mask, go: Mask16x8, elemBits: 16, bits: 128} - - {class: mask, go: Mask32x4, elemBits: 32, bits: 128} - - {class: mask, go: Mask64x2, elemBits: 64, bits: 128} - - {class: mask, go: Mask8x32, elemBits: 8, bits: 256} - - {class: mask, go: Mask16x16, elemBits: 16, bits: 256} - - {class: mask, go: Mask32x8, elemBits: 32, bits: 256} - - {class: mask, go: Mask64x4, elemBits: 64, bits: 256} - - {class: mask, go: Mask8x64, elemBits: 8, bits: 512} - - {class: mask, go: Mask16x32, elemBits: 16, bits: 512} - - {class: mask, go: Mask32x16, elemBits: 32, bits: 512} - - {class: mask, go: Mask64x8, elemBits: 64, bits: 512} - - {class: immediate} # TODO + - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} + - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256, lanes: 16} + - {class: 
vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} + - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512, lanes: 32} + - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} + - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512, lanes: 16} + - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, lanes: 16} + - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} + - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} + - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} + - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 
512, lanes: 64} + - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} + - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} + - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. out: !repeat - *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 292411cb..004a815f 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -47,12 +47,17 @@ func loadXED(xedPath string) []*unify.Value { return } // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out"} + fields := []string{"goarch", "asm", "in", "out", "extension"} values := []*unify.Value{ unify.NewValue(unify.NewStringExact("amd64")), unify.NewValue(unify.NewStringExact(inst.Opcode())), unify.NewValue(ins), unify.NewValue(outs), + unify.NewValue(unify.NewStringExact(inst.Extension)), + } + if strings.Contains(inst.Pattern, "ZEROING=0") { + fields = append(fields, "zeroing") + values = append(values, unify.NewValue(unify.NewStringExact("false"))) } pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos)) @@ -107,6 +112,8 @@ type operandVReg struct { // Vector register type operandMask struct { operandCommon vecShape + // Bits in the mask is w/bits. 
+ allMasks bool } type operandImm struct { @@ -137,17 +144,25 @@ func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { if err != nil { panic("parsing baseRe: " + err.Error()) } - return []string{"class", "elemBits", "bits", "base"}, []*unify.Value{ + fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ strVal("vreg"), - strVal(o.elemBits), strVal(o.bits), unify.NewValue(baseDomain)} + if o.elemBits != o.bits { + fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + } + // otherwise it means the vector could be any shape. + return } func (o operandMask) toValue() (fields []string, vals []*unify.Value) { return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} } +func (o operandMask) zeroMaskValue() (fields []string, vals []*unify.Value) { + return []string{"class"}, []*unify.Value{strVal("mask")} +} + func (o operandImm) toValue() (fields []string, vals []*unify.Value) { return []string{"class", "bits"}, []*unify.Value{strVal("immediate"), strVal(o.bits)} } @@ -256,6 +271,7 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu inferMask := func(r, w bool) error { var masks []int var rSizes, wSizes, sizes []vecShape + allMasks := true for i, op := range ops { action := op.common().action if _, ok := op.(operandMask); ok { @@ -265,12 +281,15 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu if action.r == r || action.w == w { masks = append(masks, i) } - } else if reg, ok := op.(operandVReg); ok { - if action.r { - rSizes = append(rSizes, reg.vecShape) - } - if action.w { - wSizes = append(wSizes, reg.vecShape) + } else { + allMasks = false + if reg, ok := op.(operandVReg); ok { + if action.r { + rSizes = append(rSizes, reg.vecShape) + } + if action.w { + wSizes = append(wSizes, reg.vecShape) + } } } } @@ -292,6 +311,15 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs 
unify.Tu } if len(sizes) == 0 { + // If all operands are masks, leave the mask inference to the users. + if allMasks { + for _, i := range masks { + m := ops[i].(operandMask) + m.allMasks = true + ops[i] = m + } + return nil + } return fmt.Errorf("cannot infer mask size: no register operands") } shape, ok := singular(sizes) @@ -315,6 +343,12 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu var inVals, outVals []*unify.Value for asmPos, op := range ops { fields, values := op.toValue() + if opm, ok := op.(operandMask); ok { + if opm.allMasks { + // If all operands are masks, leave the mask inference to the users. + fields, values = opm.zeroMaskValue() + } + } fields = append(fields, "asmPos") values = append(values, unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) From 99cca1d98223c070eb9c1236e346e562af4fd505 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 26 May 2025 21:06:00 +0000 Subject: [PATCH 058/200] internal/simdgen: complete defs for compares. This CL generates code that passed ./make.bash. Change-Id: I7e645edd4eeda6322a669a427fa6164e7db18315 Reviewed-on: https://go-review.googlesource.com/c/arch/+/676415 Reviewed-by: David Chase Auto-Submit: Junyang Shao Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 67 ++++++++++++++++++ internal/simdgen/gen_utility.go | 3 - internal/simdgen/go.yaml | 30 +++++++- internal/simdgen/ops/Compares/categories.yaml | 69 ++++++++++++++++++- internal/simdgen/ops/Compares/go.yaml | 30 +++++++- 5 files changed, 191 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 1f2fb056..80d4bf41 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -57,24 +57,91 @@ extension: "AVX.*" # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. 
+# const imm predicate(holds for both float and int|uint): +# 0: Equal +# 1: Less +# 2: LessEqual +# 4: NotEqual +# 5: GreaterEqual +# 6: Greater - go: Equal constImm: 0 commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: Less + constImm: 1 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: LessEqual + constImm: 2 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: IsNan # For float only. + constImm: 3 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: NotEqual + constImm: 4 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: GreaterEqual + constImm: 5 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" + - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: MaskedLess + constImm: 1 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: MaskedLessEqual + constImm: 2 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: MaskedIsNan # For float only. 
+ constImm: 3 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: MaskedNotEqual + constImm: 4 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: MaskedGreaterEqual + constImm: 5 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" - go: Div commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 1e822980..74ab0e9f 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -430,9 +430,6 @@ func overwrite(ops []Operation) error { if op[idx].Class != "vreg" { return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx]) } - if *op[idx].Base != "uint" && *op[idx].Base != "int" { - return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Base] from int|uint: %s", op[idx]) - } hasClassOverwrite = true *op[idx].Base = oBase op[idx].Class = oClass diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 0ec1ee0d..514f4540 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -247,6 +247,7 @@ - *intToFloat out: - *intToFloat +# Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" in: &int2 @@ -279,7 +280,7 @@ - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. 
-- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - class: mask @@ -291,7 +292,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - class: mask @@ -303,6 +304,31 @@ const: 0 out: - class: mask + +# Floats +- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan + asm: "VCMPP[SD]" + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 0 + out: + - go: $t # We still need the output to be the same shape as inputs. + overwriteBase: uint + overwriteClass: mask +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) + asm: "VCMPP[SD]" + in: + - class: mask + - *float + - *float + - class: immediate + const: 0 + out: + - class: mask - go: Div asm: "V?DIVP[SD]" in: diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index cac97d4e..027c8e8d 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -1,19 +1,86 @@ !sum +# const imm predicate(holds for both float and int|uint): +# 0: Equal +# 1: Less +# 2: LessEqual +# 4: NotEqual +# 5: GreaterEqual +# 6: Greater - go: Equal constImm: 0 commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: Less + constImm: 1 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: LessEqual + constImm: 2 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: IsNan # For float only. 
+ constImm: 3 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: NotEqual + constImm: 4 + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: GreaterEqual + constImm: 5 + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" + - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" + documentation: "Predicate immediate is 0 if it has;" +- go: MaskedLess + constImm: 1 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 1 if it has;" +- go: MaskedLessEqual + constImm: 2 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 2 if it has;" +- go: MaskedIsNan # For float only. 
+ constImm: 3 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" +- go: MaskedNotEqual + constImm: 4 + masked: "true" + commutative: "true" + extension: "AVX.*" + documentation: "Predicate immediate is 4 if it has;" +- go: MaskedGreaterEqual + constImm: 5 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" + documentation: "Predicate immediate is 6 if it has;" \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index f0a8cab1..c3a52394 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -1,4 +1,5 @@ !sum +# Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" in: &int2 @@ -31,7 +32,7 @@ - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - class: mask @@ -43,7 +44,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater) +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - class: mask @@ -55,3 +56,28 @@ const: 0 out: - class: mask + +# Floats +- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan + asm: "VCMPP[SD]" + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 0 + out: + - go: $t # We still need the output to be the same shape as inputs. 
+ overwriteBase: uint + overwriteClass: mask +- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) + asm: "VCMPP[SD]" + in: + - class: mask + - *float + - *float + - class: immediate + const: 0 + out: + - class: mask \ No newline at end of file From 6a7b46808a5ff3b74d86db551fcfea7707676022 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 27 May 2025 15:59:22 -0400 Subject: [PATCH 059/200] internal/simd: tweak command line flags This change the command line flag -godefroot to -goroot so it matches the "Code generated by" comment at the top. Change-Id: Iee8044fe6573d4d87a53181ac635dadbee4a9843 Reviewed-on: https://go-review.googlesource.com/c/arch/+/676497 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Auto-Submit: Junyang Shao --- internal/simdgen/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 84d8a92f..b0ae52cb 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -104,8 +104,8 @@ import ( var ( xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") - flagO = flag.String("o", "yaml", "output type: yaml, godefs") - flagGoDefRoot = flag.String("godefroot", ".", "the path to the directory containing the generated godefs") + flagO = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree") + flagGoDefRoot = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files") FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") FlagNoSplitMask = flag.Bool("nosplitmask", false, "disable splitting the masks to const and non const") FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") From 
2a0236b5d14f8fb59383ca43b1c267e09088dfca Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 28 May 2025 17:14:47 -0400 Subject: [PATCH 060/200] internal/simdgen: change simd package to "simd"; add test this puts the heavyweight end-to-end test in the repo, so we can be sure we agree on what the test is/does. Change-Id: I7f31835594bdd6571a6fa682cd8c2b22fb833e03 Reviewed-on: https://go-review.googlesource.com/c/arch/+/676757 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/etetest.sh | 21 +++++++++++++++++++++ internal/simdgen/gen_simdIntrinsics.go | 21 +++++++++++---------- internal/simdgen/gen_simdTypes.go | 4 ++-- internal/simdgen/main.go | 2 ++ 4 files changed, 36 insertions(+), 12 deletions(-) create mode 100755 internal/simdgen/etetest.sh diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh new file mode 100755 index 00000000..f83b6b37 --- /dev/null +++ b/internal/simdgen/etetest.sh @@ -0,0 +1,21 @@ +#!/bin/bash -x + +cat <<\\EOF + +This is an end-to-end test of Go SIMD. It checks out a fresh Go +repository from the go.simd branch, then generates the SIMD input +files and runs simdgen writing into the fresh repository. + +After that it generates the modified ssa pattern matching files, then +builds the compiler. + +\EOF + +rm -rf go-test +git clone https://go.googlesource.com/go -b dev.simd go-test +go generate +go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categories.yaml +(cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) +(cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) +(cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) 
+# next, add some tests of SIMD itself diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 5d4a27f2..93174937 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -18,32 +18,33 @@ import ( "cmd/internal/sys" ) +const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{- range .OpsLen1}} - addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} {{- range .OpsLen2}} - addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} {{- range .OpsLen3}} - addF("internal/simd", "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} {{- range .VectorConversions }} - addF("internal/simd", "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) {{- end}} {{- range $size, $ts := .TypeMap }} {{- range $t := $ts }} - addF("internal/simd", "Load{{$t.Name}}", simdLoad(), sys.AMD64) - addF("internal/simd", "{{$t.Name}}.Store", simdStore(), sys.AMD64) + addF(simdPackage, "Load{{$t.Name}}", simdLoad(), sys.AMD64) + addF(simdPackage, "{{$t.Name}}.Store", simdStore(), sys.AMD64) {{- end}} {{- end}} {{- range .Masks }} - addF("internal/simd", 
"{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF("internal/simd", "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF("internal/simd", "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) - addF("internal/simd", "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) + addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) {{- end}} } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 14395010..d06bb25a 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -179,7 +179,7 @@ func masksFromTypeMap(typeMap simdTypeMap) []simdType { // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/types_amd64.go", simdTypesTmpl) + file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTmpl) if err != nil { return err } @@ -200,7 +200,7 @@ func writeSIMDTypes(directory string, typeMap simdTypeMap) error { // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go // within the specified directory. 
func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/internal/simd/stubs_amd64.go", simdStubsTmpl) + file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/stubs_amd64.go", simdStubsTmpl) if err != nil { return err } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index b0ae52cb..14bf9b8f 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -117,6 +117,8 @@ var ( FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") ) +const simdPackage = "simd" + func main() { flag.Parse() From 88f5c58204869f4dcc4c6ca72feab854d1ed6b7e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 28 May 2025 21:16:14 +0000 Subject: [PATCH 061/200] internal/simdgen: Support more op shapes, add more ops This CL tries to support FP only arithmetic operations. To facilitate this it also amends simdgen to support more op shapes. This CL also added logic to exclude input and output sharing the same register case. This will be a TODO for simdgen. 
Change-Id: Ied981bfb53663d060a117e3c3ff1b82494b743fb Reviewed-on: https://go-review.googlesource.com/c/arch/+/676996 Reviewed-by: David Chase Commit-Queue: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 29 +++- internal/simdgen/gen_simdMachineOps.go | 6 +- internal/simdgen/gen_simdrules.go | 89 +++++++----- internal/simdgen/gen_simdssa.go | 130 +++++++----------- internal/simdgen/gen_utility.go | 31 +---- internal/simdgen/go.yaml | 33 ++++- .../simdgen/ops/FPonlyArith/categories.yaml | 31 ++++- internal/simdgen/ops/FPonlyArith/go.yaml | 35 ++++- 8 files changed, 222 insertions(+), 162 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 80d4bf41..c28926c8 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -143,10 +143,35 @@ extension: "AVX.*" documentation: "Predicate immediate is 6 if it has;" - go: Div - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedDiv - commutative: "true" + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: Sqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocal + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocal + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocalOfSqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocalOfSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. 
+ commutative: "false" masked: "true" extension: "AVX.*" - go: Mul diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 0deec9c6..ec3eaba9 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -12,7 +12,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package main -func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp2m1fp1, fp2m1m1 regInfo) []opData { +func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}"}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp1fp1": true, "fp2fp1": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true} + regInfoSet := map[string]bool{"fp1fp1": true, "fp2fp1": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true, "fp1m1fp1": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -107,7 +107,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { } else if shapeOut == OneKmaskOut { outType = "Mask" } else { - return fmt.Errorf("simdgen does not recognize this output shape: %+v", shapeOut) + return fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut) } if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c368c770..172282eb 100644 --- a/internal/simdgen/gen_simdrules.go +++ 
b/internal/simdgen/gen_simdrules.go @@ -20,28 +20,28 @@ const simdrulesTmpl = `// Code generated by x/arch/internal/simdgen using 'go ru // Masks are always at the end, immediates always at the beginning. {{- range .Ops }} -{{if eq (len .In) 1}}({{.Go}}{{(index .In 0).Go}} x) => ({{.Asm}} x){{end}}{{if eq (len .In) 2}}({{.Go}}{{(index .In 0).Go}} x y) => ({{.Asm}} y x){{end}} +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => ({{.Op.Asm}} {{.ReverseArgs}}) {{- end }} {{- range .OpsImm }} -({{.Go}}{{(index .In 1).Go}} x y) => ({{.Asm}} [{{(index .In 0).Const}}] y x) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}}) {{- end }} {{- range .OpsMask}} -({{.Go}}{{(index .In 0).Go}} x y z) => ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z)) +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask)) {{- end }} {{- range .OpsImmMask}} -({{.Go}}{{(index .In 1).Go}} x y z) => ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z)) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask)) {{- end }} {{- range .OpsMaskOut}} -({{.Go}}{{(index .In 0).Go}} x y) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x)) +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}})) {{- end }} {{- range .OpsImmInMaskOut}} -({{.Go}}{{(index .In 1).Go}} x y) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x)) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index 
.Op.In 0).Const}}] {{.ReverseArgs}})) {{- end }} {{- range .OpsMaskInMaskOut}} -({{.Go}}{{(index .In 0).Go}} x y z) => (VPMOVMToVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}} ({{.Asm}} y x (VPMOVVec{{(index .In 0).ElemBits}}x{{(index .In 0).Lanes}}ToM z))) +({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask))) {{- end }} {{- range .OpsImmMaskInMaskOut}} -({{.Go}}{{(index .In 1).Go}} x y z) => (VPMOVMToVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}} ({{.Asm}} [{{(index .In 0).Const}}] y x (VPMOVVec{{(index .In 1).ElemBits}}x{{(index .In 1).Lanes}}ToM z))) +({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => (VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask))) {{- end }} ` @@ -53,35 +53,52 @@ func writeSIMDRules(directory string, ops []Operation) error { return err } defer file.Close() - Ops := make([]Operation, 0) - OpsImm := make([]Operation, 0) - OpsMask := make([]Operation, 0) - OpsImmMask := make([]Operation, 0) - OpsMaskOut := make([]Operation, 0) - OpsImmInMaskOut := make([]Operation, 0) - OpsMaskInMaskOut := make([]Operation, 0) - OpsImmMaskInMaskOut := make([]Operation, 0) + type OpAndArgList struct { + Op Operation + Args string // "x y", does not include masks + ReverseArgs string // "y x", does not include masks + } + Ops := make([]OpAndArgList, 0) + OpsImm := make([]OpAndArgList, 0) + OpsMask := make([]OpAndArgList, 0) + OpsImmMask := make([]OpAndArgList, 0) + OpsMaskOut := make([]OpAndArgList, 0) + OpsImmInMaskOut := make([]OpAndArgList, 0) + OpsMaskInMaskOut := make([]OpAndArgList, 0) + OpsImmMaskInMaskOut := make([]OpAndArgList, 0) for _, op := range ops { - opInShape, opOutShape, maskType, _, op, _, err := 
op.shape() + opInShape, opOutShape, maskType, _, op, gOp, err := op.shape() if err != nil { return err } + vregInCnt := len(gOp.In) if maskType == OneMask { op.Asm += "Masked" + vregInCnt-- } op.Asm = fmt.Sprintf("%s%d", op.Asm, *op.Out[0].Bits) + opData := OpAndArgList{Op: op} + if vregInCnt == 1 { + opData.Args = "x" + opData.ReverseArgs = "x" + } else if vregInCnt == 2 { + opData.Args = "x y" + opData.ReverseArgs = "y x" + } else { + return fmt.Errorf("simdgen does not support more than 2 vreg in inputs") + } // If class overwrite is happening, that's not really a mask but a vreg. if opOutShape == OneVregOut || op.Out[0].OverwriteClass != nil { switch opInShape { case PureVregIn: - Ops = append(Ops, op) + Ops = append(Ops, opData) case OneKmaskIn: - OpsMask = append(OpsMask, op) + OpsMask = append(OpsMask, opData) case OneConstImmIn: - OpsImm = append(OpsImm, op) + OpsImm = append(OpsImm, opData) case OneKmaskConstImmIn: - OpsImmMask = append(OpsImmMask, op) + OpsImmMask = append(OpsImmMask, opData) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } @@ -89,22 +106,22 @@ func writeSIMDRules(directory string, ops []Operation) error { // OneKmaskOut case switch opInShape { case PureVregIn: - OpsMaskOut = append(OpsMaskOut, op) + OpsMaskOut = append(OpsMaskOut, opData) case OneKmaskIn: - OpsMaskInMaskOut = append(OpsMaskInMaskOut, op) + OpsMaskInMaskOut = append(OpsMaskInMaskOut, opData) case OneConstImmIn: - OpsImmInMaskOut = append(OpsImmInMaskOut, op) + OpsImmInMaskOut = append(OpsImmInMaskOut, opData) case OneKmaskConstImmIn: - OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, op) + OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, opData) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } } } - sortKey := func(op *Operation) string { - return *op.In[0].Go + op.Go + sortKey := 
func(op *OpAndArgList) string { + return *op.Op.In[0].Go + op.Op.Go } - sortBySortKey := func(ops []Operation) { + sortBySortKey := func(ops []OpAndArgList) { sort.Slice(ops, func(i, j int) bool { return sortKey(&ops[i]) < sortKey(&ops[j]) }) @@ -119,14 +136,14 @@ func writeSIMDRules(directory string, ops []Operation) error { sortBySortKey(OpsImmMaskInMaskOut) type templateData struct { - Ops []Operation - OpsImm []Operation - OpsMask []Operation - OpsImmMask []Operation - OpsMaskOut []Operation - OpsImmInMaskOut []Operation - OpsMaskInMaskOut []Operation - OpsImmMaskInMaskOut []Operation + Ops []OpAndArgList + OpsImm []OpAndArgList + OpsMask []OpAndArgList + OpsImmMask []OpAndArgList + OpsMaskOut []OpAndArgList + OpsImmInMaskOut []OpAndArgList + OpsMaskInMaskOut []OpAndArgList + OpsImmMaskInMaskOut []OpAndArgList } err = t.Execute(file, templateData{ diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 10222bc7..92bfed79 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -23,18 +23,18 @@ import ( func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { p := s.Prog(v.Op.Asm()) // First arg - switch v.Op {{"{"}}{{if gt (len .ImmFirst) 0}} - // Imm - case {{.ImmFirst}}: + switch v.Op {{"{"}}{{if gt (len .Imms) 0}} + // Immediates + case {{.Imms}}: imm := v.AuxInt if imm < 0 || imm > 255 { v.Fatalf("Invalid source selection immediate") } p.From.Offset = imm p.From.Type = obj.TYPE_CONST -{{end}}{{if gt (len .VregFirst) 0}} - // vreg - case {{.VregFirst}}: +{{end}}{{if gt (len .Reg0) 0}} + // Registers + case {{.Reg0}}: p.From.Type = obj.TYPE_REG p.From.Reg = simdReg(v.Args[0]) {{end}} @@ -44,9 +44,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { } // Second arg - switch v.Op {{"{"}}{{if gt (len .VregSecond) 0}} - // vreg - case {{.VregSecond}}: + switch v.Op {{"{"}}{{if gt (len .Reg1) 0}} + // Registers + case {{.Reg1}}: if p.From.Type == obj.TYPE_CONST { 
p.AddRestSourceReg(simdReg(v.Args[0])) } else { @@ -55,43 +55,31 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { } // Third arg - switch v.Op {{"{"}}{{if gt (len .VregThird) 0}} - // vreg - case {{.VregThird}}: + switch v.Op {{"{"}}{{if gt (len .Reg2) 0}} + // Registers + case {{.Reg2}}: if p.From.Type == obj.TYPE_CONST { p.AddRestSourceReg(simdReg(v.Args[1])) } else { p.AddRestSourceReg(simdReg(v.Args[2])) - } -{{end}}{{if gt (len .MaskThird) 0}} - // k mask - case {{.MaskThird}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(v.Args[1].Reg()) - } else { - p.AddRestSourceReg(v.Args[2].Reg()) }{{end}} } // Fourth arg - switch v.Op {{"{"}}{{if gt (len .MaskFourth) 0}} - case {{.MaskFourth}}: + switch v.Op {{"{"}}{{if gt (len .Reg3) 0}} + case {{.Reg3}}: if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(v.Args[2].Reg()) + p.AddRestSourceReg(simdReg(v.Args[2])) } else { - p.AddRestSourceReg(v.Args[3].Reg()) + p.AddRestSourceReg(simdReg(v.Args[3])) }{{end}} } // Output - switch v.Op {{"{"}}{{if gt (len .VregOut) 0}} - case {{.VregOut}}: + switch v.Op {{"{"}}{{if gt (len .All) 0}} + case {{.All}}: p.To.Type = obj.TYPE_REG p.To.Reg = simdReg(v) -{{end}}{{if gt (len .MaskOut) 0}} - case {{.MaskOut}}: - p.To.Type = obj.TYPE_REG - p.To.Reg = v.Reg() {{end}} default: // One result is required. @@ -111,20 +99,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { // writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go // within the specified directory. 
func writeSIMDSSA(directory string, ops []Operation) error { - var ImmFirst []string - var VregFirst []string - var VregSecond []string - var MaskThird []string - var VregThird []string - var MaskFourth []string - var VregOut []string - var MaskOut []string + var Imms []string + var All []string var ZeroingMask []string + Regs := map[int][]string{} seen := map[string]struct{}{} for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, gOp, err := op.shape() + shapeIn, _, maskType, _, _, gOp, err := op.shape() if err != nil { return err } @@ -137,61 +120,40 @@ func writeSIMDSSA(directory string, ops []Operation) error { } seen[asm] = struct{}{} caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) - if shapeIn == PureVregIn || shapeIn == PureKmaskIn { - // Masks and vreg are handled together by simdReg() - VregFirst = append(VregFirst, caseStr) - if len(gOp.In) > 1 { - VregSecond = append(VregSecond, caseStr) - } - } else if shapeIn == OneKmaskIn { - VregFirst = append(VregFirst, caseStr) - VregSecond = append(VregSecond, caseStr) - MaskThird = append(MaskThird, caseStr) - if gOp.Zeroing == nil { - ZeroingMask = append(ZeroingMask, caseStr) - } - } else if shapeIn == OneConstImmIn { - ImmFirst = append(ImmFirst, caseStr) - VregSecond = append(VregSecond, caseStr) - VregThird = append(VregThird, caseStr) - } else { - // OneKmaskConstImmIn case - ImmFirst = append(ImmFirst, caseStr) - VregSecond = append(VregSecond, caseStr) - VregThird = append(VregThird, caseStr) - MaskFourth = append(MaskFourth, caseStr) + if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { if gOp.Zeroing == nil { ZeroingMask = append(ZeroingMask, caseStr) } } - if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { - // If class overwrite is happening, that's not really a mask but a vreg. 
- VregOut = append(VregOut, caseStr) - } else { - // OneKmaskOut case - MaskOut = append(MaskOut, caseStr) + immCount := 0 + if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + immCount++ + Imms = append(Imms, caseStr) + } + for i := range len(gOp.In) { + if i > 2 { + return fmt.Errorf("simdgen does not recognize more than 3 registers: %s", gOp) + } + Regs[i+immCount] = append(Regs[i+immCount], caseStr) } + All = append(All, caseStr) } data := struct { - ImmFirst string - VregFirst string - VregSecond string - MaskThird string - VregThird string - MaskFourth string - VregOut string - MaskOut string + Imms string + Reg0 string + Reg1 string + Reg2 string + Reg3 string + All string ZeroingMask string }{ - strings.Join(ImmFirst, ", "), - strings.Join(VregFirst, ", "), - strings.Join(VregSecond, ", "), - strings.Join(MaskThird, ", "), - strings.Join(VregThird, ", "), - strings.Join(MaskFourth, ", "), - strings.Join(VregOut, ", "), - strings.Join(MaskOut, ", "), + strings.Join(Imms, ", "), + strings.Join(Regs[0], ", "), + strings.Join(Regs[1], ", "), + strings.Join(Regs[2], ", "), + strings.Join(Regs[3], ", "), + strings.Join(All, ", "), strings.Join(ZeroingMask, ", "), } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 74ab0e9f..53362e61 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -91,7 +91,9 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper err = fmt.Errorf("simdgen only supports 1 output: %s", op) return } + var outputReg int if len(op.Out) == 1 { + outputReg = op.Out[0].AsmPos if op.Out[0].Class == "vreg" { shapeOut = OneVregOut } else if op.Out[0].Class == "mask" { @@ -112,6 +114,10 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper iConstMask := -1 hasVreg := false for i, in := range op.In { + if in.AsmPos == outputReg { + err = fmt.Errorf("simdgen doesn't support output and input sharing the same position: 
%s", op) + return + } if in.Class == "immediate" { // A manual check on XED data found that AMD64 SIMD instructions at most // have 1 immediates. So we don't need to check this here. @@ -208,31 +214,6 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper return } } - // Exclude some shape combination that are not yet supported in simdssa.go - if shapeIn == PureVregIn { - if len(opNoConstImmMask.In) > 2 { - err = fmt.Errorf("simdgen doesn't support more than 2 vreg args: %s", op) - return - } - } - if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { - if len(opNoConstImmMask.In) != 3 { - err = fmt.Errorf("simdgen only supports mask operations with 2 vreg args: %s", op) - return - } - } - if shapeIn == OneConstImmIn { - if len(opNoConstImmMask.In) != 2 { - err = fmt.Errorf("simdgen only supports immediate operations with 2 vreg args: %s", op) - return - } - } - if shapeIn == PureKmaskIn { - if len(opNoConstImmMask.In) != 2 { - err = fmt.Errorf("simdgen only supports pure k mask operations with 2 vreg args: %s", op) - return - } - } return } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 514f4540..157bc3ef 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -331,21 +331,46 @@ - class: mask - go: Div asm: "V?DIVP[SD]" - in: + in: &2fp - &fp go: $t base: float - *fp - out: + out: &1fp - *fp - go: MaskedDiv asm: "V?DIVP[SD]" - in: + in: &1mask2fp - class: mask - *fp - *fp - out: + out: *1fp +- go: Sqrt + asm: "V?SQRTP[SD]" + in: *1fp + out: *1fp +- go: MaskedSqrt + asm: "V?SQRTP[SD]" + in: &1mask1fp + - class: mask - *fp + out: *1fp +- go: MaskedApproximateReciprocal + asm: "VRCP14P[SD]" + in: *1mask1fp + out: *1fp +- go: ApproximateReciprocalOfSqrt + asm: "V?RSQRTPS" + in: *1fp + out: *1fp +- go: MaskedApproximateReciprocalOfSqrt + asm: "VRSQRT14P[SD]" + in: *1mask1fp + out: *1fp +- go: MaskedMulByPowOf2 + asm: "VSCALEFP[SD]" + in: *1mask2fp + out: *1fp # "Normal" multiplication is only 
available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 9166f1fa..3c46f1f4 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -1,8 +1,33 @@ !sum - go: Div - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedDiv - commutative: "true" + commutative: "false" masked: "true" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" +- go: Sqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocal + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocal + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: ApproximateReciprocalOfSqrt + commutative: "false" + extension: "AVX.*" +- go: MaskedApproximateReciprocalOfSqrt + commutative: "false" + masked: "true" + extension: "AVX.*" +- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. 
+ commutative: "false" + masked: "true" + extension: "AVX.*" diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 4c74d253..bd774e1d 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -1,18 +1,43 @@ !sum - go: Div asm: "V?DIVP[SD]" - in: + in: &2fp - &fp go: $t base: float - *fp - out: + out: &1fp - *fp - go: MaskedDiv asm: "V?DIVP[SD]" - in: + in: &1mask2fp - class: mask - *fp - *fp - out: - - *fp \ No newline at end of file + out: *1fp +- go: Sqrt + asm: "V?SQRTP[SD]" + in: *1fp + out: *1fp +- go: MaskedSqrt + asm: "V?SQRTP[SD]" + in: &1mask1fp + - class: mask + - *fp + out: *1fp +- go: MaskedApproximateReciprocal + asm: "VRCP14P[SD]" + in: *1mask1fp + out: *1fp +- go: ApproximateReciprocalOfSqrt + asm: "V?RSQRTPS" + in: *1fp + out: *1fp +- go: MaskedApproximateReciprocalOfSqrt + asm: "VRSQRT14P[SD]" + in: *1mask1fp + out: *1fp +- go: MaskedMulByPowOf2 + asm: "VSCALEFP[SD]" + in: *1mask2fp + out: *1fp \ No newline at end of file From ff8bee83384752b8dc32f89c347d41f511f44ae8 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 29 May 2025 16:55:50 +0000 Subject: [PATCH 062/200] internal/simd: fix gofmt issues in text template Add a newline to mute "gofmt" error when git committing the generated codes. 
Change-Id: I37eeab52ce32ba4badc9c9367ce8ae770acd44c9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677275 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Auto-Submit: Junyang Shao --- internal/simdgen/gen_simdIntrinsics.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 93174937..377026b9 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -19,6 +19,7 @@ import ( ) const simdPackage = "` + simdPackage + `" + func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{- range .OpsLen1}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) From 5a34366aa3f831bf652de26d641702aed58b7d78 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 02:58:12 +0000 Subject: [PATCH 063/200] internal/simdgen: add build tag to simd package Change-Id: I384d188b31a597177555e215e6e2827b802207c7 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677279 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d06bb25a..2b1a5b2f 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -29,6 +29,8 @@ type simdTypePair struct { const simdTypesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +//go:build goexperiment.simd + package simd {{- range $size, $ts := .TypeMap }} @@ -64,6 +66,8 @@ func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . 
-xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +//go:build goexperiment.simd + package simd {{- range .OpsLen1}} From e201ba44648b5eb5478e4061f5a3a9d2005f0d11 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 02:39:02 +0000 Subject: [PATCH 064/200] internal/simdgen: add min/max instructions support Change-Id: Ia784578d2b815f6b76fe3c165f2f4c4e88c0f1f2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677197 Auto-Submit: Junyang Shao Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 14 ++++ internal/simdgen/go.yaml | 74 ++++++++++++++++++++ internal/simdgen/ops/MinMax/categories.yaml | 15 +++++ internal/simdgen/ops/MinMax/go.yaml | 75 +++++++++++++++++++++ 4 files changed, 178 insertions(+) create mode 100644 internal/simdgen/ops/MinMax/categories.yaml create mode 100644 internal/simdgen/ops/MinMax/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index c28926c8..ca278805 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -174,6 +174,20 @@ commutative: "false" masked: "true" extension: "AVX.*" +- go: Max + commutative: "true" + extension: "AVX.*" +- go: MaskedMax + commutative: "true" + masked: "true" + extension: "AVX.*" +- go: Min + commutative: "true" + extension: "AVX.*" +- go: MaskedMin + commutative: "true" + masked: "true" + extension: "AVX.*" - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 157bc3ef..a78f3614 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -371,6 +371,80 @@ asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp +- go: Max + asm: "V?PMAXS[BWDQ]" + in: &2int + - &int + go: $t + base: int + - *int + out: &1int + - *int +- go: Max + asm: "V?PMAXU[BWDQ]" + in: &2uint + - &uint + go: $t + base: uint + - *uint + out: &1uint + - *uint +- go: MaskedMax + asm: "V?PMAXS[BWDQ]" + in: 
&1mask2int + - class: mask + - *int + - *int + out: *1int +- go: MaskedMax + asm: "V?PMAXU[BWDQ]" + in: &1mask2uint + - class: mask + - *uint + - *uint + out: *1uint + +- go: Min + asm: "V?PMINS[BWDQ]" + in: *2int + out: *1int +- go: Min + asm: "V?PMINU[BWDQ]" + in: *2uint + out: *1uint +- go: MaskedMin + asm: "V?PMINS[BWDQ]" + in: *1mask2int + out: *1int +- go: MaskedMin + asm: "V?PMINU[BWDQ]" + in: *1mask2uint + out: *1uint + +- go: Max + asm: "V?MAXP[SD]" + in: &2float + - &float + go: $t + base: float + - *float + out: &1float + - *float +- go: MaskedMax + asm: "V?MAXP[SD]" + in: &1mask2float + - class: mask + - *float + - *float + out: *1float +- go: Min + asm: "V?MINP[SD]" + in: *2float + out: *1float +- go: MaskedMin + asm: "V?MINP[SD]" + in: *1mask2float + out: *1float # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml new file mode 100644 index 00000000..d5131958 --- /dev/null +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -0,0 +1,15 @@ +!sum +- go: Max + commutative: "true" + extension: "AVX.*" +- go: MaskedMax + commutative: "true" + masked: "true" + extension: "AVX.*" +- go: Min + commutative: "true" + extension: "AVX.*" +- go: MaskedMin + commutative: "true" + masked: "true" + extension: "AVX.*" diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml new file mode 100644 index 00000000..f307e6b6 --- /dev/null +++ b/internal/simdgen/ops/MinMax/go.yaml @@ -0,0 +1,75 @@ +!sum +- go: Max + asm: "V?PMAXS[BWDQ]" + in: &2int + - &int + go: $t + base: int + - *int + out: &1int + - *int +- go: Max + asm: "V?PMAXU[BWDQ]" + in: &2uint + - &uint + go: $t + base: uint + - *uint + out: &1uint + - *uint +- go: MaskedMax + asm: "V?PMAXS[BWDQ]" + in: &1mask2int + - class: mask + - *int + - *int + out: *1int +- go: MaskedMax + asm: "V?PMAXU[BWDQ]" + in: &1mask2uint + 
- class: mask + - *uint + - *uint + out: *1uint + +- go: Min + asm: "V?PMINS[BWDQ]" + in: *2int + out: *1int +- go: Min + asm: "V?PMINU[BWDQ]" + in: *2uint + out: *1uint +- go: MaskedMin + asm: "V?PMINS[BWDQ]" + in: *1mask2int + out: *1int +- go: MaskedMin + asm: "V?PMINU[BWDQ]" + in: *1mask2uint + out: *1uint + +- go: Max + asm: "V?MAXP[SD]" + in: &2float + - &float + go: $t + base: float + - *float + out: &1float + - *float +- go: MaskedMax + asm: "V?MAXP[SD]" + in: &1mask2float + - class: mask + - *float + - *float + out: *1float +- go: Min + asm: "V?MINP[SD]" + in: *2float + out: *1float +- go: MaskedMin + asm: "V?MINP[SD]" + in: *1mask2float + out: *1float \ No newline at end of file From 3c204e9c8b48dd0e918f2f70a37e32efd19d5160 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 17:42:35 +0000 Subject: [PATCH 065/200] internal/simdgen: fix bugs when overwriting class to mask The base type of mask should be int instead of uint. Change-Id: I48a4ba1bfc06a2ac7eabd4c5aee12223b910c5f2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/677615 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 4 ++-- internal/simdgen/go.yaml | 4 ++-- internal/simdgen/ops/Compares/go.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 53362e61..28a451d3 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -405,8 +405,8 @@ func overwrite(ops []Operation) error { if oClass != "mask" { return fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx]) } - if oBase != "uint" { - return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to uint: %s", op[idx]) + if oBase != "int" { + return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx]) } if op[idx].Class != "vreg" { return fmt.Errorf("simdgen: [Class] overwrite must be 
overwriting [Class] from vreg: %s", op[idx]) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index a78f3614..52ca7703 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -258,7 +258,7 @@ out: - &anyvregToMask go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" @@ -317,7 +317,7 @@ const: 0 out: - go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index c3a52394..2fc1f225 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -10,7 +10,7 @@ out: - &anyvregToMask go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" @@ -69,7 +69,7 @@ const: 0 out: - go: $t # We still need the output to be the same shape as inputs. - overwriteBase: uint + overwriteBase: int overwriteClass: mask - go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" From 206ef99dc4c47f525e15660d8d461a56c2a0f559 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 30 May 2025 20:24:01 +0000 Subject: [PATCH 066/200] internal/simdgen: add more int instructions This CL is partially generated by Gemini Code Assist, and I promise I eyeballed it :D!!! 
Change-Id: I8ad33c9ea4146bbbd5c606b01adcda60bd78eeca Reviewed-on: https://go-review.googlesource.com/c/arch/+/677715 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 27 ++++++++ internal/simdgen/go.yaml | 60 ++++++++++++++++++ .../simdgen/ops/IntOnlyArith/categories.yaml | 28 +++++++++ internal/simdgen/ops/IntOnlyArith/go.yaml | 61 +++++++++++++++++++ 4 files changed, 176 insertions(+) create mode 100644 internal/simdgen/ops/IntOnlyArith/categories.yaml create mode 100644 internal/simdgen/ops/IntOnlyArith/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index ca278805..dfc65453 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -174,6 +174,33 @@ commutative: "false" masked: "true" extension: "AVX.*" +- go: Average + commutative: "true" + extension: "AVX.*" # VPAVGB/W are available across various AVX versions +- go: MaskedAverage + commutative: "true" + masked: "true" + extension: "AVX512.*" # Masked operations are typically AVX512 + +- go: Absolute + commutative: "false" + # Unary operation, not commutative + extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 +- go: MaskedAbsolute + commutative: "false" + masked: "true" + extension: "AVX512.*" + +- go: Sign + # Applies sign of second operand to first: sign(val, sign_src) + commutative: "false" + extension: "AVX.*" + # Sign does not have masked version + +- go: MaskedPopCount + commutative: "false" + masked: "true" + extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 52ca7703..ed7c7b1a 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -371,6 +371,66 @@ asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp +# Average (unsigned byte, unsigned word) +# Instructions: VPAVGB, VPAVGW +- go: Average + asm: "VPAVG[BW]" # Matches VPAVGB (byte) 
and VPAVGW (word) + in: + - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW + go: $t + base: uint + - *uint_t + out: + - *uint_t +- go: MaskedAverage + asm: "VPAVG[BW]" + in: + - class: mask + - *uint_t + - *uint_t + out: + - *uint_t + +# Absolute Value (signed byte, word, dword, qword) +# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ +- go: Absolute + asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ + in: + - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN + go: $t + base: int + out: + - *int_t # Output is magnitude, fits in the same signed type +- go: MaskedAbsolute + asm: "VPABS[BWDQ]" + in: + - class: mask + - *int_t + out: + - *int_t + +# Sign Operation (signed byte, word, dword) +# Applies sign of second operand to the first. +# Instructions: VPSIGNB, VPSIGNW, VPSIGND +- go: Sign + asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND + in: + - *int_t # value to apply sign to + - *int_t # value from which to take the sign + out: + - *int_t + +# Population Count (count set bits in each element) +# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) +# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) +- go: MaskedPopCount + asm: "VPOPCNT[BWDQ]" + in: + - class: mask + - &any + go: $t + out: + - *any - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml new file mode 100644 index 00000000..c74b57c4 --- /dev/null +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -0,0 +1,28 @@ +!sum +- go: Average + commutative: "true" + extension: "AVX.*" # VPAVGB/W are available across various AVX versions +- go: MaskedAverage + commutative: "true" + masked: "true" + extension: "AVX512.*" # Masked operations are typically AVX512 + +- go: Absolute + commutative: "false" + # Unary operation, not commutative + extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 +- go: MaskedAbsolute + commutative: "false" + masked: "true" + extension: "AVX512.*" + +- 
go: Sign + # Applies sign of second operand to first: sign(val, sign_src) + commutative: "false" + extension: "AVX.*" + # Sign does not have masked version + +- go: MaskedPopCount + commutative: "false" + masked: "true" + extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) \ No newline at end of file diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml new file mode 100644 index 00000000..e8aca3c6 --- /dev/null +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -0,0 +1,61 @@ +!sum +# Average (unsigned byte, unsigned word) +# Instructions: VPAVGB, VPAVGW +- go: Average + asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word) + in: + - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW + go: $t + base: uint + - *uint_t + out: + - *uint_t +- go: MaskedAverage + asm: "VPAVG[BW]" + in: + - class: mask + - *uint_t + - *uint_t + out: + - *uint_t + +# Absolute Value (signed byte, word, dword, qword) +# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ +- go: Absolute + asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ + in: + - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN + go: $t + base: int + out: + - *int_t # Output is magnitude, fits in the same signed type +- go: MaskedAbsolute + asm: "VPABS[BWDQ]" + in: + - class: mask + - *int_t + out: + - *int_t + +# Sign Operation (signed byte, word, dword) +# Applies sign of second operand to the first. 
+# Instructions: VPSIGNB, VPSIGNW, VPSIGND +- go: Sign + asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND + in: + - *int_t # value to apply sign to + - *int_t # value from which to take the sign + out: + - *int_t + +# Population Count (count set bits in each element) +# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) +# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) +- go: MaskedPopCount + asm: "VPOPCNT[BWDQ]" + in: + - class: mask + - &any + go: $t + out: + - *any \ No newline at end of file From b6e9ef6db54609e2b9363ec7c5c855953c577b35 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 30 May 2025 11:32:21 -0400 Subject: [PATCH 067/200] arch/internal: add more to the end-to-end test this adds some tests that were a pain to get right when importing simd to a Go repository. Change-Id: If94255105a1a601a4c92f6b7d0ce0369d18c26ee Reviewed-on: https://go-review.googlesource.com/c/arch/+/677535 Auto-Submit: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/etetest.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh index f83b6b37..a88776bf 100755 --- a/internal/simdgen/etetest.sh +++ b/internal/simdgen/etetest.sh @@ -18,4 +18,16 @@ go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categ (cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) (cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) (cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) 
+(cd go-test/bin; b=`pwd` ; cd ../src ; +GOEXPERIMENT=simd $b/go test go/doc +GOEXPERIMENT=simd $b/go test go/build +GOEXPERIMENT=simd $b/go test cmd/api -v -check +$b/go test go/doc +$b/go test go/build +$b/go test cmd/api -v -check + +$b/go test cmd/compile/internal/ssagen -simd=0 +GOEXPERIMENT=simd $b/go test cmd/compile/internal/ssagen -simd=0 +) + # next, add some tests of SIMD itself From 441e8c15dcad485da09ed2ad0c68d0e57f1583b6 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 3 Jun 2025 16:43:00 +0000 Subject: [PATCH 068/200] internal/simdgen: add pairwise add/sub Change-Id: Id0b678ec956e0c4ebdaae7f8b0a7ad01365f92df Reviewed-on: https://go-review.googlesource.com/c/arch/+/678376 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 16 +++++++++++ internal/simdgen/go.yaml | 32 ++++++++++++++++++--- internal/simdgen/ops/AddSub/categories.yaml | 18 +++++++++++- internal/simdgen/ops/AddSub/go.yaml | 32 ++++++++++++++++++--- 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index dfc65453..aae0cc9e 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -27,6 +27,22 @@ masked: "true" commutative: "true" extension: "AVX.*" +- go: PairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" +- go: PairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" +- go: SaturatedPairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in 
lower half of the target; With saturation" +- go: SaturatedPairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" - go: And commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index ed7c7b1a..35f0bf75 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -55,10 +55,10 @@ # Sub - go: Sub asm: "VPSUB[BWDQ]|VADDP[SD]" - in: + in: &2any - *any - *any - out: + out: &1any - *any - go: MaskedSub asm: "VPSUB[BWDQ]|VADDP[SD]" @@ -71,10 +71,10 @@ # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" - in: + in: &2int - *int - *int - out: + out: &1int - *int - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -99,6 +99,30 @@ - *uint out: - *uint +- go: PairwiseAdd + asm: "VPHADD[DW]" + in: *2any + out: *1any +- go: PairwiseSub + asm: "VPHSUB[DW]" + in: *2any + out: *1any +- go: PairwiseAdd + asm: "VHADDP[SD]" # floats + in: *2any + out: *1any +- go: PairwiseSub + asm: "VHSUBP[SD]" # floats + in: *2any + out: *1any +- go: SaturatedPairwiseAdd + asm: "VPHADDS[DW]" + in: *2int + out: *1int +- go: SaturatedPairwiseSub + asm: "VPHSUBS[DW]" + in: *2int + out: *1int # In the XED data, *all* floating point bitwise logic operation has their # operand type marked as uint. 
We are not trying to understand why Intel # decided that they want FP bit-wise logic operations, but this irregularity diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 8da031f7..e44412c2 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -26,4 +26,20 @@ - go: MaskedSaturatedSub masked: "true" commutative: "true" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" +- go: PairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" +- go: PairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" +- go: SaturatedPairwiseAdd + commutative: "false" + extension: "AVX.*" + documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" +- go: SaturatedPairwiseSub + commutative: "false" + extension: "AVX.*" + documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 9e8dc57d..75222a1b 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -55,10 +55,10 @@ # Sub - go: Sub asm: "VPSUB[BWDQ]|VADDP[SD]" - in: + in: &2any - *any - *any - out: + out: &1any - *any - go: MaskedSub asm: "VPSUB[BWDQ]|VADDP[SD]" @@ -71,10 +71,10 @@ # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" - in: + in: &2int 
- *int - *int - out: + out: &1int - *int - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -99,3 +99,27 @@ - *uint out: - *uint +- go: PairwiseAdd + asm: "VPHADD[DW]" + in: *2any + out: *1any +- go: PairwiseSub + asm: "VPHSUB[DW]" + in: *2any + out: *1any +- go: PairwiseAdd + asm: "VHADDP[SD]" # floats + in: *2any + out: *1any +- go: PairwiseSub + asm: "VHSUBP[SD]" # floats + in: *2any + out: *1any +- go: SaturatedPairwiseAdd + asm: "VPHADDS[DW]" + in: *2int + out: *1int +- go: SaturatedPairwiseSub + asm: "VPHSUBS[DW]" + in: *2int + out: *1int From 61fd4bb3e16cea130005b525c80c82f85be7cb44 Mon Sep 17 00:00:00 2001 From: Carlos Amedee Date: Thu, 5 Jun 2025 14:00:28 -0400 Subject: [PATCH 069/200] s390x/s390xasm: fix failing vet check for self assigment This change removes a self assignment which causes a vet check to fail. Change-Id: I88bde9297f8f63b6552feb7b4dce204a6f9fa132 Reviewed-on: https://go-review.googlesource.com/c/arch/+/679237 LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Reviewed-by: Dmitri Shuralyov --- s390x/s390xasm/plan9.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s390x/s390xasm/plan9.go b/s390x/s390xasm/plan9.go index 482433b4..fa5e3362 100644 --- a/s390x/s390xasm/plan9.go +++ b/s390x/s390xasm/plan9.go @@ -358,7 +358,7 @@ func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) strin args = args[0:3] case VFS: op = "WFSDB" - args[0], args[1], args[2] = args[2], args[1], args[0] + args[0], args[2] = args[2], args[0] args = args[0:3] case MSGFR, MHI, MSFI, MSGFI: switch inst.Op { From b2f4e2807decde481fc811cb5da9655c84dbcdcc Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 5 Jun 2025 17:41:44 +0000 Subject: [PATCH 070/200] internal/simdgen: adjust type defs for masks and fix errors Change-Id: I88b970c754450080c5780b7223808072f72dd61f Reviewed-on: https://go-review.googlesource.com/c/arch/+/679275 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 8 ++++---- 
internal/simdgen/gen_simdTypes.go | 3 +++ internal/simdgen/gen_simdssa.go | 14 +++++++------- internal/simdgen/ops/AddSub/categories.yaml | 8 ++++---- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index aae0cc9e..9d1fd5d7 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -14,18 +14,18 @@ commutative: "true" extension: "AVX.*" - go: Sub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: SaturatedSub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSaturatedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: PairwiseAdd commutative: "false" diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 2b1a5b2f..d19c8d2c 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -47,6 +47,8 @@ type {{$tsrc.Name}} struct { {{$tsrc.Fields}} } +{{- if ne $tsrc.Type "mask"}} + // Len returns the number of elements in a {{$tsrc.Name}} func (x {{$tsrc.Name}}) Len() int { return {{$tsrc.Lanes}} } @@ -60,6 +62,7 @@ func Load{{$tsrc.Name}}(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) {{$tsrc.Name}} //go:noescape func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) +{{- end}} {{- end}} {{- end}} ` diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 92bfed79..1f61d071 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -148,13 +148,13 @@ func writeSIMDSSA(directory string, ops []Operation) error { All string ZeroingMask string }{ - strings.Join(Imms, ", "), - strings.Join(Regs[0], ", "), - strings.Join(Regs[1], ", "), - strings.Join(Regs[2], ", "), - strings.Join(Regs[3], ", "), - strings.Join(All, ", "), - strings.Join(ZeroingMask, ", "), + 
strings.Join(Imms, ",\n\t\t"), + strings.Join(Regs[0], ",\n\t\t"), + strings.Join(Regs[1], ",\n\t\t"), + strings.Join(Regs[2], ",\n\t\t"), + strings.Join(Regs[3], ",\n\t\t"), + strings.Join(All, ",\n\t\t"), + strings.Join(ZeroingMask, ",\n\t\t"), } file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", simdssaTmpl) diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index e44412c2..1d08a94b 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -14,18 +14,18 @@ commutative: "true" extension: "AVX.*" - go: Sub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: SaturatedSub - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedSaturatedSub masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: PairwiseAdd commutative: "false" From db8b269b3637e72e96359d969705eeb508f227b1 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 6 Jun 2025 19:59:43 +0000 Subject: [PATCH 071/200] internal/simdgen: parse more register types This CL added more heuristics to parse more register types. This change is necessary to make VPDP* appear in the XED yaml. Change-Id: Ic502278edb798efe3e09deb8ea1165af3d774869 Reviewed-on: https://go-review.googlesource.com/c/arch/+/679735 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Auto-Submit: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/xed.go | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 004a815f..44360435 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -190,6 +190,10 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { // complicated. 
action, ok := actionEncoding[op.Action] if !ok { + if strings.HasPrefix(op.Name, "EMX_BROADCAST") { + // BROADCAST looks like to contain an obsolete operand. + return nil, nil + } return nil, fmt.Errorf("unknown action %q", op.Action) } common := operandCommon{action: action} @@ -249,7 +253,9 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu if err != nil { return unify.Tuple{}, unify.Tuple{}, err } - ops = append(ops, op) + if op != nil { + ops = append(ops, op) + } } // XED doesn't encode the size of mask operands. If there are mask operands, @@ -272,6 +278,7 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu var masks []int var rSizes, wSizes, sizes []vecShape allMasks := true + hasWMask := false for i, op := range ops { action := op.common().action if _, ok := op.(operandMask); ok { @@ -281,6 +288,9 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu if action.r == r || action.w == w { masks = append(masks, i) } + if action.w { + hasWMask = true + } } else { allMasks = false if reg, ok := op.(operandVReg); ok { @@ -320,11 +330,17 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu } return nil } - return fmt.Errorf("cannot infer mask size: no register operands") + return fmt.Errorf("cannot infer mask size: no register operands: %+v", operands) } shape, ok := singular(sizes) if !ok { - return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) + if !hasWMask && len(wSizes) == 1 && len(masks) == 1 { + // This pattern looks like predicate mask, so its shape should align with the + // output. TODO: verify this is a safe assumption. 
+ shape = wSizes[0] + } else { + return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) + } } for _, i := range masks { m := ops[i].(operandMask) @@ -407,6 +423,10 @@ func decodeReg(op *xeddata.Operand) (w int, ok bool) { return 256, true case strings.HasPrefix(rhs, "ZMM_"): return 512, true + case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"): + return 64, true + case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"): + return 32, true } return 0, false } @@ -475,6 +495,19 @@ func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) { // These just use the lower INT8 in each 16 bit field. // As far as I can tell, "2I8" is a typo. return scalarBaseInt, 8, true + case "2u16", "2U16": + // some VPDP* has it + // TODO: does "z" means it has zeroing? + return scalarBaseUint, 16, true + case "2i16", "2I16": + // some VPDP* has it + return scalarBaseInt, 16, true + case "4u8", "4U8": + // some VPDP* has it + return scalarBaseUint, 8, true + case "4i8", "4I8": + // some VPDP* has it + return scalarBaseInt, 8, true } // The rest follow a simple pattern. From 57d64c8c469c8a812603990690bac37d6493bbaf Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 10 Jun 2025 11:53:17 -0400 Subject: [PATCH 072/200] internal/simdgen: add more register masks to simdAMD64Ops These were added in the call in the compiler, so these must change to match, else the end-to-end test fails. 
Change-Id: I215c188d5935d4589e0c1ee14e2c51def80a2e36 Reviewed-on: https://go-review.googlesource.com/c/arch/+/680438 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdMachineOps.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index ec3eaba9..206c1665 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -12,7 +12,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package main -func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1 regInfo) []opData { +func simdAMD64Ops(fp1fp1, fp2fp1, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1, fp3fp1, fp3m1fp1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}"}, From cd4b42487a5111fd5a984a2201e96d3c658db99e Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 9 Jun 2025 15:52:06 -0400 Subject: [PATCH 073/200] internal/simdgen: remove map-iteration dependence from output This makes checking for (lack of) effects from changes much easier. 
Change-Id: I0b8c49381798d924541abb95bbfcbe8281d37950 Reviewed-on: https://go-review.googlesource.com/c/arch/+/680178 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 83 +++++++++++++++++++++---------- internal/simdgen/gen_utility.go | 1 + internal/simdgen/godefs.go | 44 ++++++++++++++++ 3 files changed, 102 insertions(+), 26 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d19c8d2c..0405c584 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -6,6 +6,8 @@ package main import ( "fmt" + "slices" + "sort" "strings" ) @@ -20,6 +22,14 @@ type simdType struct { Size int // The size of the type } +func compareSimdTypes(x, y simdType) int { + c := strings.Compare(x.Name, y.Name) + if c != 0 { + return c + } + return strings.Compare(x.Type, y.Type) +} + type simdTypeMap map[int][]simdType type simdTypePair struct { @@ -27,44 +37,51 @@ type simdTypePair struct { Tdst simdType } -const simdTypesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +func compareSimdTypePairs(x, y simdTypePair) int { + c := compareSimdTypes(x.Tsrc, y.Tsrc) + if c != 0 { + return c + } + return compareSimdTypes(x.Tdst, y.Tdst) +} + +const simdTypesTemplates = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
//go:build goexperiment.simd package simd +{{end}} -{{- range $size, $ts := .TypeMap }} - -// v{{$size}} is a tag type that tells the compiler that this is really {{$size}}-bit SIMD -type v{{$size}} struct { - _{{$size}} struct{} +{{define "sizeTmpl"}} +// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD +type v{{.}} struct { + _{{.}} struct{} } +{{end}} -{{- range $i, $tsrc := $ts }} - -// {{$tsrc.Name}} is a {{$size}}-bit SIMD vector of {{$tsrc.Lanes}} {{$tsrc.Base}} -type {{$tsrc.Name}} struct { -{{$tsrc.Fields}} +{{define "typeTmpl"}} +// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}} +type {{.Name}} struct { +{{.Fields}} } -{{- if ne $tsrc.Type "mask"}} +{{- if ne .Type "mask"}} -// Len returns the number of elements in a {{$tsrc.Name}} -func (x {{$tsrc.Name}}) Len() int { return {{$tsrc.Lanes}} } +// Len returns the number of elements in a {{.Name}} +func (x {{.Name}}) Len() int { return {{.Lanes}} } -// Load{{$tsrc.Name}} loads a {{$tsrc.Name}} from an array +// Load{{.Name}} loads a {{.Name}} from an array // //go:noescape -func Load{{$tsrc.Name}}(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) {{$tsrc.Name}} +func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} -// Store stores a {{$tsrc.Name}} to an array +// Store stores a {{.Name}} to an array // //go:noescape -func (x {{$tsrc.Name}}) Store(y *[{{$tsrc.Lanes}}]{{$tsrc.Base}}) +func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) {{- end}} -{{- end}} -{{- end}} +{{end}} ` const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
@@ -168,6 +185,7 @@ func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { } } } + slices.SortFunc(v, compareSimdTypePairs) return v } @@ -180,25 +198,38 @@ func masksFromTypeMap(typeMap simdTypeMap) []simdType { } } } + slices.SortFunc(m, compareSimdTypes) return m } // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTmpl) + file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTemplates) if err != nil { return err } defer file.Close() - type templateData struct { - TypeMap simdTypeMap + if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { + return fmt.Errorf("failed to execute fileHeader template: %w", err) } - err = t.Execute(file, templateData{typeMap}) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + sizes := make([]int, 0, len(typeMap)) + for size := range typeMap { + sizes = append(sizes, size) + } + sort.Ints(sizes) + + for _, size := range sizes { + if err := t.ExecuteTemplate(file, "sizeTmpl", size); err != nil { + return fmt.Errorf("failed to execute size template for size %d: %w", size, err) + } + for _, typeDef := range typeMap[size] { + if err := t.ExecuteTemplate(file, "typeTmpl", typeDef); err != nil { + return fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err) + } + } } return nil diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 28a451d3..7fcade60 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -355,6 +355,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { } deduped = append(deduped, dup[0]) } + slices.SortFunc(deduped, compareOperations) return deduped, nil } diff --git a/internal/simdgen/godefs.go 
b/internal/simdgen/godefs.go index 2a611c9e..9309e0ce 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -6,6 +6,8 @@ package main import ( "log" + "slices" + "strings" "golang.org/x/arch/internal/unify" ) @@ -30,6 +32,47 @@ type Operation struct { Masked *string } +func compareStringPointers(x, y *string) int { + if x != nil && y != nil { + return strings.Compare(*x, *y) + } + if x == nil && y == nil { + return 0 + } + if x == nil { + return -1 + } + return 1 +} + +func compareOperations(x, y Operation) int { + if c := strings.Compare(x.Go, y.Go); c != 0 { + return c + } + if c := strings.Compare(x.GoArch, y.GoArch); c != 0 { + return c + } + if len(x.In) < len(y.In) { + return -1 + } + if len(x.In) > len(y.In) { + return 1 + } + if len(x.Out) < len(y.Out) { + return -1 + } + if len(x.Out) > len(y.Out) { + return 1 + } + for i := range x.In { + ox, oy := &x.In[i], y.In[i] + if c := compareStringPointers(ox.Go, oy.Go); c != 0 { + return c + } + } + return 0 +} + type Operand struct { Class string // One of "mask", "immediate", "vreg" and "mem" @@ -68,6 +111,7 @@ func writeGoDefs(path string, cl unify.Closure) error { op.sortOperand() ops = append(ops, op) } + slices.SortFunc(ops, compareOperations) // The parsed XED data might contain duplicates, like // 512 bits VPADDP. deduped := dedup(ops) From 43295e6b7043ce004e33ac281d107156bf8b2d58 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 6 Jun 2025 17:29:43 -0400 Subject: [PATCH 074/200] internal/simdgen: simplify gen_simdrules.go This gets the control flow out of the templates, simplifies the templates, and allows better sorting of the generated rules. 
Change-Id: Ic31f2554bf3d2aaf1d3efd27a8a5060c8904767f Reviewed-on: https://go-review.googlesource.com/c/arch/+/680275 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdrules.go | 238 +++++++++++++++++------------- 1 file changed, 132 insertions(+), 106 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 172282eb..2f2178d6 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -6,99 +6,140 @@ package main import ( "fmt" - "sort" + "io" + "os" + "path/filepath" + "slices" + "strings" + "text/template" ) -const simdrulesTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +var ( + ruleTemplates = template.Must(template.New("simdRules").Parse(` +{{define "pureVregInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ReverseArgs}}) +{{end}} +{{define "oneKmaskInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) +{{end}} +{{define "oneConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}}) +{{end}} +{{define "oneKmaskConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) +{{end}} +{{define "pureVregInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}})) +{{end}} +{{define "oneKmaskInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) +{{end}} +{{define "oneConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}})) +{{end}} +{{define "oneKmaskConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => 
(VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) +{{end}} +`)) +) -// The AVX instruction encodings orders vector register from right to left, for example: -// VSUBPS X Y Z means Z=Y-X -// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a -// left to right order. -// TODO: we should offload the logic to simdssa.go, instead of here. -// -// Masks are always at the end, immediates always at the beginning. - -{{- range .Ops }} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => ({{.Op.Asm}} {{.ReverseArgs}}) -{{- end }} -{{- range .OpsImm }} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}}) -{{- end }} -{{- range .OpsMask}} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask)) -{{- end }} -{{- range .OpsImmMask}} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask)) -{{- end }} -{{- range .OpsMaskOut}} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}})) -{{- end }} -{{- range .OpsImmInMaskOut}} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}}) => (VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}})) -{{- end }} -{{- range .OpsMaskInMaskOut}} -({{.Op.Go}}{{(index .Op.In 0).Go}} {{.Args}} mask) => (VPMOVMToVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}} ({{.Op.Asm}} {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 0).ElemBits}}x{{(index .Op.In 0).Lanes}}ToM mask))) -{{- end }} -{{- range .OpsImmMaskInMaskOut}} -({{.Op.Go}}{{(index .Op.In 1).Go}} {{.Args}} mask) => 
(VPMOVMToVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}} ({{.Op.Asm}} [{{(index .Op.In 0).Const}}] {{.ReverseArgs}} (VPMOVVec{{(index .Op.In 1).ElemBits}}x{{(index .Op.In 1).Lanes}}ToM mask))) -{{- end }} -` +type tplRuleData struct { + tplName string + GoOp string + GoType string + Args string + Asm string + ReverseArgs string + ElemBits int + Lanes int + Const string +} + +func compareTplRuleData(x, y tplRuleData) int { + // TODO should MaskedXYZ compare just after XYZ? + if c := strings.Compare(x.GoOp, y.GoOp); c != 0 { + return c + } + if c := strings.Compare(x.GoType, y.GoType); c != 0 { + return c + } + if c := strings.Compare(x.Const, y.Const); c != 0 { + return c + } + return 0 +} // writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules // within the specified directory. func writeSIMDRules(directory string, ops []Operation) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules", simdrulesTmpl) + + outPath := filepath.Join(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") + if err := os.MkdirAll(filepath.Dir(outPath), 0755); err != nil { + return fmt.Errorf("failed to create directory for %s: %w", outPath, err) + } + file, err := os.Create(outPath) if err != nil { - return err + return fmt.Errorf("failed to create %s: %w", outPath, err) } defer file.Close() - type OpAndArgList struct { - Op Operation - Args string // "x y", does not include masks - ReverseArgs string // "y x", does not include masks + + header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. + +// The AVX instruction encodings orders vector register from right to left, for example: +// VSUBPS X Y Z means Z=Y-X +// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a +// left to right order. 
+// TODO: we should offload the logic to simdssa.go, instead of here. +// + +` + if _, err := io.WriteString(file, header); err != nil { + return fmt.Errorf("failed to write header to %s: %w", outPath, err) } - Ops := make([]OpAndArgList, 0) - OpsImm := make([]OpAndArgList, 0) - OpsMask := make([]OpAndArgList, 0) - OpsImmMask := make([]OpAndArgList, 0) - OpsMaskOut := make([]OpAndArgList, 0) - OpsImmInMaskOut := make([]OpAndArgList, 0) - OpsMaskInMaskOut := make([]OpAndArgList, 0) - OpsImmMaskInMaskOut := make([]OpAndArgList, 0) - - for _, op := range ops { - opInShape, opOutShape, maskType, _, op, gOp, err := op.shape() + + var allData []tplRuleData + + for _, opr := range ops { + opInShape, opOutShape, maskType, _, o, gOp, err := opr.shape() if err != nil { return err } vregInCnt := len(gOp.In) if maskType == OneMask { - op.Asm += "Masked" + o.Asm += "Masked" vregInCnt-- } - op.Asm = fmt.Sprintf("%s%d", op.Asm, *op.Out[0].Bits) - opData := OpAndArgList{Op: op} + o.Asm = fmt.Sprintf("%s%d", o.Asm, *o.Out[0].Bits) + + data := tplRuleData{ + GoOp: o.Go, + Asm: o.Asm, + } + if vregInCnt == 1 { - opData.Args = "x" - opData.ReverseArgs = "x" + data.Args = "x" + data.ReverseArgs = "x" } else if vregInCnt == 2 { - opData.Args = "x y" - opData.ReverseArgs = "y x" + data.Args = "x y" + data.ReverseArgs = "y x" } else { return fmt.Errorf("simdgen does not support more than 2 vreg in inputs") } + + var tplName string // If class overwrite is happening, that's not really a mask but a vreg. 
- if opOutShape == OneVregOut || op.Out[0].OverwriteClass != nil { + if opOutShape == OneVregOut || o.Out[0].OverwriteClass != nil { switch opInShape { case PureVregIn: - Ops = append(Ops, opData) + tplName = "pureVregInVregOut" + data.GoType = *o.In[0].Go case OneKmaskIn: - OpsMask = append(OpsMask, opData) + tplName = "oneKmaskInVregOut" + data.GoType = *o.In[0].Go + data.ElemBits = *o.In[0].ElemBits + data.Lanes = *o.In[0].Lanes case OneConstImmIn: - OpsImm = append(OpsImm, opData) + tplName = "oneConstImmInVregOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const case OneKmaskConstImmIn: - OpsImmMask = append(OpsImmMask, opData) + tplName = "oneKmaskConstImmInVregOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const + data.ElemBits = *o.In[1].ElemBits + data.Lanes = *o.In[1].Lanes case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } @@ -106,57 +147,42 @@ func writeSIMDRules(directory string, ops []Operation) error { // OneKmaskOut case switch opInShape { case PureVregIn: - OpsMaskOut = append(OpsMaskOut, opData) + tplName = "pureVregInKmaskOut" + data.GoType = *o.In[0].Go + data.ElemBits = *o.In[0].ElemBits + data.Lanes = *o.In[0].Lanes case OneKmaskIn: - OpsMaskInMaskOut = append(OpsMaskInMaskOut, opData) + tplName = "oneKmaskInKmaskOut" + data.GoType = *o.In[0].Go + data.ElemBits = *o.In[0].ElemBits + data.Lanes = *o.In[0].Lanes case OneConstImmIn: - OpsImmInMaskOut = append(OpsImmInMaskOut, opData) + tplName = "oneConstImmInKmaskOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const + data.ElemBits = *o.In[1].ElemBits + data.Lanes = *o.In[1].Lanes case OneKmaskConstImmIn: - OpsImmMaskInMaskOut = append(OpsImmMaskInMaskOut, opData) + tplName = "oneKmaskConstImmInKmaskOut" + data.GoType = *o.In[1].Go + data.Const = *o.In[0].Const + data.ElemBits = *o.In[1].ElemBits + data.Lanes = *o.In[1].Lanes case PureKmaskIn: return fmt.Errorf("simdgen does 
not support pure k mask instructions, they should be generated by compiler optimizations") } } - } - sortKey := func(op *OpAndArgList) string { - return *op.Op.In[0].Go + op.Op.Go - } - sortBySortKey := func(ops []OpAndArgList) { - sort.Slice(ops, func(i, j int) bool { - return sortKey(&ops[i]) < sortKey(&ops[j]) - }) - } - sortBySortKey(Ops) - sortBySortKey(OpsImm) - sortBySortKey(OpsMask) - sortBySortKey(OpsImmMask) - sortBySortKey(OpsMaskOut) - sortBySortKey(OpsImmInMaskOut) - sortBySortKey(OpsMaskInMaskOut) - sortBySortKey(OpsImmMaskInMaskOut) - - type templateData struct { - Ops []OpAndArgList - OpsImm []OpAndArgList - OpsMask []OpAndArgList - OpsImmMask []OpAndArgList - OpsMaskOut []OpAndArgList - OpsImmInMaskOut []OpAndArgList - OpsMaskInMaskOut []OpAndArgList - OpsImmMaskInMaskOut []OpAndArgList + + data.tplName = tplName + allData = append(allData, data) } - err = t.Execute(file, templateData{ - Ops, - OpsImm, - OpsMask, - OpsImmMask, - OpsMaskOut, - OpsImmInMaskOut, - OpsMaskInMaskOut, - OpsImmMaskInMaskOut}) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + slices.SortFunc(allData, compareTplRuleData) + + for _, data := range allData { + if err := ruleTemplates.ExecuteTemplate(file, data.tplName, data); err != nil { + return fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err) + } } return nil From 53c00bdecf19ba032a082f4db4aad970536b373b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 12 Jun 2025 02:18:30 +0000 Subject: [PATCH 075/200] internal/simdgen: refactor and support more shapes This CL refactors gen_simdrules.go and gen_simdssa.go: Instead of reversing the operand orders at lowering rules and maintain a state machine at prog writing, now machine ops and generic ops will have the same operand order and prog writing will adjust the register order. 
This CL supports operations with immediate args: During intrinsic, the compiler will check if the passed-in arg is a const or not; if not it will insert a runtime panic. This CL supports operations with result in arg0(same register). This CL supports more longer operations up to 4 register and 1 immediates. This CL also cleans up stubs documentation formats. This CL generates CL 681215. Change-Id: I3d14fbfafa5adc2ac189e27cd82b88623aa0150c Reviewed-on: https://go-review.googlesource.com/c/arch/+/681195 LUCI-TryBot-Result: Go LUCI Auto-Submit: Junyang Shao Reviewed-by: David Chase Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 48 ++-- internal/simdgen/gen_simdGenericOps.go | 27 +- internal/simdgen/gen_simdIntrinsics.go | 94 ++++++- internal/simdgen/gen_simdMachineOps.go | 71 ++--- internal/simdgen/gen_simdTypes.go | 67 ++++- internal/simdgen/gen_simdrules.go | 134 +++++----- internal/simdgen/gen_simdssa.go | 190 +++++++------- internal/simdgen/gen_utility.go | 245 +++++++++++++----- internal/simdgen/godefs.go | 13 +- internal/simdgen/ops/AddSub/categories.yaml | 8 +- internal/simdgen/ops/Compares/categories.yaml | 28 +- internal/simdgen/ops/Mul/categories.yaml | 12 +- 12 files changed, 590 insertions(+), 347 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 9d1fd5d7..4a4affbc 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -30,19 +30,19 @@ - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store 
them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" - go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" - go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" - go: And commutative: "true" extension: "AVX.*" @@ -84,80 +84,80 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: IsNan # For float only. 
constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" + documentation: "// Predicate immediate is 6 if it has;" - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: MaskedIsNan # For float only. 
constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" + documentation: "// Predicate immediate is 6 if it has;" - go: Div commutative: "false" extension: "AVX.*" @@ -237,15 +237,15 @@ - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + 
documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" - go: MaskedMul masked: "true" commutative: "true" @@ -254,14 +254,14 @@ masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 2b0fa008..bdda8b80 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -14,8 +14,11 @@ package main func simdGenericOps() []opData { return []opData{ -{{- range . 
}} +{{- range .Ops }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, +{{- end }} +{{- range .OpsImm }} + {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "Int8"}, {{- end }} } } @@ -35,17 +38,29 @@ func writeSIMDGenericOps(directory string, ops []Operation) error { OpInLen int Comm string } - opsData := make([]genericOpsData, 0) + type opData struct { + Ops []genericOpsData + OpsImm []genericOpsData + } + var opsData opData for _, op := range ops { - _, _, _, _, _, gOp, err := op.shape() + _, _, _, immType, _, _, gOp, err := op.shape() if err != nil { return err } genericNames := gOp.Go + *gOp.In[0].Go - opsData = append(opsData, genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative}) + gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} + if immType == VarImm || immType == ConstVarImm { + opsData.OpsImm = append(opsData.OpsImm, gOpData) + } else { + opsData.Ops = append(opsData.Ops, gOpData) + } } - sort.Slice(opsData, func(i, j int) bool { - return opsData[i].sortKey < opsData[j].sortKey + sort.Slice(opsData.Ops, func(i, j int) bool { + return opsData.Ops[i].sortKey < opsData.Ops[j].sortKey + }) + sort.Slice(opsData.OpsImm, func(i, j int) bool { + return opsData.OpsImm[i].sortKey < opsData.OpsImm[j].sortKey }) err = t.Execute(file, opsData) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 377026b9..5b6b74cf 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -30,6 +30,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{- range .OpsLen3}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) {{- end}} +{{- range .OpsLen4}} + addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{- end}} +{{- range .OpsLen1Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} +{{- range .OpsLen2Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} +{{- range .OpsLen3Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} +{{- range .OpsLen4Imm8}} + addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{- end}} {{- range .VectorConversions }} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) @@ -67,6 +82,76 @@ func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa } } +func opLen4(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue4(op, t, args[0], args[1], args[2], args[3]) + } +} + +func plainPanicSimdImm(s *state) { + cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL]) + cmp.AuxInt = 1 + // TODO: make this a standalone panic instead of reusing the overflow panic. + // Or maybe after we implement the switch table this will be obsolete anyway. 
+ s.check(cmp, ir.Syms.Panicoverflow) +} + +func opLen1Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if args[1].Op == ssa.OpConst8 { + return s.newValue1I(op, t, args[1].AuxInt< 0 { - vRegInS = fmt.Sprintf("fp%d", vRegInCnt) - } - if kMaskInCnt > 0 { - kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) - } - if vRegOutCnt > 0 { - vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) - } - if kMaskOutCnt > 0 { - kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + regInfo, err := op.regShape() + if err != nil { + return err } - regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) if _, ok := regInfoSet[regInfo]; !ok { return fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo) } var outType string - if shapeOut == OneVregOut || gOp.Out[0].OverwriteClass != nil { + if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { // If class overwrite is happening, that's not really a mask but a vreg. 
outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) } else if shapeOut == OneKmaskOut { @@ -109,10 +82,14 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { } else { return fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut) } - if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { - opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + resultInArg0 := "false" + if shapeOut == OneVregOutAtIn { + resultInArg0 = "true" + } + if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { + opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } else { - opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType}) + opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } } sort.Slice(opsData, func(i, j int) bool { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 0405c584..50480b30 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -91,22 +91,68 @@ const simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go ru package simd {{- range .OpsLen1}} - -// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} {{- end}} {{- range .OpsLen2}} - -// Asm: {{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} {{- end}} {{- range .OpsLen3}} - -// Asm: 
{{.Asm}}, Arch: {{.Extension}}{{if .Documentation}}, Doc: {{.Documentation}}{{end}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{- end}} +{{- range .OpsLen4}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen1Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen2Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen3Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} + +{{- end}} +{{- range .OpsLen3Imm8}} +{{if .Documentation}} +{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} + {{- end}} {{- range .VectorConversions }} @@ -243,7 +289,7 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro return err } defer file.Close() - opsLen1, opsLen2, opsLen3, err := genericOpsByLen(ops) + opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) if err != nil { return err } @@ -252,11 +298,16 @@ func writeSIMDStubs(directory string, ops 
[]Operation, typeMap simdTypeMap) erro OpsLen1 []Operation OpsLen2 []Operation OpsLen3 []Operation + OpsLen4 []Operation + OpsLen1Imm8 []Operation + OpsLen2Imm8 []Operation + OpsLen3Imm8 []Operation + OpsLen4Imm8 []Operation VectorConversions []simdTypePair Masks []simdType } - err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) + err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) if err != nil { return fmt.Errorf("failed to execute template : %w", err) } diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 2f2178d6..5f51c6f8 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -16,35 +16,26 @@ import ( var ( ruleTemplates = template.Must(template.New("simdRules").Parse(` -{{define "pureVregInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ReverseArgs}}) +{{define "pureVreg"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ArgsOut}}) {{end}} -{{define "oneKmaskInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) +{{define "maskIn"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask)) {{end}} -{{define "oneConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}}) +{{define "maskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}})) {{end}} -{{define "oneKmaskConstImmInVregOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask)) -{{end}} -{{define "pureVregInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}})) -{{end}} -{{define "oneKmaskInKmaskOut"}}({{.GoOp}}{{.GoType}} 
{{.Args}} mask) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) -{{end}} -{{define "oneConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}})) -{{end}} -{{define "oneKmaskConstImmInKmaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => (VPMOVMToVec{{.ElemBits}}x{{.Lanes}} ({{.Asm}} [{{.Const}}] {{.ReverseArgs}} (VPMOVVec{{.ElemBits}}x{{.Lanes}}ToM mask))) +{{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} `)) ) type tplRuleData struct { - tplName string - GoOp string - GoType string - Args string - Asm string - ReverseArgs string - ElemBits int - Lanes int - Const string + tplName string + GoOp string + GoType string + Args string + Asm string + ArgsOut string + MaskInConvert string + MaskOutConvert string } func compareTplRuleData(x, y tplRuleData) int { @@ -55,7 +46,7 @@ func compareTplRuleData(x, y tplRuleData) int { if c := strings.Compare(x.GoType, y.GoType); c != 0 { return c } - if c := strings.Compare(x.Const, y.Const); c != 0 { + if c := strings.Compare(x.Args, y.Args); c != 0 { return c } return 0 @@ -77,13 +68,6 @@ func writeSIMDRules(directory string, ops []Operation) error { header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. -// The AVX instruction encodings orders vector register from right to left, for example: -// VSUBPS X Y Z means Z=Y-X -// The rules here swapped the order of such X and Y because the ssa to prog lowering in simdssa.go assumes a -// left to right order. -// TODO: we should offload the logic to simdssa.go, instead of here. 
-// - ` if _, err := io.WriteString(file, header); err != nil { return fmt.Errorf("failed to write header to %s: %w", outPath, err) @@ -92,82 +76,84 @@ func writeSIMDRules(directory string, ops []Operation) error { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, _, o, gOp, err := opr.shape() + opInShape, opOutShape, maskType, immType, _, _, gOp, err := opr.shape() if err != nil { return err } vregInCnt := len(gOp.In) + asm := gOp.Asm if maskType == OneMask { - o.Asm += "Masked" + asm += "Masked" vregInCnt-- } - o.Asm = fmt.Sprintf("%s%d", o.Asm, *o.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) data := tplRuleData{ - GoOp: o.Go, - Asm: o.Asm, + GoOp: gOp.Go, + Asm: asm, } if vregInCnt == 1 { data.Args = "x" - data.ReverseArgs = "x" + data.ArgsOut = data.Args } else if vregInCnt == 2 { data.Args = "x y" - data.ReverseArgs = "y x" + data.ArgsOut = data.Args + } else if vregInCnt == 3 { + data.Args = "x y z" + data.ArgsOut = data.Args } else { - return fmt.Errorf("simdgen does not support more than 2 vreg in inputs") + return fmt.Errorf("simdgen does not support more than 3 vreg in inputs") + } + if immType == ConstImm { + data.ArgsOut = fmt.Sprintf("[%s] %s", *opr.In[0].Const, data.ArgsOut) + } else if immType == VarImm { + data.Args = fmt.Sprintf("[a] %s", data.Args) + data.ArgsOut = fmt.Sprintf("[a] %s", data.ArgsOut) + } else if immType == ConstVarImm { + data.Args = fmt.Sprintf("[a] %s", data.Args) + data.ArgsOut = fmt.Sprintf("[a+%s] %s", *opr.In[0].Const, data.ArgsOut) } var tplName string // If class overwrite is happening, that's not really a mask but a vreg. 
- if opOutShape == OneVregOut || o.Out[0].OverwriteClass != nil { + if opOutShape == OneVregOut || opOutShape == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { switch opInShape { + case OneImmIn: + tplName = "pureVreg" + data.GoType = *gOp.In[0].Go case PureVregIn: - tplName = "pureVregInVregOut" - data.GoType = *o.In[0].Go + tplName = "pureVreg" + data.GoType = *gOp.In[0].Go + data.Args = "..." + data.ArgsOut = "..." + case OneKmaskImmIn: + fallthrough case OneKmaskIn: - tplName = "oneKmaskInVregOut" - data.GoType = *o.In[0].Go - data.ElemBits = *o.In[0].ElemBits - data.Lanes = *o.In[0].Lanes - case OneConstImmIn: - tplName = "oneConstImmInVregOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - case OneKmaskConstImmIn: - tplName = "oneKmaskConstImmInVregOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - data.ElemBits = *o.In[1].ElemBits - data.Lanes = *o.In[1].Lanes + tplName = "maskIn" + data.GoType = *gOp.In[0].Go + rearIdx := len(gOp.In) - 1 + // Mask is at the end. 
+ data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } } else { // OneKmaskOut case + data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) switch opInShape { + case OneImmIn: + fallthrough case PureVregIn: - tplName = "pureVregInKmaskOut" - data.GoType = *o.In[0].Go - data.ElemBits = *o.In[0].ElemBits - data.Lanes = *o.In[0].Lanes + tplName = "maskOut" + data.GoType = *gOp.In[0].Go + case OneKmaskImmIn: + fallthrough case OneKmaskIn: - tplName = "oneKmaskInKmaskOut" - data.GoType = *o.In[0].Go - data.ElemBits = *o.In[0].ElemBits - data.Lanes = *o.In[0].Lanes - case OneConstImmIn: - tplName = "oneConstImmInKmaskOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - data.ElemBits = *o.In[1].ElemBits - data.Lanes = *o.In[1].Lanes - case OneKmaskConstImmIn: - tplName = "oneKmaskConstImmInKmaskOut" - data.GoType = *o.In[1].Go - data.Const = *o.In[0].Const - data.ElemBits = *o.In[1].ElemBits - data.Lanes = *o.In[1].Lanes + tplName = "maskInMaskOut" + data.GoType = *gOp.In[0].Go + rearIdx := len(gOp.In) - 1 + data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 1f61d071..b7d94251 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -7,9 +7,12 @@ package main import ( "fmt" "strings" + "text/template" ) -const simdssaTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+var ( + ssaTemplates = template.Must(template.New("simdSSA").Parse(` +{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package amd64 @@ -21,93 +24,67 @@ import ( ) func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { - p := s.Prog(v.Op.Asm()) - // First arg - switch v.Op {{"{"}}{{if gt (len .Imms) 0}} - // Immediates - case {{.Imms}}: - imm := v.AuxInt - if imm < 0 || imm > 255 { - v.Fatalf("Invalid source selection immediate") - } - p.From.Offset = imm - p.From.Type = obj.TYPE_CONST -{{end}}{{if gt (len .Reg0) 0}} - // Registers - case {{.Reg0}}: - p.From.Type = obj.TYPE_REG - p.From.Reg = simdReg(v.Args[0]) + var p *obj.Prog + switch v.Op {{"{"}}{{end}} +{{define "case"}} + case {{.Cases}}: + p = {{.Helper}}(s, v) {{end}} +{{define "footer"}} default: - // At least one arg is required. + // Unknown reg shape return false } - - // Second arg - switch v.Op {{"{"}}{{if gt (len .Reg1) 0}} - // Registers - case {{.Reg1}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(simdReg(v.Args[0])) - } else { - p.AddRestSourceReg(simdReg(v.Args[1])) - }{{end}} - } - - // Third arg - switch v.Op {{"{"}}{{if gt (len .Reg2) 0}} - // Registers - case {{.Reg2}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(simdReg(v.Args[1])) - } else { - p.AddRestSourceReg(simdReg(v.Args[2])) - }{{end}} - } - - // Fourth arg - switch v.Op {{"{"}}{{if gt (len .Reg3) 0}} - case {{.Reg3}}: - if p.From.Type == obj.TYPE_CONST { - p.AddRestSourceReg(simdReg(v.Args[2])) - } else { - p.AddRestSourceReg(simdReg(v.Args[3])) - }{{end}} - } - - // Output - switch v.Op {{"{"}}{{if gt (len .All) 0}} - case {{.All}}: - p.To.Type = obj.TYPE_REG - p.To.Reg = simdReg(v) {{end}} - default: - // One result is required. - return false - } -{{if gt (len .ZeroingMask) 0}} +{{define "zeroing"}} // Masked operation are always compiled with zeroing. 
switch v.Op { - case {{.ZeroingMask}}: + case {{.}}: x86.ParseSuffix(p, "Z") } {{end}} +{{define "ending"}} return true } -` +{{end}}`)) +) + +type tplSSAData struct { + Cases string + Helper string +} // writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go // within the specified directory. func writeSIMDSSA(directory string, ops []Operation) error { - var Imms []string - var All []string var ZeroingMask []string - Regs := map[int][]string{} + regInfoKeys := []string{ + "fp11", + "fp21", + "fp2m1", + "fp2m1fp1", + "fp2m1m1", + "fp1m1fp1", + "fp31", + "fp3m1fp1", + "fp11Imm8", + "fp1m1fp1Imm8", + "fp21Imm8", + "fp2m1Imm8", + "fp2m1m1Imm8", + "fp31ResultInArg0", + "fp3m1fp1ResultInArg0", + } + regInfoSet := map[string][]string{} + for _, key := range regInfoKeys { + regInfoSet[key] = []string{} + } seen := map[string]struct{}{} + allUnseen := map[string]struct{}{} for _, op := range ops { asm := op.Asm - shapeIn, _, maskType, _, _, gOp, err := op.shape() + shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() if err != nil { return err } @@ -120,52 +97,67 @@ func writeSIMDSSA(directory string, ops []Operation) error { } seen[asm] = struct{}{} caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) - if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { if gOp.Zeroing == nil { ZeroingMask = append(ZeroingMask, caseStr) } } - immCount := 0 - if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { - immCount++ - Imms = append(Imms, caseStr) + regShape, err := op.regShape() + if err != nil { + return err + } + if shapeOut == OneVregOutAtIn { + regShape += "ResultInArg0" } - for i := range len(gOp.In) { - if i > 2 { - return fmt.Errorf("simdgen does not recognize more than 3 registers: %s", gOp) - } - Regs[i+immCount] = append(Regs[i+immCount], caseStr) + if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { + regShape += "Imm8" + } + if _, ok := regInfoSet[regShape]; !ok { + 
allUnseen[regShape] = struct{}{} } - All = append(All, caseStr) + regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) } - - data := struct { - Imms string - Reg0 string - Reg1 string - Reg2 string - Reg3 string - All string - ZeroingMask string - }{ - strings.Join(Imms, ",\n\t\t"), - strings.Join(Regs[0], ",\n\t\t"), - strings.Join(Regs[1], ",\n\t\t"), - strings.Join(Regs[2], ",\n\t\t"), - strings.Join(Regs[3], ",\n\t\t"), - strings.Join(All, ",\n\t\t"), - strings.Join(ZeroingMask, ",\n\t\t"), + if len(allUnseen) != 0 { + return fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen) } - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", simdssaTmpl) + file, _, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", "") if err != nil { return err } defer file.Close() - err = t.Execute(file, data) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + if err := ssaTemplates.ExecuteTemplate(file, "header", nil); err != nil { + return fmt.Errorf("failed to execute header template: %w", err) + } + + for _, regShape := range regInfoKeys { + // Stable traversal of regInfoSet + cases := regInfoSet[regShape] + if len(cases) == 0 { + continue + } + data := tplSSAData{ + Cases: strings.Join(cases, ",\n\t\t"), + Helper: "simdGen" + capitalizeFirst(regShape), + } + if err := ssaTemplates.ExecuteTemplate(file, "case", data); err != nil { + return fmt.Errorf("failed to execute case template for %s: %w", regShape, err) + } + } + + if err := ssaTemplates.ExecuteTemplate(file, "footer", nil); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) + } + + if len(ZeroingMask) != 0 { + if err := ssaTemplates.ExecuteTemplate(file, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) + } + } + + if err := 
ssaTemplates.ExecuteTemplate(file, "ending", nil); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) } return nil diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 7fcade60..7f2af75c 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -40,8 +40,8 @@ const ( InvalidIn int = iota PureVregIn OneKmaskIn - OneConstImmIn - OneKmaskConstImmIn + OneImmIn + OneKmaskImmIn PureKmaskIn ) @@ -50,6 +50,7 @@ const ( NoOut OneVregOut OneKmaskOut + OneVregOutAtIn ) const ( @@ -60,33 +61,51 @@ const ( AllMasks ) +const ( + InvalidImm int = iota + NoImm + ConstImm + VarImm + ConstVarImm +) + // opShape returns the an int denoting the shape of the operation: // -// shapeIn: -// InvalidIn: unknown, with err set to the error message -// PureVregIn: pure vreg operation -// OneKmaskIn: operation with one k mask input (TODO: verify if it's always opmask predicate) -// OneConstImmIn: operation with one const imm input -// OneKmaskConstImmIn: operation with one k mask input and one const imm input -// PureKmaskIn: it's a K mask instruction (which can use K0) +// shapeIn: +// InvalidIn: unknown, with err set to the error message +// PureVregIn: pure vreg operation +// OneKmaskIn: operation with one k mask input (TODO: verify if it's always opmask predicate) +// OneImmIn: operation with one imm input +// OneKmaskImmIn: operation with one k mask input and one imm input +// PureKmaskIn: it's a K mask instruction (which can use K0) // -// shapeOut: -// InvalidOut: unknown, with err set to the error message -// NoOut: no outputs, this is invalid now. -// OneVregOut: one vreg output -// OneKmaskOut: one mask output +// shapeOut: +// InvalidOut: unknown, with err set to the error message +// NoOut: no outputs, this is invalid now. 
+// OneVregOut: one vreg output +// OneKmaskOut: one mask output +// OneVregOutAtIn: one vreg output, it's at the same time the first input // -// maskType: -// InvalidMask: unknown, with err set to the error message -// NoMask: no mask -// OneMask: with mask (K1 to K7) -// OneConstMask: with const mask K0 -// AllMasks: it's a K mask instruction +// maskType: +// InvalidMask: unknown, with err set to the error message +// NoMask: no mask +// OneMask: with mask (K1 to K7) +// OneConstMask: with const mask K0 +// AllMasks: it's a K mask instruction +// +// immType: +// InvalidImm: unrecognize immediate structure +// NoImm: no immediate +// ConstImm: const only immediate +// VarImm: pure imm argument provided by the users +// ConstVarImm: a combination of user arg and const // // opNoImm is op with its inputs excluding the const imm. // opNoConstMask is op with its inputs excluding the const mask. // opNoConstImmMask is op with its inputs excluding the const imm and mask. -func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Operation, opNoConstMask Operation, opNoConstImmMask Operation, err error) { +// +// This function does not modify op. 
+func (op *Operation) shape() (shapeIn, shapeOut, maskType, immTyppe int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { if len(op.Out) > 1 { err = fmt.Errorf("simdgen only supports 1 output: %s", op) return @@ -115,16 +134,16 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper hasVreg := false for i, in := range op.In { if in.AsmPos == outputReg { - err = fmt.Errorf("simdgen doesn't support output and input sharing the same position: %s", op) - return + if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { + shapeOut = OneVregOutAtIn + } else { + err = fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op) + return + } } if in.Class == "immediate" { // A manual check on XED data found that AMD64 SIMD instructions at most // have 1 immediates. So we don't need to check this here. - if in.Const == nil { - err = fmt.Errorf("simdgen doesn't support non-const immediates: %s", op) - return - } if *in.Bits != 8 { err = fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op) return @@ -151,22 +170,36 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper hasVreg = true } } - opNoConstImm = *op + opNoImm = *op opNoConstMask = *op - opNoConstImmMask = *op + opNoImmConstMask = *op removeConstMask := func(o *Operation) { o.In = append(o.In[:iConstMask], o.In[iConstMask+1:]...) 
} if iConstMask != -1 { removeConstMask(&opNoConstMask) - removeConstMask(&opNoConstImmMask) + removeConstMask(&opNoImmConstMask) } - removeConstImm := func(o *Operation) { + removeImm := func(o *Operation) { o.In = o.In[1:] } if hasImm { - removeConstImm(&opNoConstImm) - removeConstImm(&opNoConstImmMask) + removeImm(&opNoImm) + removeImm(&opNoImmConstMask) + if op.In[0].Const != nil { + if op.In[0].ImmOffset != nil { + immTyppe = ConstVarImm + } else { + immTyppe = ConstImm + } + } else if op.In[0].ImmOffset != nil { + immTyppe = VarImm + } else { + err = fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op) + return + } + } else { + immTyppe = NoImm } if maskCount == 0 { if iConstMask == -1 { @@ -205,10 +238,10 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper maskType = AllMasks } } else if hasImm && maskCount == 0 { - shapeIn = OneConstImmIn + shapeIn = OneImmIn } else { if maskCount == 1 { - shapeIn = OneKmaskConstImmIn + shapeIn = OneKmaskImmIn } else { checkPureMask() return @@ -217,6 +250,48 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper return } +// regShape returns a string representation of the register shape. +func (op *Operation) regShape() (string, error) { + _, _, _, _, _, _, gOp, _ := op.shape() + var regInfo string + var vRegInCnt, kMaskInCnt, vRegOutCnt, kMaskOutCnt int + for _, in := range gOp.In { + if in.Class == "vreg" { + vRegInCnt++ + } else if in.Class == "mask" { + kMaskInCnt++ + } + } + for _, out := range gOp.Out { + // If class overwrite is happening, that's not really a mask but a vreg. 
+ if out.Class == "vreg" || out.OverwriteClass != nil { + vRegOutCnt++ + } else if out.Class == "mask" { + kMaskOutCnt++ + } + } + var vRegInS, kMaskInS, vRegOutS, kMaskOutS string + if vRegInCnt > 0 { + vRegInS = fmt.Sprintf("fp%d", vRegInCnt) + } + if kMaskInCnt > 0 { + kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) + } + if vRegOutCnt > 0 { + vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) + } + if kMaskOutCnt > 0 { + kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + } + if kMaskInCnt == 0 && kMaskOutCnt == 0 { + // For pure fp we can abbreviate it as fp%d%d. + regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) + } else { + regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) + } + return regInfo, nil +} + // sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. // TODO: verify that this is a safe assumption of the prog strcture. // from my observation looks like in asm, imms are always the first, masks are always the last, with @@ -224,31 +299,66 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType int, opNoConstImm Oper func (op *Operation) sortOperand() { priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} sort.SliceStable(op.In, func(i, j int) bool { - return priority[op.In[i].Class]-priority[op.In[j].Class] > 0 + pi := priority[op.In[i].Class] + pj := priority[op.In[j].Class] + if pi != pj { + return pi > pj + } + return op.In[i].AsmPos < op.In[j].AsmPos }) } -// genericOpsByLen returns the lists of generic ops aggregated by input length. -func genericOpsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3 []Operation, e error) { +// opsByLen returns the lists of ops stripping the const masks away, aggregated by input length. +// Ops with only const imms also has their immediates removed. 
+func opsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8 []Operation, e error) { opsLen1 = make([]Operation, 0) opsLen2 = make([]Operation, 0) opsLen3 = make([]Operation, 0) + opsLen4 = make([]Operation, 0) + opsLen1Imm8 = make([]Operation, 0) + opsLen2Imm8 = make([]Operation, 0) + opsLen3Imm8 = make([]Operation, 0) + opsLen4Imm8 = make([]Operation, 0) for _, op := range ops { - _, shapeOut, _, _, _, gOp, err := op.shape() + _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() if err != nil { e = err return } - // Put the go ssa type in Class field, simd intrinsics need it. - if shapeOut == OneVregOut || shapeOut == OneKmaskOut { + // Put the go ssa type in GoArch field, simd intrinsics need it. + if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { + opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) } - if len(gOp.In) == 1 { - opsLen1 = append(opsLen1, gOp) - } else if len(gOp.In) == 2 { - opsLen2 = append(opsLen2, gOp) - } else if len(gOp.In) == 3 { - opsLen3 = append(opsLen3, gOp) + if immType == VarImm || immType == ConstVarImm { + switch len(opNoConstMask.In) { + case 1: + e = fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) + return + case 2: + opsLen1Imm8 = append(opsLen1Imm8, opNoConstMask) + case 3: + opsLen2Imm8 = append(opsLen2Imm8, opNoConstMask) + case 4: + opsLen3Imm8 = append(opsLen3Imm8, opNoConstMask) + case 5: + opsLen4Imm8 = append(opsLen4Imm8, opNoConstMask) + default: + e = fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } + } else { + switch len(gOp.In) { + case 1: + opsLen1 = append(opsLen1, gOp) + case 2: + opsLen2 = append(opsLen2, gOp) + case 3: + opsLen3 = append(opsLen3, gOp) + case 4: + opsLen4 = append(opsLen4, gOp) + default: + e = 
fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } } } sortKey := func(op *Operation) string { @@ -262,6 +372,11 @@ func genericOpsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3 []Operation, e sortBySortKey(opsLen1) sortBySortKey(opsLen2) sortBySortKey(opsLen3) + sortBySortKey(opsLen4) + sortBySortKey(opsLen1Imm8) + sortBySortKey(opsLen2Imm8) + sortBySortKey(opsLen3Imm8) + sortBySortKey(opsLen4Imm8) return } @@ -291,11 +406,11 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _, err := op.shape() + shapeIn, _, _, _, _, _, _, err := op.shape() if err != nil { return nil, err } - if shapeIn == OneKmaskIn || shapeIn == OneKmaskConstImmIn { + if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op op2.In = slices.Clone(op.In) constMask := "K0" @@ -305,6 +420,9 @@ func splitMask(ops []Operation) ([]Operation, error) { return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) } op2.Go = strings.ReplaceAll(op2.Go, "Masked", "") + if op2.Documentation != nil { + *op2.Documentation = strings.ReplaceAll(*op2.Documentation, "Masked", "") + } splited = append(splited, op2) } else { return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) @@ -320,7 +438,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, gOp, err := op.shape() + _, _, _, _, _, _, gOp, err := op.shape() if err != nil { return nil, err } @@ -366,11 +484,11 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - shapeIn, _, _, _, _, _, err := op.shape() + _, _, _, immType, _, _, _, err := op.shape() if err != nil { return err } - if shapeIn == OneConstImmIn || shapeIn == OneKmaskConstImmIn { + if 
immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm } // Otherwise, just not port it - e.g. {VPCMP[BWDQ] imm=0} and {VPCMPEQ[BWDQ]} are @@ -379,6 +497,16 @@ func copyConstImm(ops []Operation) error { return nil } +func capitalizeFirst(s string) string { + if s == "" { + return "" + } + // Convert the string to a slice of runes to handle multi-byte characters correctly. + r := []rune(s) + r[0] = unicode.ToUpper(r[0]) + return string(r) +} + // overwrite corrects some errors due to: // - The XED data is wrong // - Go's SIMD API requirement, for example AVX2 compares should also produce masks. @@ -386,15 +514,6 @@ func copyConstImm(ops []Operation) error { // These constraints are also explointed in [writeSIMDRules], [writeSIMDMachineOps] // and [writeSIMDSSA], please be careful when updating these constraints. func overwrite(ops []Operation) error { - capitalizeFirst := func(s string) string { - if s == "" { - return "" - } - // Convert the string to a slice of runes to handle multi-byte characters correctly. 
- r := []rune(s) - r[0] = unicode.ToUpper(r[0]) - return string(r) - } hasClassOverwrite := false overwrite := func(op []Operand, idx int) error { if op[idx].OverwriteClass != nil { @@ -421,6 +540,10 @@ func overwrite(ops []Operation) error { *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) *op[idx].Base = oBase } + if op[idx].OverwriteElementBits != nil { + *op[idx].ElemBits = *op[idx].OverwriteElementBits + *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Bits / *op[idx].ElemBits) + } return nil } for i := range ops { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 9309e0ce..b6d872be 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -83,14 +83,23 @@ type Operand struct { ElemBits *int // Element bit width Bits *int // Total vector bit width - Const *string // Optional constant value - Lanes *int // Lanes should equal Bits/ElemBits + Const *string // Optional constant value for immediates. + // Optional immediate arg offsets. If this field is non-nil, + // This operand will be an immediate operand: + // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt + // field of the operation. + ImmOffset *string + Lanes *int // Lanes should equal Bits/ElemBits // If non-nil, it means the [Class] field is overwritten here, right now this is used to // overwrite the results of AVX2 compares to masks. OverwriteClass *string // If non-nil, it means the [Base] field is overwritten here. This field exist solely // because Intel's XED data is inconsistent. e.g. VANDNP[SD] marks its operand int. OverwriteBase *string + // If non-nil, it means the [ElementBits] field is overwritten. This field exist solely + // because Intel's XED data is inconsistent. e.g. AVX512 VPMADDUBSW marks its operand + // elemBits 16, which should be 8. 
+ OverwriteElementBits *int } func writeGoDefs(path string, cl unify.Closure) error { diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 1d08a94b..592790ca 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -30,16 +30,16 @@ - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" - go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" - go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file + documentation: "// Sub pairs of elements in vector x and store them in higher half of the 
target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 027c8e8d..06a1caa2 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -10,77 +10,77 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" + documentation: "// Predicate immediate is 6 if it has;" - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: 
"AVX.*" - documentation: "Predicate immediate is 0 if it has;" + documentation: "// Predicate immediate is 0 if it has;" - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 1 if it has;" + documentation: "// Predicate immediate is 1 if it has;" - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 2 if it has;" + documentation: "// Predicate immediate is 2 if it has;" - go: MaskedIsNan # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" - go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Predicate immediate is 4 if it has;" + documentation: "// Predicate immediate is 4 if it has;" - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 5 if it has;" + documentation: "// Predicate immediate is 5 if it has;" - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "Predicate immediate is 6 if it has;" \ No newline at end of file + documentation: "// Predicate immediate is 6 if it has;" \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 0ef6cf57..42275e24 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -5,15 +5,15 @@ - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even 
index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" - go: MaskedMul masked: "true" commutative: "true" @@ -22,14 +22,14 @@ masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file + documentation: "// Multiplies the elements from the two 
sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file From 36000cf2bf76dea89e3d78bce3d5a6874622ce22 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 13 Jun 2025 03:12:28 +0000 Subject: [PATCH 076/200] internal/simdgen: fix register and prog func names This CL generates CL 681215. Change-Id: I97032505ee3221340df146686dea87b7320edf45 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681395 Auto-Submit: Junyang Shao Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdMachineOps.go | 4 ++-- internal/simdgen/gen_simdssa.go | 20 ++++++++++---------- internal/simdgen/gen_utility.go | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 3cacc990..f09b5568 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -12,7 +12,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2m1, fp1m1fp1, fp2m1fp1, fp2m1m1, fp31, fp3m1fp1 regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -47,7 +47,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2m1": true, "fp2m1fp1": true, "fp2m1m1": true, "fp1m1fp1": true, "fp31": true, "fp3m1fp1": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index b7d94251..14f97e60 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -61,19 +61,19 @@ func writeSIMDSSA(directory string, ops []Operation) error { regInfoKeys := []string{ "fp11", "fp21", - "fp2m1", - "fp2m1fp1", - "fp2m1m1", - "fp1m1fp1", + "fp2k1", + "fp2k1fp1", + "fp2k1k1", + "fp1k1fp1", "fp31", - "fp3m1fp1", + "fp3k1fp1", "fp11Imm8", - "fp1m1fp1Imm8", + "fp1k1fp1Imm8", "fp21Imm8", - "fp2m1Imm8", - "fp2m1m1Imm8", + "fp2k1Imm8", + "fp2k1k1Imm8", "fp31ResultInArg0", - "fp3m1fp1ResultInArg0", + "fp3k1fp1ResultInArg0", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -139,7 +139,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { } data := tplSSAData{ Cases: strings.Join(cases, ",\n\t\t"), - Helper: "simdGen" + capitalizeFirst(regShape), + Helper: "simd" + capitalizeFirst(regShape), } if err := ssaTemplates.ExecuteTemplate(file, "case", data); err != nil { return 
fmt.Errorf("failed to execute case template for %s: %w", regShape, err) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 7f2af75c..074be682 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -275,13 +275,13 @@ func (op *Operation) regShape() (string, error) { vRegInS = fmt.Sprintf("fp%d", vRegInCnt) } if kMaskInCnt > 0 { - kMaskInS = fmt.Sprintf("m%d", kMaskInCnt) + kMaskInS = fmt.Sprintf("k%d", kMaskInCnt) } if vRegOutCnt > 0 { vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) } if kMaskOutCnt > 0 { - kMaskOutS = fmt.Sprintf("m%d", kMaskOutCnt) + kMaskOutS = fmt.Sprintf("k%d", kMaskOutCnt) } if kMaskInCnt == 0 && kMaskOutCnt == 0 { // For pure fp we can abbreviate it as fp%d%d. From ea2d5edcba09cb9673cb2b86943400c4be784fec Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 13 Jun 2025 14:21:18 -0400 Subject: [PATCH 077/200] internal/simdgen: fix commutativity of AndNot ops Change-Id: Ie313f7aa8227eaff9d944da57a366841fa48c1d6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681477 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 4 ++-- internal/simdgen/ops/BitwiseLogic/categories.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 4a4affbc..3492ffbb 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -58,11 +58,11 @@ commutative: "true" extension: "AVX.*" - go: AndNot - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedAndNot masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: Xor commutative: "true" diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index bc4eda74..064f42b0 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ 
b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -14,11 +14,11 @@ commutative: "true" extension: "AVX.*" - go: AndNot - commutative: "true" + commutative: "false" extension: "AVX.*" - go: MaskedAndNot masked: "true" - commutative: "true" + commutative: "false" extension: "AVX.*" - go: Xor commutative: "true" @@ -28,4 +28,4 @@ commutative: "true" extension: "AVX.*" # We also have PTEST and VPTERNLOG, those should be hidden from the users -# and only appear in rewrite rules. \ No newline at end of file +# and only appear in rewrite rules. From bdb36ed7fe3d64f233fea98b785d9fe0e87d7ec1 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 2 Jun 2025 21:37:17 +0000 Subject: [PATCH 078/200] internal/simdgen: add round operations This CL also simplifies some simdgen logics. This CL generates CL 681295. Change-Id: Ibceb7d514353cf7b479913c905b21469c1b80df3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/678195 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/.gitignore | 2 + internal/simdgen/categories.yaml | 104 ++++++++++++++++++ internal/simdgen/go.yaml | 35 ++++++ .../simdgen/ops/FPonlyArith/categories.yaml | 104 ++++++++++++++++++ internal/simdgen/ops/FPonlyArith/go.yaml | 37 ++++++- 5 files changed, 281 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/.gitignore b/internal/simdgen/.gitignore index 1cc9ae43..de579f6b 100644 --- a/internal/simdgen/.gitignore +++ b/internal/simdgen/.gitignore @@ -1 +1,3 @@ testdata/* +.gemini/* +.gemini* diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 3492ffbb..7a55f58e 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -190,6 +190,110 @@ commutative: "false" masked: "true" extension: "AVX.*" + +- go: Round + commutative: "false" + extension: "AVX.*" + constImm: 0 +- go: MaskedRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: 
MaskedRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" +- go: MaskedDiffWithRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: MaskedDiffWithRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" + +- go: Floor + commutative: "false" + extension: "AVX.*" + constImm: 1 +- go: MaskedFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" +- go: MaskedDiffWithFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedDiffWithFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" + +- go: Ceil + commutative: "false" + extension: "AVX.*" + constImm: 2 +- go: MaskedCeilWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 2 + masked: "true" +- go: MaskedCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" +- go: MaskedDiffWithCeilWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 2 + masked: "true" +- go: MaskedDiffWithCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" + +- go: Trunc + commutative: "false" + extension: "AVX.*" + constImm: 3 +- go: MaskedTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 + masked: "true" +- go: MaskedDiffWithTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedDiffWithTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 
+ masked: "true" + +- go: AddSub + commutative: "false" + extension: "AVX.*" - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 35f0bf75..997f3992 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -395,6 +395,41 @@ asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp + +- go: "Round|Ceil|Floor|Trunc" + asm: "VROUNDP[SD]" + in: + - *fp + - class: immediate + const: 0 # place holder + out: *1fp + +- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VRNDSCALEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + out: *1fp +- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VREDUCEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
+ out: *1fp + +- go: "AddSub" + asm: "VADDSUBP[SD]" + in: + - *fp + - *fp + out: + - *fp # Average (unsigned byte, unsigned word) # Instructions: VPAVGB, VPAVGW - go: Average diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 3c46f1f4..e486225e 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -31,3 +31,107 @@ commutative: "false" masked: "true" extension: "AVX.*" + +- go: Round + commutative: "false" + extension: "AVX.*" + constImm: 0 +- go: MaskedRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: MaskedRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" +- go: MaskedDiffWithRoundWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 0 + masked: "true" +- go: MaskedDiffWithRoundSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 8 + masked: "true" + +- go: Floor + commutative: "false" + extension: "AVX.*" + constImm: 1 +- go: MaskedFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" +- go: MaskedDiffWithFloorWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 1 + masked: "true" +- go: MaskedDiffWithFloorSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 9 + masked: "true" + +- go: Ceil + commutative: "false" + extension: "AVX.*" + constImm: 2 +- go: MaskedCeilWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 2 + masked: "true" +- go: MaskedCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" +- go: MaskedDiffWithCeilWithPrecision + commutative: "false" + extension: "AVX.*" + 
constImm: 2 + masked: "true" +- go: MaskedDiffWithCeilSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 10 + masked: "true" + +- go: Trunc + commutative: "false" + extension: "AVX.*" + constImm: 3 +- go: MaskedTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 + masked: "true" +- go: MaskedDiffWithTruncWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 3 + masked: "true" +- go: MaskedDiffWithTruncSuppressExceptionWithPrecision + commutative: "false" + extension: "AVX.*" + constImm: 11 + masked: "true" + +- go: AddSub + commutative: "false" + extension: "AVX.*" \ No newline at end of file diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index bd774e1d..48e071ec 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -40,4 +40,39 @@ - go: MaskedMulByPowOf2 asm: "VSCALEFP[SD]" in: *1mask2fp - out: *1fp \ No newline at end of file + out: *1fp + +- go: "Round|Ceil|Floor|Trunc" + asm: "VROUNDP[SD]" + in: + - *fp + - class: immediate + const: 0 # place holder + out: *1fp + +- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VRNDSCALEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + out: *1fp +- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" + asm: "VREDUCEP[SD]" + in: + - class: mask + - *fp + - class: immediate + const: 0 # place holder + immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
+ out: *1fp + +- go: "AddSub" + asm: "VADDSUBP[SD]" + in: + - *fp + - *fp + out: + - *fp \ No newline at end of file From c2a8f7037160725497d502d69a9f1aa5fa83ac6d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 3 Jun 2025 17:53:42 +0000 Subject: [PATCH 079/200] internal/simdgen: add dot products This CL also updates the sortOperand method; before dot products, all vreg inputs has the same type, so their order doesn't really matter; Now with dot products we have to make sure their order is correct after sort. This CL generates CL 681296. Change-Id: I20506eb889979ea5f390b36615f4cf934fc418c9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/678515 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 24 +++++++++++ internal/simdgen/go.yaml | 49 +++++++++++++++++++++ internal/simdgen/ops/MLOps/categories.yaml | 25 +++++++++++ internal/simdgen/ops/MLOps/go.yaml | 50 ++++++++++++++++++++++ 4 files changed, 148 insertions(+) create mode 100644 internal/simdgen/ops/MLOps/categories.yaml create mode 100644 internal/simdgen/ops/MLOps/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 7a55f58e..00672a14 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -321,6 +321,30 @@ commutative: "false" masked: "true" extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) +- go: PairDotProd + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: MaskedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: SaturatedUnsignedSignedPairDotProd + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the 
elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" +- go: MaskedSaturatedUnsignedSignedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + +# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. +- go: DotProdBroadcast + commutative: "true" + extension: "AVX.*" + documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product" - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 997f3992..765eea0b 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -490,6 +490,55 @@ go: $t out: - *any +- go: PairDotProd + asm: VPMADDWD + in: + - &int + go: $t + base: int + - *int + out: + - &int2 # The elemBits are different + go: $t2 + base: int +- go: MaskedPairDotProd + asm: VPMADDWD + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: SaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - &uint + go: $t + base: uint + - &int3 + go: $t3 + base: int + out: + - *int2 +- go: MaskedSaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - class: mask + - *uint + - *int3 + out: + - *int2 +- go: DotProdBroadcast + asm: VDPPD + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 127 # make sure the control bits [4:5] are all 1 + out: + - *float - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml new file mode 100644 index 00000000..6ebb12a0 --- /dev/null +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -0,0 +1,25 @@ +!sum +- go: PairDotProd + commutative: "false" + extension: "AVX.*" 
+ documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: MaskedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" +- go: SaturatedUnsignedSignedPairDotProd + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" +- go: MaskedSaturatedUnsignedSignedPairDotProd + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + +# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. 
+- go: DotProdBroadcast + commutative: "true" + extension: "AVX.*" + documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product" diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml new file mode 100644 index 00000000..9e06d3c9 --- /dev/null +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -0,0 +1,50 @@ +!sum +- go: PairDotProd + asm: VPMADDWD + in: + - &int + go: $t + base: int + - *int + out: + - &int2 # The elemBits are different + go: $t2 + base: int +- go: MaskedPairDotProd + asm: VPMADDWD + in: + - class: mask + - *int + - *int + out: + - *int2 +- go: SaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - &uint + go: $t + base: uint + - &int3 + go: $t3 + base: int + out: + - *int2 +- go: MaskedSaturatedUnsignedSignedPairDotProd + asm: VPMADDUBSW + in: + - class: mask + - *uint + - *int3 + out: + - *int2 +- go: DotProdBroadcast + asm: VDPPD + in: + - &float + go: $t + base: float + - *float + - class: immediate + const: 127 # make sure the control bits [4:5] are all 1 + out: + - *float \ No newline at end of file From 606013036de41e033aebb39e0507d3fe3631213d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Sat, 7 Jun 2025 18:54:20 +0000 Subject: [PATCH 080/200] internal/simdgen: update documentations This CL is generated by Gemini This CL generates CL 681297. 
Change-Id: If3323c3a23b0d2197390d1a239bdcbedd60615d2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/679955 Reviewed-by: David Chase Auto-Submit: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 270 ++++++++++++++++-- internal/simdgen/go.yaml | 1 + internal/simdgen/ops/AddSub/categories.yaml | 32 ++- .../simdgen/ops/BitwiseLogic/categories.yaml | 16 ++ internal/simdgen/ops/Compares/categories.yaml | 59 +++- .../simdgen/ops/FPonlyArith/categories.yaml | 91 +++++- .../simdgen/ops/IntOnlyArith/categories.yaml | 15 +- internal/simdgen/ops/MLOps/categories.yaml | 19 +- internal/simdgen/ops/MinMax/categories.yaml | 8 + internal/simdgen/ops/Mul/categories.yaml | 33 ++- internal/simdgen/ops/main.go | 4 + 11 files changed, 488 insertions(+), 60 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 00672a14..f4194101 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,76 +1,117 @@ !sum +# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. - go: Add commutative: "true" extension: "AVX.*" + documentation: !string |- + // Add adds corresponding elements of two vectors. - go: SaturatedAdd commutative: "true" extension: "AVX.*" + documentation: !string |- + // SaturatedAdd adds corresponding elements of two vectors with saturation. - go: MaskedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAdd adds corresponding elements of two vectors. - go: MaskedSaturatedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sub subtracts corresponding elements of two vectors. 
- go: SaturatedSub commutative: "false" extension: "AVX.*" + documentation: !string |- + // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: MaskedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSub subtracts corresponding elements of two vectors. - go: MaskedSaturatedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseAdd horizontally adds adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseSub horizontally subtracts adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: !string |- + // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. 
- go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: !string |- + // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: And commutative: "true" extension: "AVX.*" + documentation: !string |- + // And performs a bitwise AND operation between two vectors. - go: MaskedAnd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAnd performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" + documentation: !string |- + // Or performs a bitwise OR operation between two vectors. - go: MaskedOr masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedOr performs a masked bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" + documentation: !string |- + // AndNot performs a bitwise AND NOT operation between two vectors. - go: MaskedAndNot masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" + documentation: !string |- + // Xor performs a bitwise XOR operation between two vectors. - go: MaskedXor masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedXor performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. 
# const imm predicate(holds for both float and int|uint): @@ -84,312 +125,483 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // Less compares for less than. + // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // LessEqual compares for less than or equal. + // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // IsNan checks if elements are NaN. Use as x.IsNan(x). + // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // NotEqual compares for inequality. + // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // GreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" + documentation: !string |- + // Greater compares for greater than. + // Const Immediate = 6. - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // MaskedEqual compares for equality, masked. 
+ // Const Immediate = 0. + docUnmasked: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // MaskedLess compares for less than. + // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // MaskedLessEqual compares for less than or equal. + // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). + // Const Immediate = 3. - go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // MaskedNotEqual compares for inequality. + // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // MaskedGreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" + documentation: !string |- + // MaskedGreater compares for greater than. + // Const Immediate = 6. - go: Div commutative: "false" extension: "AVX.*" + documentation: !string |- + // Div divides elements of two vectors. 
- go: MaskedDiv commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedDiv divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sqrt computes the square root of each element. - go: MaskedSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSqrt computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocal computes an approximate reciprocal of each element. - go: MaskedApproximateReciprocal commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocal computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedApproximateReciprocalOfSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMulByPowOf2 multiplies elements by a power of 2. - go: Round commutative: "false" extension: "AVX.*" constImm: 0 + documentation: !string |- + // Round rounds elements to the nearest integer. + // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedRoundWithPrecision rounds elements with specified precision. + // Const Immediate = 0. 
- go: MaskedRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. + // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // Const Immediate = 0. - go: MaskedDiffWithRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. + // Const Immediate = 8. - go: Floor commutative: "false" extension: "AVX.*" constImm: 1 + documentation: !string |- + // Floor rounds elements down to the nearest integer. + // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedFloorWithPrecision rounds elements down with specified precision, masked. + // Const Immediate = 1. + docUnmasked: !string |- + // FloorWithPrecision rounds elements down with specified precision. + // Const Immediate = 1. - go: MaskedFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. + // Const Immediate = 9. + docUnmasked: !string |- + // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. + // Const Immediate = 9. 
- go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // Const Immediate = 1. - go: MaskedDiffWithFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. + // Const Immediate = 9. - go: Ceil commutative: "false" extension: "AVX.*" constImm: 2 + documentation: !string |- + // Ceil rounds elements up to the nearest integer. + // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedCeilWithPrecision rounds elements up with specified precision, masked. + // Const Immediate = 2. + docUnmasked: !string |- + // CeilWithPrecision rounds elements up with specified precision. + // Const Immediate = 2. - go: MaskedCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. + // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // Const Immediate = 2. - go: MaskedDiffWithCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. + // Const Immediate = 10. 
- go: Trunc commutative: "false" extension: "AVX.*" constImm: 3 + documentation: !string |- + // Trunc truncates elements towards zero. + // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedTruncWithPrecision truncates elements with specified precision. + // Const Immediate = 3. - go: MaskedTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // Const Immediate = 3. - go: MaskedDiffWithTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: AddSub commutative: "false" extension: "AVX.*" + documentation: !string |- + // AddSub subtracts even elements and adds odd elements of two vectors. - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions + documentation: !string |- + // Average computes the rounded average of corresponding elements. - go: MaskedAverage commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 + documentation: !string |- + // MaskedAverage computes the rounded average of corresponding elements. 
 - go: Absolute
   commutative: "false" # Unary operation, not commutative
   extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512
+  documentation: !string |-
+    // Absolute computes the absolute value of each element.
 - go: MaskedAbsolute
   commutative: "false"
   masked: "true"
   extension: "AVX512.*"
+  documentation: !string |-
+    // MaskedAbsolute computes the absolute value of each element.
 - go: Sign # Applies sign of second operand to first: sign(val, sign_src)
   commutative: "false"
   extension: "AVX.*"
+  documentation: !string |-
+    // Sign returns the product of the first operand with -1, 0, or 1,
+    // whichever constant is nearest to the value of the second operand.
 # Sign does not have masked version
 - go: MaskedPopCount
   commutative: "false"
   masked: "true"
   extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ)
+  documentation: !string |-
+    // MaskedPopCount counts the number of set bits in each element.
 - go: PairDotProd
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // PairDotProd multiplies the elements and adds the pairs together,
+    // yielding a vector of half as many elements with twice the input element size.
 - go: MaskedPairDotProd
   masked: "true"
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // MaskedPairDotProd multiplies the elements and adds the pairs together,
+    // yielding a vector of half as many elements with twice the input element size.
 - go: SaturatedUnsignedSignedPairDotProd
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // SaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+    // yielding a vector of half as many elements with twice the input element size.
 - go: MaskedSaturatedUnsignedSignedPairDotProd
   masked: "true"
   commutative: "false"
   extension: "AVX.*"
-  documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size"
+  documentation: !string |-
+    // MaskedSaturatedUnsignedSignedPairDotProd multiplies the elements and adds the pairs together with saturation,
+    // yielding a vector of half as many elements with twice the input element size.
 
 # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
 - go: DotProdBroadcast
   commutative: "true"
   extension: "AVX.*"
-  documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product"
+  documentation: !string |-
+    // DotProdBroadcast multiplies all elements and broadcasts the sum.
 - go: Max
   commutative: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // Max computes the maximum of corresponding elements.
 - go: MaskedMax
   commutative: "true"
   masked: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // MaskedMax computes the maximum of corresponding elements.
 - go: Min
   commutative: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // Min computes the minimum of corresponding elements.
 - go: MaskedMin
   commutative: "true"
   masked: "true"
   extension: "AVX.*"
+  documentation: !string |-
+    // MaskedMin computes the minimum of corresponding elements.
- go: Mul commutative: "true" extension: "AVX.*" + documentation: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: !string |- + // MulLow multiplies elements and stores the low part of the result. - go: MaskedMul masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMul multiplies corresponding elements of two vectors, masked. + docUnmasked: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // Result[i] = v1.Even[i] * v2.Even[i]. + docUnmasked: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. 
- go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MaskedMulHigh multiplies elements and stores the high part of the result, masked. + docUnmasked: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: !string |- + // MaskedMulLow multiplies elements and stores the low part of the result, masked. + docUnmasked: !string |- + // MulLow multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 765eea0b..de65a04e 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,4 +1,5 @@ !sum +# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. # Add - go: Add asm: "VPADD[BWDQ]|VADDP[SD]" diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 592790ca..e87ead1d 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -2,44 +2,68 @@ - go: Add commutative: "true" extension: "AVX.*" + documentation: !string |- + // Add adds corresponding elements of two vectors. - go: SaturatedAdd commutative: "true" extension: "AVX.*" + documentation: !string |- + // SaturatedAdd adds corresponding elements of two vectors with saturation. - go: MaskedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAdd adds corresponding elements of two vectors. 
- go: MaskedSaturatedAdd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sub subtracts corresponding elements of two vectors. - go: SaturatedSub commutative: "false" extension: "AVX.*" + documentation: !string |- + // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: MaskedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSub subtracts corresponding elements of two vectors. - go: MaskedSaturatedSub masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseAdd horizontally adds adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target" + documentation: !string |- + // PairwiseSub horizontally subtracts adjacent pairs of elements. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
- go: SaturatedPairwiseAdd commutative: "false" extension: "AVX.*" - documentation: "// Add pairs of elements in vector x and store them in higher half of the target; Add pairs of elements in vector y and store them in lower half of the target; With saturation" + documentation: !string |- + // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub commutative: "false" extension: "AVX.*" - documentation: "// Sub pairs of elements in vector x and store them in higher half of the target; Sub pairs of elements in vector y and store them in lower half of the target; With saturation" \ No newline at end of file + documentation: !string |- + // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 064f42b0..4d948364 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -2,30 +2,46 @@ - go: And commutative: "true" extension: "AVX.*" + documentation: !string |- + // And performs a bitwise AND operation between two vectors. - go: MaskedAnd masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedAnd performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" + documentation: !string |- + // Or performs a bitwise OR operation between two vectors. - go: MaskedOr masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedOr performs a masked bitwise OR operation between two vectors. 
- go: AndNot commutative: "false" extension: "AVX.*" + documentation: !string |- + // AndNot performs a bitwise AND NOT operation between two vectors. - go: MaskedAndNot masked: "true" commutative: "false" extension: "AVX.*" + documentation: !string |- + // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" + documentation: !string |- + // Xor performs a bitwise XOR operation between two vectors. - go: MaskedXor masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedXor performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 06a1caa2..bd4d8c76 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -10,77 +10,108 @@ constImm: 0 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // Less compares for less than. + // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // LessEqual compares for less than or equal. + // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // IsNan checks if elements are NaN. 
Use as x.IsNan(x). + // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // NotEqual compares for inequality. + // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // GreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" + documentation: !string |- + // Greater compares for greater than. + // Const Immediate = 6. - go: MaskedEqual constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 0 if it has;" + documentation: !string |- + // MaskedEqual compares for equality, masked. + // Const Immediate = 0. + docUnmasked: !string |- + // Equal compares for equality. + // Const Immediate = 0. - go: MaskedLess constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 1 if it has;" + documentation: !string |- + // MaskedLess compares for less than. + // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 2 if it has;" + documentation: !string |- + // MaskedLessEqual compares for less than or equal. + // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 3 if it has; Returns mask element True if either one of the input\\'s element is Nan; Please use this method as x\\.IsNan\\(x\\) to check x only;" + documentation: !string |- + // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). + // Const Immediate = 3. 
- go: MaskedNotEqual constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Predicate immediate is 4 if it has;" + documentation: !string |- + // MaskedNotEqual compares for inequality. + // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 5 if it has;" + documentation: !string |- + // MaskedGreaterEqual compares for greater than or equal. + // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Predicate immediate is 6 if it has;" \ No newline at end of file + documentation: !string |- + // MaskedGreater compares for greater than. + // Const Immediate = 6. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index e486225e..c00d43d6 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -2,136 +2,225 @@ - go: Div commutative: "false" extension: "AVX.*" + documentation: !string |- + // Div divides elements of two vectors. - go: MaskedDiv commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedDiv divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sqrt computes the square root of each element. - go: MaskedSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedSqrt computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocal computes an approximate reciprocal of each element. - go: MaskedApproximateReciprocal commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocal computes an approximate reciprocal of each element. 
- go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" + documentation: !string |- + // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedApproximateReciprocalOfSqrt commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMulByPowOf2 multiplies elements by a power of 2. - go: Round commutative: "false" extension: "AVX.*" constImm: 0 + documentation: !string |- + // Round rounds elements to the nearest integer. + // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedRoundWithPrecision rounds elements with specified precision. + // Const Immediate = 0. - go: MaskedRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. + // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // Const Immediate = 0. - go: MaskedDiffWithRoundSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 8 masked: "true" + documentation: !string |- + // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. + // Const Immediate = 8. 
- go: Floor commutative: "false" extension: "AVX.*" constImm: 1 + documentation: !string |- + // Floor rounds elements down to the nearest integer. + // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedFloorWithPrecision rounds elements down with specified precision, masked. + // Const Immediate = 1. + docUnmasked: !string |- + // FloorWithPrecision rounds elements down with specified precision. + // Const Immediate = 1. - go: MaskedFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. + // Const Immediate = 9. + docUnmasked: !string |- + // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. + // Const Immediate = 9. - go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // Const Immediate = 1. - go: MaskedDiffWithFloorSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 9 masked: "true" + documentation: !string |- + // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. + // Const Immediate = 9. - go: Ceil commutative: "false" extension: "AVX.*" constImm: 2 + documentation: !string |- + // Ceil rounds elements up to the nearest integer. + // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedCeilWithPrecision rounds elements up with specified precision, masked. + // Const Immediate = 2. 
+ docUnmasked: !string |- + // CeilWithPrecision rounds elements up with specified precision. + // Const Immediate = 2. - go: MaskedCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. + // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // Const Immediate = 2. - go: MaskedDiffWithCeilSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 10 masked: "true" + documentation: !string |- + // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. + // Const Immediate = 10. - go: Trunc commutative: "false" extension: "AVX.*" constImm: 3 + documentation: !string |- + // Trunc truncates elements towards zero. + // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedTruncWithPrecision truncates elements with specified precision. + // Const Immediate = 3. - go: MaskedTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // Const Immediate = 3. 
- go: MaskedDiffWithTruncSuppressExceptionWithPrecision commutative: "false" extension: "AVX.*" constImm: 11 masked: "true" + documentation: !string |- + // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. + // Const Immediate = 11. - go: AddSub commutative: "false" - extension: "AVX.*" \ No newline at end of file + extension: "AVX.*" + documentation: !string |- + // AddSub subtracts even elements and adds odd elements of two vectors. \ No newline at end of file diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index c74b57c4..b6c83bf3 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -2,27 +2,40 @@ - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions + documentation: !string |- + // Average computes the rounded average of corresponding elements. - go: MaskedAverage commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 + documentation: !string |- + // MaskedAverage computes the rounded average of corresponding elements. - go: Absolute commutative: "false" # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 + documentation: !string |- + // Absolute computes the absolute value of each element. - go: MaskedAbsolute commutative: "false" masked: "true" extension: "AVX512.*" + documentation: !string |- + // MaskedAbsolute computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: "false" extension: "AVX.*" + documentation: !string |- + // Sign returns the product of the first operand with -1, 0, or 1, + // whichever constant is nearest to the value of the second operand. 
# Sign does not have masked version - go: MaskedPopCount commutative: "false" masked: "true" - extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) \ No newline at end of file + extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) + documentation: !string |- + // MaskedPopCount counts the number of set bits in each element. \ No newline at end of file diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6ebb12a0..18cfd967 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -2,24 +2,33 @@ - go: PairDotProd commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // PairDotProd multiplies the elements and add the pairs together, + // yielding a vector of half as many elements with twice the input element size. - go: MaskedPairDotProd masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // MaskedPairDotProd multiplies the elements and add the pairs together, + // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // yielding a vector of half as many elements with twice the input element size. 
- go: MaskedSaturatedUnsignedSignedPairDotProd masked: "true" commutative: "false" extension: "AVX.*" - documentation: "// Multiply the elements and add the pairs together with saturation, yielding a vector of half as many elements with twice the input element size" + documentation: !string |- + // MaskedSaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: "true" extension: "AVX.*" - documentation: "// Multiply all the elements and add them together; the result is a broadcast of the dot product" + documentation: !string |- + // DotProdBroadcast multiplies all elements and broadcasts the sum. diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index d5131958..c64eb24e 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -2,14 +2,22 @@ - go: Max commutative: "true" extension: "AVX.*" + documentation: !string |- + // Max computes the maximum of corresponding elements. - go: MaskedMax commutative: "true" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMax computes the maximum of corresponding elements. - go: Min commutative: "true" extension: "AVX.*" + documentation: !string |- + // Min computes the minimum of corresponding elements. - go: MaskedMin commutative: "true" masked: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMin computes the minimum of corresponding elements. 
diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 42275e24..def502f3 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -2,34 +2,55 @@ - go: Mul commutative: "true" extension: "AVX.*" + documentation: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MulLow commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" + documentation: !string |- + // MulLow multiplies elements and stores the low part of the result. - go: MaskedMul masked: "true" commutative: "true" extension: "AVX.*" + documentation: !string |- + // MaskedMul multiplies corresponding elements of two vectors, masked. + docUnmasked: !string |- + // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the even index elements from the two sources of size X at index i, store the result of size 2X at index i/2" + documentation: !string |- + // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // Result[i] = v1.Even[i] * v2.Even[i]. 
+ docUnmasked: !string |- + // MulEvenWiden multiplies even-indexed elements, widening the result. + // Result[i] = v1.Even[i] * v2.Even[i]. - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the high X bits of the result of size 2X at index i" + documentation: !string |- + // MaskedMulHigh multiplies elements and stores the high part of the result, masked. + docUnmasked: !string |- + // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" - documentation: "// Multiplies the elements from the two sources of size X at index i, store the low X bits of the result of size 2X at index i" \ No newline at end of file + documentation: !string |- + // MaskedMulLow multiplies elements and stores the low part of the result, masked. + docUnmasked: !string |- + // MulLow multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go index 7e462bf7..c71f3ff6 100644 --- a/internal/simdgen/ops/main.go +++ b/internal/simdgen/ops/main.go @@ -32,6 +32,10 @@ func mergeYamlFiles(targetFileName string) error { if err != nil { return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) } + _, err = writer.WriteString("# TODO: remove the \"Const Immediate\" from the documentation field, it's there only for debug purposes.\n") + if err != nil { + return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) + } entries, err := os.ReadDir(baseDir) if err != nil { From f26e3cc49a9405339efc8d819966243ef17bac89 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Sun, 8 Jun 2025 02:45:08 +0000 Subject: [PATCH 081/200] internal/simdgen: more dot products This CL's yaml data is generated by Gemini and reviewed by me. This CL also updates simdgen to fit into some new op shapes. 
This CL generates CL 681298.

Change-Id: Iae240af704a79eeb1dc78f24e11c3894f76b6bb7
Reviewed-on: https://go-review.googlesource.com/c/arch/+/680215
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---
 internal/simdgen/categories.yaml           |  45 +++++++++
 internal/simdgen/go.yaml                   | 105 +++++++++++++++++++--
 internal/simdgen/ops/MLOps/categories.yaml |  45 +++++++++
 internal/simdgen/ops/MLOps/go.yaml         | 105 +++++++++++++++++++--
 4 files changed, 286 insertions(+), 14 deletions(-)

diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml
index f4194101..dbf5b41d 100644
--- a/internal/simdgen/categories.yaml
+++ b/internal/simdgen/categories.yaml
@@ -528,6 +528,51 @@
   extension: "AVX.*"
   documentation: !string |-
     // DotProdBroadcast multiplies all elements and broadcasts the sum.
+    // Const Immediate = 127.
+- go: UnsignedSignedQuadDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: MaskedUnsignedSignedQuadDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: SaturatedUnsignedSignedQuadDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: PairDotProdAccumulate + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedPairDotProdAccumulate + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: SaturatedPairDotProdAccumulate + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedSaturatedPairDotProdAccumulate + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index de65a04e..b5aca038 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -525,21 +525,112 @@ asm: VPMADDUBSW in: - class: mask - - *uint - - *int3 + - go: $t1 + base: uint + overwriteElemBits: 8 + - go: $t2 + base: int + overwriteElemBits: 8 out: - - *int2 + - *int3 - go: DotProdBroadcast asm: VDPPD in: - - &float + - &dpb_src go: $t base: float - - *float + elemBits: 64 + bits: $bits + - *dpb_src - class: immediate - const: 127 # make sure the control bits [4:5] are all 1 + const: 127 out: - - *float + - *dpb_src +- go: UnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - &qdpa_acc + go: $t_acc + elemBits: 32 + - &qdpa_src1 + go: $t_src1 + base: uint + overwriteElemBits: 8 + - &qdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 8 + out: + - *qdpa_acc +- go: MaskedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - 
*qdpa_src2 + out: + - *qdpa_acc +- go: SaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: PairDotProdAccumulate + asm: "VPDPWSSD" + in: + - &pdpa_acc + go: $t_acc + base: int + elemBits: 32 + - &pdpa_src1 + go: $t_src1 + base: int + overwriteElemBits: 16 + - &pdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 16 + out: + - *pdpa_acc +- go: MaskedPairDotProdAccumulate + asm: "VPDPWSSD" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: SaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: MaskedSaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 18cfd967..6375534c 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -32,3 +32,48 @@ extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. + // Const Immediate = 127. +- go: UnsignedSignedQuadDotProdAccumulate + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +- go: MaskedUnsignedSignedQuadDotProdAccumulate + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
+- go: SaturatedUnsignedSignedQuadDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+- go: PairDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+- go: MaskedPairDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+- go: SaturatedPairDotProdAccumulate
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+- go: MaskedSaturatedPairDotProdAccumulate
+  masked: "true"
+  commutative: "false"
+  extension: "AVX.*"
+  documentation: !string |-
+    // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 9e06d3c9..be8a054c 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -33,18 +33,109 @@ asm: VPMADDUBSW in: - class: mask - - *uint - - *int3 + - go: $t1 + base: uint + overwriteElemBits: 8 + - go: $t2 + base: int + overwriteElemBits: 8 out: - - *int2 + - *int3 - go: DotProdBroadcast asm: VDPPD in: - - &float + - &dpb_src go: $t base: float - - *float + elemBits: 64 + bits: $bits + - *dpb_src - class: immediate - const: 127 # make sure the control bits [4:5] are all 1 + const: 127 + out: + - *dpb_src +- go: UnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - &qdpa_acc + go: $t_acc + elemBits: 32 + - &qdpa_src1 + go: $t_src1 + base: uint + overwriteElemBits: 8 + - &qdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 8 + out: + - *qdpa_acc +- go: MaskedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSD" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: SaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate + asm: "VPDPBUSDS" + in: + - *qdpa_acc + - class: mask + - *qdpa_src1 + - *qdpa_src2 + out: + - *qdpa_acc +- go: PairDotProdAccumulate + asm: "VPDPWSSD" + in: + - &pdpa_acc + go: $t_acc + base: int + elemBits: 32 + - &pdpa_src1 + go: $t_src1 + base: int + overwriteElemBits: 16 + - &pdpa_src2 + go: $t_src2 + base: int + overwriteElemBits: 16 + out: + - *pdpa_acc +- go: MaskedPairDotProdAccumulate + asm: "VPDPWSSD" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: SaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - *pdpa_src1 + - *pdpa_src2 + out: + - *pdpa_acc +- go: MaskedSaturatedPairDotProdAccumulate + asm: "VPDPWSSDS" + in: + - *pdpa_acc + - class: mask + - *pdpa_src1 + 
- *pdpa_src2 out: - - *float \ No newline at end of file + - *pdpa_acc From 030c5e4e253991e0c3b78dda6c9099ea7c3d9774 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 10 Jun 2025 17:51:46 +0000 Subject: [PATCH 082/200] internal/simdgen: add fused mul add sub This CL is generated by Gemini and reviewed by myself. This CL generates CL 681299. Change-Id: I5bc57185ce104d0d80dfd82c8eff15312c397aeb Reviewed-on: https://go-review.googlesource.com/c/arch/+/680595 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 108 ++++++++++++++ internal/simdgen/go.yaml | 164 +++++++++++++++++++++ internal/simdgen/ops/MLOps/categories.yaml | 108 ++++++++++++++ internal/simdgen/ops/MLOps/go.yaml | 164 +++++++++++++++++++++ 4 files changed, 544 insertions(+) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index dbf5b41d..8b325fa4 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -573,6 +573,114 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedFusedMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. +- go: MaskedFusedMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. +- go: MaskedFusedMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. +- go: MaskedFusedMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. 
+- go: MaskedFusedMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. +- go: MaskedFusedMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. +- go: MaskedFusedNegativeMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +- go: MaskedFusedNegativeMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +- go: MaskedFusedNegativeMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +- go: MaskedFusedNegativeMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +- go: MaskedFusedNegativeMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +- go: MaskedFusedNegativeMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +- go: MaskedFusedMultiplyAddSub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. 
+- go: MaskedFusedMultiplyAddSub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +- go: MaskedFusedMultiplyAddSub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. 
- go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index b5aca038..6fb817be 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -631,6 +631,170 @@ - *pdpa_src2 out: - *pdpa_acc +- go: MaskedFusedMultiplyAdd132 + asm: "VFMADD132PS|VFMADD132PD" + in: + - &fma_op + go: $t + base: float + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd213 + asm: "VFMADD213PS|VFMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd231 + asm: "VFMADD231PS|VFMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub132 + asm: "VFMSUB132PS|VFMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub213 + asm: "VFMSUB213PS|VFMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub231 + asm: "VFMSUB231PS|VFMSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd132 + asm: "VFNMADD132PS|VFNMADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd213 + asm: "VFNMADD213PS|VFNMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd231 + asm: "VFNMADD231PS|VFNMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub132 + asm: "VFNMSUB132PS|VFNMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub213 + asm: "VFNMSUB213PS|VFNMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub231 + asm: "VFNMSUB231PS|VFNMSUB231PD" + in: + - *fma_op 
+ - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub132 + asm: "VFMADDSUB132PS|VFMADDSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub213 + asm: "VFMADDSUB213PS|VFMADDSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub231 + asm: "VFMADDSUB231PS|VFMADDSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd132 + asm: "VFMSUBADD132PS|VFMSUBADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd213 + asm: "VFMSUBADD213PS|VFMSUBADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd231 + asm: "VFMSUBADD231PS|VFMSUBADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6375534c..412af692 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -77,3 +77,111 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: MaskedFusedMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. +- go: MaskedFusedMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. +- go: MaskedFusedMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. 
+- go: MaskedFusedMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. +- go: MaskedFusedMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. +- go: MaskedFusedMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. +- go: MaskedFusedNegativeMultiplyAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. +- go: MaskedFusedNegativeMultiplyAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. +- go: MaskedFusedNegativeMultiplyAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. +- go: MaskedFusedNegativeMultiplySub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. +- go: MaskedFusedNegativeMultiplySub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. +- go: MaskedFusedNegativeMultiplySub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. +- go: MaskedFusedMultiplyAddSub132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. 
+- go: MaskedFusedMultiplyAddSub213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. +- go: MaskedFusedMultiplyAddSub231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd132 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd213 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd231 + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index be8a054c..e0cefda2 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -139,3 +139,167 @@ - *pdpa_src2 out: - *pdpa_acc +- go: MaskedFusedMultiplyAdd132 + asm: "VFMADD132PS|VFMADD132PD" + in: + - &fma_op + go: $t + base: float + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd213 + asm: "VFMADD213PS|VFMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAdd231 + asm: "VFMADD231PS|VFMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub132 + asm: "VFMSUB132PS|VFMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub213 + asm: "VFMSUB213PS|VFMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySub231 + asm: "VFMSUB231PS|VFMSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd132 + asm: "VFNMADD132PS|VFNMADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd213 + asm: "VFNMADD213PS|VFNMADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplyAdd231 + asm: "VFNMADD231PS|VFNMADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub132 + asm: "VFNMSUB132PS|VFNMSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub213 + asm: "VFNMSUB213PS|VFNMSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedNegativeMultiplySub231 + asm: "VFNMSUB231PS|VFNMSUB231PD" + in: + - *fma_op + - 
class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub132 + asm: "VFMADDSUB132PS|VFMADDSUB132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub213 + asm: "VFMADDSUB213PS|VFMADDSUB213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplyAddSub231 + asm: "VFMADDSUB231PS|VFMADDSUB231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd132 + asm: "VFMSUBADD132PS|VFMSUBADD132PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd213 + asm: "VFMSUBADD213PS|VFMSUBADD213PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op +- go: MaskedFusedMultiplySubAdd231 + asm: "VFMSUBADD231PS|VFMSUBADD231PD" + in: + - *fma_op + - class: mask + - *fma_op + - *fma_op + out: + - *fma_op \ No newline at end of file From 934a84cceceb03defe90c20c29568395e786d853 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 13 Jun 2025 16:15:40 -0400 Subject: [PATCH 083/200] arch/internal: move simd helpers into compiler, out of generated code PAIRED with CL 681500 from cmd/compile/internal/ssagen Change-Id: I42775ce43c3810fac83cb6c1674e1cae1b83d4db Reviewed-on: https://go-review.googlesource.com/c/arch/+/681615 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: Cherry Mui --- internal/simdgen/gen_simdIntrinsics.go | 101 ------------------------- 1 file changed, 101 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 5b6b74cf..02eb5a63 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -63,107 +63,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) {{- end}} } - -func opLen1(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue1(op, t, args[0]) - } -} - -func opLen2(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue2(op, t, args[0], args[1]) - } -} - -func opLen3(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue3(op, t, args[0], args[1], args[2]) - } -} - -func opLen4(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return s.newValue4(op, t, args[0], args[1], args[2], args[3]) - } -} - -func plainPanicSimdImm(s *state) { - cmp := s.newValue0(ssa.OpConstBool, types.Types[types.TBOOL]) - cmp.AuxInt = 1 - // TODO: make this a standalone panic instead of reusing the overflow panic. - // Or maybe after we implement the switch table this will be obsolete anyway. - s.check(cmp, ir.Syms.Panicoverflow) -} - -func opLen1Imm8(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - if args[1].Op == ssa.OpConst8 { - return s.newValue1I(op, t, args[1].AuxInt< Date: Mon, 16 Jun 2025 17:47:09 +0000 Subject: [PATCH 084/200] internal/simdgen: fix float Sub instruction error FP sub is mapped to Add, this CL fixes that. 
Change-Id: I5645f9427cb89b989baa1b3d4f6e7503eb5f24dc Reviewed-on: https://go-review.googlesource.com/c/arch/+/681976 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 4 ++-- internal/simdgen/ops/AddSub/go.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 6fb817be..f7e771c6 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -55,14 +55,14 @@ # Sub - go: Sub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: &2any - *any - *any out: &1any - *any - go: MaskedSub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask - *any diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 75222a1b..c2df1e2c 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -54,14 +54,14 @@ # Sub - go: Sub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: &2any - *any - *any out: &1any - *any - go: MaskedSub - asm: "VPSUB[BWDQ]|VADDP[SD]" + asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask - *any From ebbfc1c588ead2032daf83f9d5fe6fce2b3c7ab4 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 13 Jun 2025 18:13:15 -0400 Subject: [PATCH 085/200] arch/internal/simdgen: remove control flow from template this is a prerequisite to sort in some other order Change-Id: Ie8683dfdf028195044b706388a018da54c265d16 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681995 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 109 +++++++++++++++++------------- 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 50480b30..13920333 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -84,84 +84,73 @@ func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) {{end}} ` -const 
simdStubsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. //go:build goexperiment.simd -package simd - -{{- range .OpsLen1}} +package simd{{end}} +{{define "opsLen1"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen2}} +{{define "opsLen2"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen3}} +{{define "opsLen3"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen4}} +{{define "opsLen4"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen1Imm8}} +{{define "opsLen1Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: 
{{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen2Imm8}} +{{define "opsLen2Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen3Imm8}} +{{define "opsLen3Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{- end}} -{{- range .OpsLen3Imm8}} +{{define "opsLen4Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} - -{{- end}} -{{- range .VectorConversions }} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}}{{end}} +{{define "vectorConversion"}} // {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) +{{end}} -{{- end}} -{{- range .Masks }} - +{{define "mask"}} // converts from {{.Name}} to {{.VectorCounterpart}} func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) @@ -171,8 +160,7 @@ func (from {{.VectorCounterpart}}) As{{.Name}}() (to {{.Name}}) func (x {{.Name}}) And(y {{.Name}}) {{.Name}} func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} - -{{- end}} +{{end}} ` // 
parseSIMDTypes groups go simd types by their vector sizes, and @@ -289,27 +277,52 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro return err } defer file.Close() + + if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { + return fmt.Errorf("failed to execute fileHeader template: %w", err) + } + if _, err := file.WriteString("\n"); err != nil { + return err + } + opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) if err != nil { return err } - type templateData struct { - OpsLen1 []Operation - OpsLen2 []Operation - OpsLen3 []Operation - OpsLen4 []Operation - OpsLen1Imm8 []Operation - OpsLen2Imm8 []Operation - OpsLen3Imm8 []Operation - OpsLen4Imm8 []Operation - VectorConversions []simdTypePair - Masks []simdType + opLists := map[string][]Operation{ + "opsLen1": opsLen1, + "opsLen2": opsLen2, + "opsLen3": opsLen3, + "opsLen4": opsLen4, + "opsLen1Imm8": opsLen1Imm8, + "opsLen2Imm8": opsLen2Imm8, + "opsLen3Imm8": opsLen3Imm8, + "opsLen4Imm8": opsLen4Imm8, } - err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) - if err != nil { - return fmt.Errorf("failed to execute template : %w", err) + templateNames := []string{"opsLen1", "opsLen2", "opsLen3", "opsLen4", "opsLen1Imm8", "opsLen2Imm8", "opsLen3Imm8", "opsLen4Imm8"} + + for _, name := range templateNames { + for _, op := range opLists[name] { + if err := t.ExecuteTemplate(file, name, op); err != nil { + return fmt.Errorf("failed to execute template %s for op %s: %w", name, op.Go, err) + } + } + } + + vectorConversions := vConvertFromTypeMap(typeMap) + for _, conv := range vectorConversions { + if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { + return fmt.Errorf("failed to execute vectorConversion template: %w", err) + } + } + + masks := masksFromTypeMap(typeMap) + 
for _, mask := range masks { + if err := t.ExecuteTemplate(file, "mask", mask); err != nil { + return fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err) + } } return nil From 7b7f349a95426492e8f0867d783ac515b7e5afd1 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 14:05:30 -0400 Subject: [PATCH 086/200] internal/simdgen: generate stubs in alphabetical order Includes tweaking the Operator order so that all the operations come out ordered by method name element type element width element count and inserts break between all the grouped methods. E.g. ``` package simd // Absolute // Absolute computes the absolute value of each element. // // Asm: VPABSB, CPU Feature: AVX func (x Int8x16) Absolute() Int8x16 // Absolute computes the absolute value of each element. // // Asm: VPABSB, CPU Feature: AVX2 func (x Int8x32) Absolute() Int8x32 ... // Absolute computes the absolute value of each element. // // Asm: VPABSQ, CPU Feature: AVX512EVEX func (x Int64x8) Absolute() Int64x8 // Add // Add adds corresponding elements of two vectors. // // Asm: VADDPS, CPU Feature: AVX func (x Float32x4) Add(y Float32x4) Float32x4 ... 
``` Change-Id: I97d1c051d1cc9a1b610c907ce13c84907a48f7e2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681996 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 51 ++++++++++++------------------- internal/simdgen/gen_utility.go | 41 +++++++++++++++++++++++++ internal/simdgen/godefs.go | 35 ++++++++++++++++++--- 3 files changed, 90 insertions(+), 37 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 13920333..93bd49c3 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -89,56 +89,56 @@ const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/intern //go:build goexperiment.simd package simd{{end}} -{{define "opsLen1"}} +{{define "op1"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}}{{end}} -{{define "opsLen2"}} +{{define "op2"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen3"}} +{{define "op3"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen4"}} +{{define "op4"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen1Imm8"}} +{{define "op1Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}}{{end}} -{{define "opsLen2Imm8"}} +{{define "op2Imm8"}} 
{{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen3Imm8"}} +{{define "op3Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} -{{define "opsLen4Imm8"}} +{{define "op4Imm8"}} {{if .Documentation}} {{.Documentation}} //{{end}} @@ -281,33 +281,20 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { return fmt.Errorf("failed to execute fileHeader template: %w", err) } - if _, err := file.WriteString("\n"); err != nil { - return err - } - opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) - if err != nil { - return err - } + slices.SortFunc(ops, compareOperations) - opLists := map[string][]Operation{ - "opsLen1": opsLen1, - "opsLen2": opsLen2, - "opsLen3": opsLen3, - "opsLen4": opsLen4, - "opsLen1Imm8": opsLen1Imm8, - "opsLen2Imm8": opsLen2Imm8, - "opsLen3Imm8": opsLen3Imm8, - "opsLen4Imm8": opsLen4Imm8, - } - - templateNames := []string{"opsLen1", "opsLen2", "opsLen3", "opsLen4", "opsLen1Imm8", "opsLen2Imm8", "opsLen3Imm8", "opsLen4Imm8"} - - for _, name := range templateNames { - for _, op := range opLists[name] { - if err := t.ExecuteTemplate(file, name, op); err != nil { - return fmt.Errorf("failed to execute template %s for op %s: %w", name, op.Go, err) + for i, op := range ops { + if s, op, err := classifyOp(op); err == nil { + if i == 0 || op.Go != ops[i-1].Go { + fmt.Fprintf(file, "\n\n// %s", op.Go) + } + if err := t.ExecuteTemplate(file, s, op); err != nil { + return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) } + + } else { + return 
fmt.Errorf("failed to classify op %v: %w", op.Go, err) } } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 074be682..cc97c3b3 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -308,6 +308,47 @@ func (op *Operation) sortOperand() { }) } +func classifyOp(op Operation) (string, Operation, error) { + _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() + if err != nil { + return "", op, err + } + // Put the go ssa type in GoArch field, simd intrinsics need it. + if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { + opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) + gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) + } + if immType == VarImm || immType == ConstVarImm { + switch len(opNoConstMask.In) { + case 1: + return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) + case 2: + return "op1Imm8", opNoConstMask, nil + case 3: + return "op2Imm8", opNoConstMask, nil + case 4: + return "op3Imm8", opNoConstMask, nil + case 5: + return "op4Imm8", opNoConstMask, nil + default: + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } + } else { + switch len(gOp.In) { + case 1: + return "op1", gOp, nil + case 2: + return "op2", gOp, nil + case 3: + return "op3", gOp, nil + case 4: + return "op4", gOp, nil + default: + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + } + } +} + // opsByLen returns the lists of ops stripping the const masks away, aggregated by input length. // Ops with only const imms also has their immediates removed. 
func opsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8 []Operation, e error) { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index b6d872be..7701f2fb 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -52,10 +52,18 @@ func compareOperations(x, y Operation) int { if c := strings.Compare(x.GoArch, y.GoArch); c != 0 { return c } - if len(x.In) < len(y.In) { + xIn, yIn := x.In, y.In + + if len(xIn) > len(yIn) && xIn[len(xIn)-1].Class == "mask" { + xIn = xIn[:len(xIn)-1] + } else if len(xIn) < len(yIn) && yIn[len(yIn)-1].Class == "mask" { + yIn = yIn[:len(yIn)-1] + } + + if len(xIn) < len(yIn) { return -1 } - if len(x.In) > len(y.In) { + if len(xIn) > len(yIn) { return 1 } if len(x.Out) < len(y.Out) { @@ -64,15 +72,32 @@ func compareOperations(x, y Operation) int { if len(x.Out) > len(y.Out) { return 1 } - for i := range x.In { - ox, oy := &x.In[i], y.In[i] - if c := compareStringPointers(ox.Go, oy.Go); c != 0 { + for i := range xIn { + ox, oy := &xIn[i], &yIn[i] + if c := compareOperands(ox, oy); c != 0 { return c } } return 0 } +func compareOperands(x, y *Operand) int { + if c := strings.Compare(x.Class, y.Class); c != 0 { + return c + } + if x.Class == "immediate" { + return compareStringPointers(x.ImmOffset, y.ImmOffset) + } else { + if c := strings.Compare(*x.Base, *y.Base); c != 0 { + return c + } + if c := *x.ElemBits - *y.ElemBits; c != 0 { + return c + } + return *x.Bits - *y.Bits + } +} + type Operand struct { Class string // One of "mask", "immediate", "vreg" and "mem" From b5719843eceadfabbd4a61dbdfd5ebaf0cdad4d2 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 14:15:01 -0400 Subject: [PATCH 087/200] internal/simdgen: add 'go test simd' to etetest.sh The end-to-end test should run this test. 
Change-Id: I1ea64ef808cb18529b68d126640d4f1583a1eb79 Reviewed-on: https://go-review.googlesource.com/c/arch/+/681997 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/etetest.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh index a88776bf..ea270429 100755 --- a/internal/simdgen/etetest.sh +++ b/internal/simdgen/etetest.sh @@ -19,6 +19,7 @@ go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categ (cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) (cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) (cd go-test/bin; b=`pwd` ; cd ../src ; +GOEXPERIMENT=simd GOARCH=amd64 $b/go test -v simd GOEXPERIMENT=simd $b/go test go/doc GOEXPERIMENT=simd $b/go test go/build GOEXPERIMENT=simd $b/go test cmd/api -v -check From 9edca9c6938c5693ab5769f37d5c6cd53e01408b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 16 Jun 2025 19:40:02 +0000 Subject: [PATCH 088/200] internal/simdgen: make stubs file gofmt compliant This CL makes the generated code pass git gofmt check. This CL also make the top method Go name inside a /* */ comment, for easier tracking of generated stubs. 
Change-Id: I5eb6c98f9275b068205dcdece880cf585ef4ab3a Reviewed-on: https://go-review.googlesource.com/c/arch/+/682035 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 54 ++++++++++++++++--------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 93bd49c3..ee118453 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -88,62 +88,64 @@ const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/intern //go:build goexperiment.simd -package simd{{end}} +package simd +{{end}} + {{define "op1"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} +{{end}} {{define "op2"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op3"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op4"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} 
+func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op1Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} +{{end}} {{define "op2Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op3Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "op4Imm8"}} -{{if .Documentation}} -{{.Documentation}} +{{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}}{{end}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}} +{{end}} {{define "vectorConversion"}} // {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} @@ -287,7 +289,7 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro for i, op := range ops { if s, op, err := classifyOp(op); err == nil { if i == 0 || 
op.Go != ops[i-1].Go { - fmt.Fprintf(file, "\n\n// %s", op.Go) + fmt.Fprintf(file, "\n/* %s */\n", op.Go) } if err := t.ExecuteTemplate(file, s, op); err != nil { return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) From 2739fbbb93c46ea92032ddc524c1f0e7a1a01a68 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 15:17:13 -0400 Subject: [PATCH 089/200] internal/simdgen: generate intrinsics in alphabetical order To do this, rewrote template to remove iteration, changed call to opsByLen to instead use classifyOp, commented classifyOp, deleted dead code. The alphabetized output appears in CL 682295 Change-Id: I30ef0fe1c6f3b0cdc2003b7da5ee794986272205 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682036 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 128 ++++++++++++++----------- internal/simdgen/gen_utility.go | 75 +-------------- 2 files changed, 73 insertions(+), 130 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 02eb5a63..091320a5 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -6,9 +6,10 @@ package main import ( "fmt" + "slices" ) -const simdIntrinsicsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +const simdIntrinsicsTmpl = `{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package ssagen import ( @@ -21,48 +22,40 @@ import ( const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { -{{- range .OpsLen1}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen2}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen3}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen4}} - addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) -{{- end}} -{{- range .OpsLen1Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} -{{- range .OpsLen2Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} -{{- range .OpsLen3Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} -{{- range .OpsLen4Imm8}} - addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{- end}} - -{{- range .VectorConversions }} - addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) -{{- end}} - -{{- range $size, $ts := .TypeMap }} -{{- range $t := $ts }} - addF(simdPackage, "Load{{$t.Name}}", simdLoad(), sys.AMD64) - addF(simdPackage, "{{$t.Name}}.Store", simdStore(), sys.AMD64) -{{- end}} -{{- end}} -{{- range .Masks }} - 
addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) +{{end}} + +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{end}} +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} + +{{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) +{{end}} + +{{define "typeMap"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) + addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) +{{end}} + +{{define "mask"}} addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, 
"{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) -{{- end}} -} +{{end}} + +{{define "footer"}}} +{{end}} ` // writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go @@ -73,27 +66,46 @@ func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) return err } defer file.Close() - opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, err := opsByLen(ops) - if err != nil { - return err + + if err := t.ExecuteTemplate(file, "header", nil); err != nil { + return fmt.Errorf("failed to execute header template: %w", err) } - type templateData struct { - OpsLen1 []Operation - OpsLen2 []Operation - OpsLen3 []Operation - OpsLen4 []Operation - OpsLen1Imm8 []Operation - OpsLen2Imm8 []Operation - OpsLen3Imm8 []Operation - OpsLen4Imm8 []Operation - TypeMap simdTypeMap - VectorConversions []simdTypePair - Masks []simdType + slices.SortFunc(ops, compareOperations) + + for _, op := range ops { + if s, op, err := classifyOp(op); err == nil { + if err := t.ExecuteTemplate(file, s, op); err != nil { + return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) + } + + } else { + return fmt.Errorf("failed to classify op %v: %w", op.Go, err) + } } - err = t.Execute(file, templateData{opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8, typeMap, vConvertFromTypeMap(typeMap), masksFromTypeMap(typeMap)}) - if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + + for _, conv := range vConvertFromTypeMap(typeMap) { + if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { + return fmt.Errorf("failed to 
execute vectorConversion template: %w", err) + } + } + + for _, ts := range typeMap { + for _, typ := range ts { + if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { + return fmt.Errorf("failed to execute typeMap template: %w", err) + } + } + } + + for _, mask := range masksFromTypeMap(typeMap) { + if err := t.ExecuteTemplate(file, "mask", mask); err != nil { + return fmt.Errorf("failed to execute mask template: %w", err) + } + } + + if err := t.ExecuteTemplate(file, "footer", nil); err != nil { + return fmt.Errorf("failed to execute footer template: %w", err) } return nil diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index cc97c3b3..6ec1b6f6 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -308,6 +308,9 @@ func (op *Operation) sortOperand() { }) } +// classifyOp returns a classification string, modified operation, and perhaps error based +// on the stub and intrinsic shape for the operation. +// The classification string is in the regular expression set "op[1234](Imm8)?" func classifyOp(op Operation) (string, Operation, error) { _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() if err != nil { @@ -349,78 +352,6 @@ func classifyOp(op Operation) (string, Operation, error) { } } -// opsByLen returns the lists of ops stripping the const masks away, aggregated by input length. -// Ops with only const imms also has their immediates removed. 
-func opsByLen(ops []Operation) (opsLen1, opsLen2, opsLen3, opsLen4, opsLen1Imm8, opsLen2Imm8, opsLen3Imm8, opsLen4Imm8 []Operation, e error) { - opsLen1 = make([]Operation, 0) - opsLen2 = make([]Operation, 0) - opsLen3 = make([]Operation, 0) - opsLen4 = make([]Operation, 0) - opsLen1Imm8 = make([]Operation, 0) - opsLen2Imm8 = make([]Operation, 0) - opsLen3Imm8 = make([]Operation, 0) - opsLen4Imm8 = make([]Operation, 0) - for _, op := range ops { - _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() - if err != nil { - e = err - return - } - // Put the go ssa type in GoArch field, simd intrinsics need it. - if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { - opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) - gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) - } - if immType == VarImm || immType == ConstVarImm { - switch len(opNoConstMask.In) { - case 1: - e = fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) - return - case 2: - opsLen1Imm8 = append(opsLen1Imm8, opNoConstMask) - case 3: - opsLen2Imm8 = append(opsLen2Imm8, opNoConstMask) - case 4: - opsLen3Imm8 = append(opsLen3Imm8, opNoConstMask) - case 5: - opsLen4Imm8 = append(opsLen4Imm8, opNoConstMask) - default: - e = fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) - } - } else { - switch len(gOp.In) { - case 1: - opsLen1 = append(opsLen1, gOp) - case 2: - opsLen2 = append(opsLen2, gOp) - case 3: - opsLen3 = append(opsLen3, gOp) - case 4: - opsLen4 = append(opsLen4, gOp) - default: - e = fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) - } - } - } - sortKey := func(op *Operation) string { - return *op.In[0].Go + op.Go - } - sortBySortKey := func(ops []Operation) { - sort.Slice(ops, func(i, j int) bool { - return sortKey(&ops[i]) < sortKey(&ops[j]) - }) - } - 
sortBySortKey(opsLen1) - sortBySortKey(opsLen2) - sortBySortKey(opsLen3) - sortBySortKey(opsLen4) - sortBySortKey(opsLen1Imm8) - sortBySortKey(opsLen2Imm8) - sortBySortKey(opsLen3Imm8) - sortBySortKey(opsLen4Imm8) - return -} - // dedup is deduping operations in the full structure level. func dedup(ops []Operation) (deduped []Operation) { for _, op := range ops { From 2f50423530578e96e7e5a08fdb622539b8906dbd Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 17 Jun 2025 11:40:52 -0400 Subject: [PATCH 090/200] internal/simdgen: remove map iteration; tweak type comparison This makes the type conversion part of the simd intrinsics have a repeatable order, and aligns the type comparison order with the one chosen for methods. Type order is element base type, element type width, vector width Output CL (dev.simd) is CL 682355 Change-Id: If483f86bec1c1e24689913d89b58acc07c18477f Reviewed-on: https://go-review.googlesource.com/c/arch/+/682316 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 8 +++----- internal/simdgen/gen_simdTypes.go | 28 +++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 091320a5..b34ffeb8 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -90,11 +90,9 @@ func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) } } - for _, ts := range typeMap { - for _, typ := range ts { - if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { - return fmt.Errorf("failed to execute typeMap template: %w", err) - } + for _, typ := range typesFromTypeMap(typeMap) { + if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { + return fmt.Errorf("failed to execute typeMap template: %w", err) } } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index ee118453..3120eb85 100644 --- 
a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -23,11 +23,22 @@ type simdType struct { } func compareSimdTypes(x, y simdType) int { - c := strings.Compare(x.Name, y.Name) - if c != 0 { + // "mask" then "vreg" + if c := strings.Compare(x.Type, y.Type); c != 0 { + return c + } + // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), + // not "int16" < "int32" < "int64" < "int8") + // so limit comparison to first 3 bytes in string. + if c := strings.Compare(x.Base[:3], y.Base[:3]); c != 0 { return c } - return strings.Compare(x.Type, y.Type) + // base type size, 8 < 16 < 32 < 64 + if c := x.Size/x.Lanes - y.Size/y.Lanes; c != 0 { + return c + } + // vector size last + return x.Size - y.Size } type simdTypeMap map[int][]simdType @@ -238,6 +249,17 @@ func masksFromTypeMap(typeMap simdTypeMap) []simdType { return m } +func typesFromTypeMap(typeMap simdTypeMap) []simdType { + m := []simdType{} + for _, ts := range typeMap { + for _, tsrc := range ts { + m = append(m, tsrc) + } + } + slices.SortFunc(m, compareSimdTypes) + return m +} + // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { From 6a5ee49ac3818cb695f36315ce3be4d74f49b48e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 16 Jun 2025 22:53:58 +0000 Subject: [PATCH 091/200] internal/simdgen: fix typo in PairDotProdAccumulate. 
Change-Id: Ieb593812e2c53c3c22e76cc972f81a9a199abc90 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682135 Auto-Submit: Junyang Shao LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_utility.go | 6 ++++++ internal/simdgen/go.yaml | 12 ++++++------ internal/simdgen/ops/MLOps/go.yaml | 12 ++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 6ec1b6f6..848570ca 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -650,6 +650,12 @@ func (op Operand) String() string { sb.WriteString(" OverwriteBase: \n") } + if op.OverwriteElementBits != nil { + sb.WriteString(fmt.Sprintf(" OverwriteElementBits: %d\n", *op.OverwriteElementBits)) + } else { + sb.WriteString(" OverwriteElementBits: \n") + } + sb.WriteString(" }\n") return sb.String() } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index f7e771c6..401a90fb 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -527,10 +527,10 @@ - class: mask - go: $t1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - go: $t2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *int3 - go: DotProdBroadcast @@ -555,11 +555,11 @@ - &qdpa_src1 go: $t_src1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - &qdpa_src2 go: $t_src2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *qdpa_acc - go: MaskedUnsignedSignedQuadDotProdAccumulate @@ -598,11 +598,11 @@ - &pdpa_src1 go: $t_src1 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 - &pdpa_src2 go: $t_src2 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 out: - *pdpa_acc - go: MaskedPairDotProdAccumulate diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index e0cefda2..b9add167 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -35,10 +35,10 @@ - 
class: mask - go: $t1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - go: $t2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *int3 - go: DotProdBroadcast @@ -63,11 +63,11 @@ - &qdpa_src1 go: $t_src1 base: uint - overwriteElemBits: 8 + overwriteElementBits: 8 - &qdpa_src2 go: $t_src2 base: int - overwriteElemBits: 8 + overwriteElementBits: 8 out: - *qdpa_acc - go: MaskedUnsignedSignedQuadDotProdAccumulate @@ -106,11 +106,11 @@ - &pdpa_src1 go: $t_src1 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 - &pdpa_src2 go: $t_src2 base: int - overwriteElemBits: 16 + overwriteElementBits: 16 out: - *pdpa_acc - go: MaskedPairDotProdAccumulate From 2c11faf3e697e1f2f777e394813d1b4159c0a0d8 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 16:03:03 -0400 Subject: [PATCH 092/200] internal/simdgen: separate template creation from file opening Change-Id: I158b1d6f3c6a010cbe2778447cf3d818c5014d51 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682115 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 3 ++- internal/simdgen/gen_simdIntrinsics.go | 3 ++- internal/simdgen/gen_simdMachineOps.go | 3 ++- internal/simdgen/gen_simdTypes.go | 6 ++++-- internal/simdgen/gen_simdssa.go | 2 +- internal/simdgen/gen_utility.go | 21 ++++++++++++--------- internal/simdgen/godefs.go | 1 + 7 files changed, 24 insertions(+), 15 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index bdda8b80..114888e7 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -27,7 +27,8 @@ func simdGenericOps() []opData { // writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go // within the specified directory. 
func writeSIMDGenericOps(directory string, ops []Operation) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go", simdGenericOpsTmpl) + t := templateOf(simdGenericOpsTmpl, "simdgenericOps") + file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index b34ffeb8..92f5b6df 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -61,7 +61,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . // writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go // within the specified directory. func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go", simdIntrinsicsTmpl) + t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") + file, err := createPath(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f09b5568..c7c47d94 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -27,7 +27,8 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 // writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go // within the specified directory. 
func writeSIMDMachineOps(directory string, ops []Operation) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go", simdMachineOpsTmpl) + t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") + file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 3120eb85..00de88d0 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -263,7 +263,8 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { // writeSIMDTypes generates the simd vector type and writes it to types_amd64.go // within the specified directory. func writeSIMDTypes(directory string, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/types_amd64.go", simdTypesTemplates) + t := templateOf(simdTypesTemplates, "types_amd64") + file, err := createPath(directory, "src/"+simdPackage+"/types_amd64.go") if err != nil { return err } @@ -296,7 +297,8 @@ func writeSIMDTypes(directory string, typeMap simdTypeMap) error { // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go // within the specified directory. 
func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { - file, t, err := openFileAndPrepareTemplate(directory, "src/"+simdPackage+"/stubs_amd64.go", simdStubsTmpl) + t := templateOf(simdStubsTmpl, "simdStubs") + file, err := createPath(directory, "src/"+simdPackage+"/stubs_amd64.go") if err != nil { return err } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 14f97e60..53ded489 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -121,7 +121,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { return fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen) } - file, _, err := openFileAndPrepareTemplate(directory, "src/cmd/compile/internal/amd64/simdssa.go", "") + file, err := createPath(directory, "src/cmd/compile/internal/amd64/simdssa.go") if err != nil { return err } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 848570ca..daa0db75 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -17,23 +17,26 @@ import ( "unicode" ) -func openFileAndPrepareTemplate(goroot string, file string, temp string) (*os.File, *template.Template, error) { +func templateOf(temp, name string) *template.Template { + t, err := template.New(name).Parse(temp) + if err != nil { + panic(fmt.Errorf("failed to parse template %s: %w", name, err)) + } + return t +} + +func createPath(goroot string, file string) (*os.File, error) { fp := filepath.Join(goroot, file) dir := filepath.Dir(fp) err := os.MkdirAll(dir, 0755) if err != nil { - return nil, nil, fmt.Errorf("failed to create directory %s: %w", dir, err) + return nil, fmt.Errorf("failed to create directory %s: %w", dir, err) } f, err := os.Create(fp) if err != nil { - return nil, nil, fmt.Errorf("failed to create file %s: %w", fp, err) - } - t, err := template.New(fp).Parse(temp) - if err != nil { - 
f.Close() - return nil, nil, fmt.Errorf("failed to parse template: %w", err) + return nil, fmt.Errorf("failed to create file %s: %w", fp, err) } - return f, t, nil + return f, nil } const ( diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 7701f2fb..009520df 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -174,6 +174,7 @@ func writeGoDefs(path string, cl unify.Closure) error { } log.Printf("dedup len: %d\n", len(deduped)) typeMap := parseSIMDTypes(deduped) + if err = writeSIMDTypes(path, typeMap); err != nil { return err } From 9884aef72aede3ef3690240d2427e94ac7dfad8c Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 16 Jun 2025 18:07:19 -0400 Subject: [PATCH 093/200] internal/simdgen: use go/format on generated Go also refactor generators to return a *bytes.Buffer, and turn error returns into panics because it is easier to read. There are no changes to the output; the parent generates correctly formatted Go. Change-Id: I9ac3ed25c1e868f900fb6a6a9b80e8e33b5fe5cd Reviewed-on: https://go-review.googlesource.com/c/arch/+/682116 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 18 ++++----- internal/simdgen/gen_simdIntrinsics.go | 37 ++++++++--------- internal/simdgen/gen_simdMachineOps.go | 24 +++++------ internal/simdgen/gen_simdTypes.go | 56 +++++++++++--------------- internal/simdgen/gen_simdrules.go | 34 ++++++---------- internal/simdgen/gen_simdssa.go | 37 ++++++++--------- internal/simdgen/gen_utility.go | 20 +++++++++ internal/simdgen/godefs.go | 29 ++++--------- 8 files changed, 117 insertions(+), 138 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 114888e7..6f8b16b7 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "sort" ) @@ -26,13 +27,10 @@ func simdGenericOps() []opData { // 
writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go // within the specified directory. -func writeSIMDGenericOps(directory string, ops []Operation) error { +func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { t := templateOf(simdGenericOpsTmpl, "simdgenericOps") - file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) + type genericOpsData struct { sortKey string OpName string @@ -47,7 +45,7 @@ func writeSIMDGenericOps(directory string, ops []Operation) error { for _, op := range ops { _, _, _, immType, _, _, gOp, err := op.shape() if err != nil { - return err + panic(err) } genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} @@ -64,10 +62,10 @@ func writeSIMDGenericOps(directory string, ops []Operation) error { return opsData.OpsImm[i].sortKey < opsData.OpsImm[j].sortKey }) - err = t.Execute(file, opsData) + err := t.Execute(buffer, opsData) if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + panic(fmt.Errorf("failed to execute template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 92f5b6df..3910e028 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "slices" ) @@ -60,52 +61,48 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . // writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go // within the specified directory. 
-func writeSIMDIntrinsics(directory string, ops []Operation, typeMap simdTypeMap) error { +func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") - file, err := createPath(directory, "src/cmd/compile/internal/ssagen/simdintrinsics.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := t.ExecuteTemplate(file, "header", nil); err != nil { - return fmt.Errorf("failed to execute header template: %w", err) + if err := t.ExecuteTemplate(buffer, "header", nil); err != nil { + panic(fmt.Errorf("failed to execute header template: %w", err)) } slices.SortFunc(ops, compareOperations) for _, op := range ops { if s, op, err := classifyOp(op); err == nil { - if err := t.ExecuteTemplate(file, s, op); err != nil { - return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) + if err := t.ExecuteTemplate(buffer, s, op); err != nil { + panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) } } else { - return fmt.Errorf("failed to classify op %v: %w", op.Go, err) + panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) } } for _, conv := range vConvertFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { - return fmt.Errorf("failed to execute vectorConversion template: %w", err) + if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { + panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) } } for _, typ := range typesFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(file, "typeMap", typ); err != nil { - return fmt.Errorf("failed to execute typeMap template: %w", err) + if err := t.ExecuteTemplate(buffer, "typeMap", typ); err != nil { + panic(fmt.Errorf("failed to execute typeMap template: %w", err)) } } for _, mask := range masksFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(file, "mask", mask); err != nil { - 
return fmt.Errorf("failed to execute mask template: %w", err) + if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { + panic(fmt.Errorf("failed to execute mask template: %w", err)) } } - if err := t.ExecuteTemplate(file, "footer", nil); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := t.ExecuteTemplate(buffer, "footer", nil); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index c7c47d94..43ede6ec 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "sort" ) @@ -26,13 +27,10 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 // writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go // within the specified directory. -func writeSIMDMachineOps(directory string, ops []Operation) error { +func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") - file, err := createPath(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) + type opData struct { sortKey string OpName string @@ -54,7 +52,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { for _, op := range ops { shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() if err != nil { - return err + panic(err) } asm := gOp.Asm if maskType == OneMask { @@ -69,10 +67,10 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { seen[asm] = struct{}{} regInfo, err := op.regShape() if err != nil { - return err + panic(err) } if _, ok := regInfoSet[regInfo]; !ok { - return fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo) + 
panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo)) } var outType string if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { @@ -81,7 +79,7 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { } else if shapeOut == OneKmaskOut { outType = "Mask" } else { - return fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut) + panic(fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut)) } resultInArg0 := "false" if shapeOut == OneVregOutAtIn { @@ -99,10 +97,10 @@ func writeSIMDMachineOps(directory string, ops []Operation) error { sort.Slice(opsDataImm, func(i, j int) bool { return opsDataImm[i].sortKey < opsDataImm[j].sortKey }) - err = t.Execute(file, machineOpsData{opsData, opsDataImm}) + err := t.Execute(buffer, machineOpsData{opsData, opsDataImm}) if err != nil { - return fmt.Errorf("failed to execute template: %w", err) + panic(fmt.Errorf("failed to execute template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 00de88d0..4966dae7 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "slices" "sort" @@ -260,18 +261,13 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { return m } -// writeSIMDTypes generates the simd vector type and writes it to types_amd64.go -// within the specified directory. 
-func writeSIMDTypes(directory string, typeMap simdTypeMap) error { +// writeSIMDTypes generates the simd vector types into a bytes.Buffer +func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdTypesTemplates, "types_amd64") - file, err := createPath(directory, "src/"+simdPackage+"/types_amd64.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { - return fmt.Errorf("failed to execute fileHeader template: %w", err) + if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) } sizes := make([]int, 0, len(typeMap)) @@ -281,31 +277,27 @@ func writeSIMDTypes(directory string, typeMap simdTypeMap) error { sort.Ints(sizes) for _, size := range sizes { - if err := t.ExecuteTemplate(file, "sizeTmpl", size); err != nil { - return fmt.Errorf("failed to execute size template for size %d: %w", size, err) + if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil { + panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err)) } for _, typeDef := range typeMap[size] { - if err := t.ExecuteTemplate(file, "typeTmpl", typeDef); err != nil { - return fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err) + if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { + panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) } } } - return nil + return buffer } // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go // within the specified directory. 
-func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) error { +func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdStubsTmpl, "simdStubs") - file, err := createPath(directory, "src/"+simdPackage+"/stubs_amd64.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := t.ExecuteTemplate(file, "fileHeader", nil); err != nil { - return fmt.Errorf("failed to execute fileHeader template: %w", err) + if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) } slices.SortFunc(ops, compareOperations) @@ -313,30 +305,30 @@ func writeSIMDStubs(directory string, ops []Operation, typeMap simdTypeMap) erro for i, op := range ops { if s, op, err := classifyOp(op); err == nil { if i == 0 || op.Go != ops[i-1].Go { - fmt.Fprintf(file, "\n/* %s */\n", op.Go) + fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) } - if err := t.ExecuteTemplate(file, s, op); err != nil { - return fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err) + if err := t.ExecuteTemplate(buffer, s, op); err != nil { + panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) } } else { - return fmt.Errorf("failed to classify op %v: %w", op.Go, err) + panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) } } vectorConversions := vConvertFromTypeMap(typeMap) for _, conv := range vectorConversions { - if err := t.ExecuteTemplate(file, "vectorConversion", conv); err != nil { - return fmt.Errorf("failed to execute vectorConversion template: %w", err) + if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { + panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) } } masks := masksFromTypeMap(typeMap) for _, mask := range masks { - if err := t.ExecuteTemplate(file, "mask", mask); err != nil { - return fmt.Errorf("failed to execute mask template for 
mask %s: %w", mask.Name, err) + if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { + panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err)) } } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 5f51c6f8..651ae382 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -5,10 +5,9 @@ package main import ( + "bytes" "fmt" "io" - "os" - "path/filepath" "slices" "strings" "text/template" @@ -54,23 +53,14 @@ func compareTplRuleData(x, y tplRuleData) int { // writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules // within the specified directory. -func writeSIMDRules(directory string, ops []Operation) error { - - outPath := filepath.Join(directory, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") - if err := os.MkdirAll(filepath.Dir(outPath), 0755); err != nil { - return fmt.Errorf("failed to create directory for %s: %w", outPath, err) - } - file, err := os.Create(outPath) - if err != nil { - return fmt.Errorf("failed to create %s: %w", outPath, err) - } - defer file.Close() +func writeSIMDRules(ops []Operation) *bytes.Buffer { + buffer := new(bytes.Buffer) header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
` - if _, err := io.WriteString(file, header); err != nil { - return fmt.Errorf("failed to write header to %s: %w", outPath, err) + if _, err := io.WriteString(buffer, header); err != nil { + panic(fmt.Errorf("failed to write header: %w", err)) } var allData []tplRuleData @@ -78,7 +68,7 @@ func writeSIMDRules(directory string, ops []Operation) error { for _, opr := range ops { opInShape, opOutShape, maskType, immType, _, _, gOp, err := opr.shape() if err != nil { - return err + panic(err) } vregInCnt := len(gOp.In) asm := gOp.Asm @@ -103,7 +93,7 @@ func writeSIMDRules(directory string, ops []Operation) error { data.Args = "x y z" data.ArgsOut = data.Args } else { - return fmt.Errorf("simdgen does not support more than 3 vreg in inputs") + panic(fmt.Errorf("simdgen does not support more than 3 vreg in inputs")) } if immType == ConstImm { data.ArgsOut = fmt.Sprintf("[%s] %s", *opr.In[0].Const, data.ArgsOut) @@ -136,7 +126,7 @@ func writeSIMDRules(directory string, ops []Operation) error { // Mask is at the end. 
data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: - return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) } } else { // OneKmaskOut case @@ -155,7 +145,7 @@ func writeSIMDRules(directory string, ops []Operation) error { rearIdx := len(gOp.In) - 1 data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: - return fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations") + panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) } } @@ -166,10 +156,10 @@ func writeSIMDRules(directory string, ops []Operation) error { slices.SortFunc(allData, compareTplRuleData) for _, data := range allData { - if err := ruleTemplates.ExecuteTemplate(file, data.tplName, data); err != nil { - return fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err) + if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil { + panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err)) } } - return nil + return buffer } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 53ded489..e606b69c 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -5,6 +5,7 @@ package main import ( + "bytes" "fmt" "strings" "text/template" @@ -56,7 +57,7 @@ type tplSSAData struct { // writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go // within the specified directory. 
-func writeSIMDSSA(directory string, ops []Operation) error { +func writeSIMDSSA(ops []Operation) *bytes.Buffer { var ZeroingMask []string regInfoKeys := []string{ "fp11", @@ -86,7 +87,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { asm := op.Asm shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() if err != nil { - return err + panic(err) } if maskType == 2 { asm += "Masked" @@ -104,7 +105,7 @@ func writeSIMDSSA(directory string, ops []Operation) error { } regShape, err := op.regShape() if err != nil { - return err + panic(err) } if shapeOut == OneVregOutAtIn { regShape += "ResultInArg0" @@ -118,17 +119,13 @@ func writeSIMDSSA(directory string, ops []Operation) error { regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) } if len(allUnseen) != 0 { - return fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen) + panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen)) } - file, err := createPath(directory, "src/cmd/compile/internal/amd64/simdssa.go") - if err != nil { - return err - } - defer file.Close() + buffer := new(bytes.Buffer) - if err := ssaTemplates.ExecuteTemplate(file, "header", nil); err != nil { - return fmt.Errorf("failed to execute header template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "header", nil); err != nil { + panic(fmt.Errorf("failed to execute header template: %w", err)) } for _, regShape := range regInfoKeys { @@ -141,24 +138,24 @@ func writeSIMDSSA(directory string, ops []Operation) error { Cases: strings.Join(cases, ",\n\t\t"), Helper: "simd" + capitalizeFirst(regShape), } - if err := ssaTemplates.ExecuteTemplate(file, "case", data); err != nil { - return fmt.Errorf("failed to execute case template for %s: %w", regShape, err) + if err := ssaTemplates.ExecuteTemplate(buffer, "case", data); err != nil { + panic(fmt.Errorf("failed to execute case template 
for %s: %w", regShape, err)) } } - if err := ssaTemplates.ExecuteTemplate(file, "footer", nil); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "footer", nil); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } if len(ZeroingMask) != 0 { - if err := ssaTemplates.ExecuteTemplate(file, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } } - if err := ssaTemplates.ExecuteTemplate(file, "ending", nil); err != nil { - return fmt.Errorf("failed to execute footer template: %w", err) + if err := ssaTemplates.ExecuteTemplate(buffer, "ending", nil); err != nil { + panic(fmt.Errorf("failed to execute footer template: %w", err)) } - return nil + return buffer } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index daa0db75..6cdc54ea 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -5,7 +5,9 @@ package main import ( + "bytes" "fmt" + "go/format" "log" "os" "path/filepath" @@ -39,6 +41,24 @@ func createPath(goroot string, file string) (*os.File, error) { return f, nil } +func formatWriteAndClose(out *bytes.Buffer, goroot string, file string) { + b, err := format.Source(out.Bytes()) + if err != nil { + panic(err) + } else { + writeAndClose(b, goroot, file) + } +} + +func writeAndClose(b []byte, goroot string, file string) { + ofile, err := createPath(goroot, file) + if err != nil { + panic(err) + } + ofile.Write(b) + ofile.Close() +} + const ( InvalidIn int = iota PureVregIn diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 009520df..64c2e6e9 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -175,26 
+175,13 @@ func writeGoDefs(path string, cl unify.Closure) error { log.Printf("dedup len: %d\n", len(deduped)) typeMap := parseSIMDTypes(deduped) - if err = writeSIMDTypes(path, typeMap); err != nil { - return err - } - if err = writeSIMDStubs(path, deduped, typeMap); err != nil { - return err - } - if err = writeSIMDIntrinsics(path, deduped, typeMap); err != nil { - return err - } - if err = writeSIMDGenericOps(path, deduped); err != nil { - return err - } - if err = writeSIMDMachineOps(path, deduped); err != nil { - return err - } - if err = writeSIMDRules(path, deduped); err != nil { - return err - } - if err = writeSIMDSSA(path, deduped); err != nil { - return err - } + formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") + formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/stubs_amd64.go") + formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") + formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") + formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") + formatWriteAndClose(writeSIMDSSA(deduped), path, "src/cmd/compile/internal/amd64/simdssa.go") + writeAndClose(writeSIMDRules(deduped).Bytes(), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") + return nil } From 3373f86e010c24aa2ee246b58bfae78cc4d2e0d9 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 17 Jun 2025 19:39:58 +0000 Subject: [PATCH 094/200] internal/simdgen: remove redundant shapes of fused mul/add/sub. The users do not need to see every shape of these VFM* instructions at the API level. This CL keeps only one shape(213) of them; The rest will be generated by lowering rules as an optimization. This CL generates CL 682435. 
Change-Id: I59ea9c568b0c00c8af6757b9c74f779abf397e3c Reviewed-on: https://go-review.googlesource.com/c/arch/+/682436 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 102 +------------- internal/simdgen/go.yaml | 147 +-------------------- internal/simdgen/ops/MLOps/categories.yaml | 102 +------------- internal/simdgen/ops/MLOps/go.yaml | 147 +-------------------- 4 files changed, 24 insertions(+), 474 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 8b325fa4..c33a62e6 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -573,114 +573,24 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd132 +- go: MaskedFusedMultiplyAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. -- go: MaskedFusedMultiplyAdd213 + // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. +- go: MaskedFusedMultiplyAddSub masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. -- go: MaskedFusedMultiplyAdd231 + // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. -- go: MaskedFusedMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. 
-- go: MaskedFusedMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. -- go: MaskedFusedMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. -- go: MaskedFusedNegativeMultiplyAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -- go: MaskedFusedNegativeMultiplyAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -- go: MaskedFusedNegativeMultiplyAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -- go: MaskedFusedNegativeMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -- go: MaskedFusedNegativeMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. -- go: MaskedFusedNegativeMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -- go: MaskedFusedMultiplyAddSub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. 
-- go: MaskedFusedMultiplyAddSub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -- go: MaskedFusedMultiplyAddSub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. + // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
- go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 401a90fb..20bd9d57 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -631,126 +631,18 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd132 - asm: "VFMADD132PS|VFMADD132PD" - in: - - &fma_op - go: $t - base: float - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd213 +- go: MaskedFusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd231 - asm: "VFMADD231PS|VFMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub132 - asm: "VFMSUB132PS|VFMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub213 - asm: "VFMSUB213PS|VFMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub231 - asm: "VFMSUB231PS|VFMSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd132 - asm: "VFNMADD132PS|VFNMADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd213 - asm: "VFNMADD213PS|VFNMADD213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd231 - asm: "VFNMADD231PS|VFNMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub132 - asm: "VFNMSUB132PS|VFNMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub213 - asm: "VFNMSUB213PS|VFNMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub231 - asm: 
"VFNMSUB231PS|VFNMSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAddSub132 - asm: "VFMADDSUB132PS|VFMADDSUB132PD" - in: - - *fma_op + - &fma_op + go: $t + base: float - class: mask - *fma_op - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub213 +- go: MaskedFusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -759,25 +651,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub231 - asm: "VFMADDSUB231PS|VFMADDSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd132 - asm: "VFMSUBADD132PS|VFMSUBADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd213 +- go: MaskedFusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op @@ -786,15 +660,6 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplySubAdd231 - asm: "VFMSUBADD231PS|VFMSUBADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op - go: Max asm: "V?PMAXS[BWDQ]" in: &2int diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 412af692..6923dd37 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -77,111 +77,21 @@ extension: "AVX.*" documentation: !string |- // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd132 +- go: MaskedFusedMultiplyAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd132 performs `(v1 * v3) + v2`. -- go: MaskedFusedMultiplyAdd213 + // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. +- go: MaskedFusedMultiplyAddSub masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd213 performs `(v2 * v1) + v3`. 
-- go: MaskedFusedMultiplyAdd231 + // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: MaskedFusedMultiplySubAdd masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd231 performs `(v2 * v3) + v1`. -- go: MaskedFusedMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub132 performs `(v1 * v3) - v2`. -- go: MaskedFusedMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub213 performs `(v2 * v1) - v3`. -- go: MaskedFusedMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySub231 performs `(v2 * v3) - v1`. -- go: MaskedFusedNegativeMultiplyAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd132 performs `-(v1 * v3) + v2`. -- go: MaskedFusedNegativeMultiplyAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd213 performs `-(v2 * v1) + v3`. -- go: MaskedFusedNegativeMultiplyAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplyAdd231 performs `-(v2 * v3) + v1`. -- go: MaskedFusedNegativeMultiplySub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub132 performs `-(v1 * v3) - v2`. -- go: MaskedFusedNegativeMultiplySub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub213 performs `-(v2 * v1) - v3`. 
-- go: MaskedFusedNegativeMultiplySub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedNegativeMultiplySub231 performs `-(v2 * v3) - v1`. -- go: MaskedFusedMultiplyAddSub132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub132 performs `(v1 * v3) - v2` for odd-indexed elements, and `(v1 * v3) + v2` for even-indexed elements. -- go: MaskedFusedMultiplyAddSub213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub213 performs `(v2 * v1) - v3` for odd-indexed elements, and `(v2 * v1) + v3` for even-indexed elements. -- go: MaskedFusedMultiplyAddSub231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplyAddSub231 performs `(v2 * v3) - v1` for odd-indexed elements, and `(v2 * v3) + v1` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd132 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd132 performs `(v1 * v3) + v2` for odd-indexed elements, and `(v1 * v3) - v2` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd213 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd213 performs `(v2 * v1) + v3` for odd-indexed elements, and `(v2 * v1) - v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd231 - masked: "true" - commutative: "false" - extension: "AVX.*" - documentation: !string |- - // MaskedFusedMultiplySubAdd231 performs `(v2 * v3) + v1` for odd-indexed elements, and `(v2 * v3) - v1` for even-indexed elements. + // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index b9add167..da894ac7 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -139,126 +139,18 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd132 - asm: "VFMADD132PS|VFMADD132PD" - in: - - &fma_op - go: $t - base: float - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd213 +- go: MaskedFusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAdd231 - asm: "VFMADD231PS|VFMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub132 - asm: "VFMSUB132PS|VFMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub213 - asm: "VFMSUB213PS|VFMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySub231 - asm: "VFMSUB231PS|VFMSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd132 - asm: "VFNMADD132PS|VFNMADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd213 - asm: "VFNMADD213PS|VFNMADD213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplyAdd231 - asm: "VFNMADD231PS|VFNMADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub132 - asm: "VFNMSUB132PS|VFNMSUB132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub213 - asm: "VFNMSUB213PS|VFNMSUB213PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedNegativeMultiplySub231 - asm: "VFNMSUB231PS|VFNMSUB231PD" 
- in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplyAddSub132 - asm: "VFMADDSUB132PS|VFMADDSUB132PD" - in: - - *fma_op + - &fma_op + go: $t + base: float - class: mask - *fma_op - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub213 +- go: MaskedFusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -267,25 +159,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub231 - asm: "VFMADDSUB231PS|VFMADDSUB231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd132 - asm: "VFMSUBADD132PS|VFMSUBADD132PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - - *fma_op -- go: MaskedFusedMultiplySubAdd213 +- go: MaskedFusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op @@ -293,13 +167,4 @@ - *fma_op - *fma_op out: - - *fma_op -- go: MaskedFusedMultiplySubAdd231 - asm: "VFMSUBADD231PS|VFMSUBADD231PD" - in: - - *fma_op - - class: mask - - *fma_op - - *fma_op - out: - *fma_op \ No newline at end of file From 85157288609be6cc2263598ead327572426b0ee8 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 14:07:58 -0400 Subject: [PATCH 095/200] internal/simdgen: added fp1gp1fp1 register mask This is for VPINSR[BWDQ] and is paired with dev.simd CL 682656 Change-Id: I66d71c37c04a27e3cf113a0c2ffa63c5713cacf1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682679 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdMachineOps.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 43ede6ec..0d357305 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . 
-xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. package main -func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1, fp1gp1fp1 regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true, "fp1gp1fp1": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { From 9e765e371c565b49ab89e639ca5dee1f12441d4f Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 13:06:05 -0400 Subject: [PATCH 096/200] internal/simdgen: add Operation.ResultType This allows GoArch to retain its original use. Also includes minor template renaming. 
Change-Id: Idea71cb4b8c2e12356cff848b897f84549f536b0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682676 Reviewed-by: Cherry Mui TryBot-Bypass: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 22 +++++++++++----------- internal/simdgen/gen_utility.go | 12 ++++++------ internal/simdgen/godefs.go | 22 ++++++++++++++++------ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 3910e028..14a5d41a 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -25,27 +25,27 @@ const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{end}} -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.GoArch}}), sys.AMD64) +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) {{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", 
opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.GoArch}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) {{end}} -{{define "typeMap"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) +{{define "loadStore"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) {{end}} @@ -89,8 +89,8 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { } for _, typ := range typesFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "typeMap", typ); err != nil { - 
panic(fmt.Errorf("failed to execute typeMap template: %w", err)) + if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { + panic(fmt.Errorf("failed to execute loadStore template: %w", err)) } } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 6cdc54ea..e3545fe0 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -331,19 +331,19 @@ func (op *Operation) sortOperand() { }) } +func (op Operation) ResultType() string { + return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) +} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. // The classification string is in the regular expression set "op[1234](Imm8)?" func classifyOp(op Operation) (string, Operation, error) { - _, shapeOut, _, immType, _, opNoConstMask, gOp, err := op.shape() + _, _, _, immType, _, opNoConstMask, gOp, err := op.shape() if err != nil { return "", op, err } - // Put the go ssa type in GoArch field, simd intrinsics need it. 
- if shapeOut == OneVregOut || shapeOut == OneKmaskOut || shapeOut == OneVregOutAtIn { - opNoConstMask.GoArch = fmt.Sprintf("types.TypeVec%d", *opNoConstMask.Out[0].Bits) - gOp.GoArch = fmt.Sprintf("types.TypeVec%d", *gOp.Out[0].Bits) - } + if immType == VarImm || immType == ConstVarImm { switch len(opNoConstMask.In) { case 1: diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 64c2e6e9..1650fa2d 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -45,13 +45,23 @@ func compareStringPointers(x, y *string) int { return 1 } +func compareIntPointers(x, y *int) int { + if x != nil && y != nil { + return *x - *y + } + if x == nil && y == nil { + return 0 + } + if x == nil { + return -1 + } + return 1 +} + func compareOperations(x, y Operation) int { if c := strings.Compare(x.Go, y.Go); c != 0 { return c } - if c := strings.Compare(x.GoArch, y.GoArch); c != 0 { - return c - } xIn, yIn := x.In, y.In if len(xIn) > len(yIn) && xIn[len(xIn)-1].Class == "mask" { @@ -88,13 +98,13 @@ func compareOperands(x, y *Operand) int { if x.Class == "immediate" { return compareStringPointers(x.ImmOffset, y.ImmOffset) } else { - if c := strings.Compare(*x.Base, *y.Base); c != 0 { + if c := compareStringPointers(x.Base, y.Base); c != 0 { return c } - if c := *x.ElemBits - *y.ElemBits; c != 0 { + if c := compareIntPointers(x.ElemBits, y.ElemBits); c != 0 { return c } - return *x.Bits - *y.Bits + return compareIntPointers(x.Bits, y.Bits) } } From 5de79ea63f8bc82d2a64331fa2150671172883e2 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 13:19:44 -0400 Subject: [PATCH 097/200] internal/simdgen: more verbose+informative error printing I found that I needed this while adding some new operations, and probably we will need this again in the future. 
Change-Id: I15bfe3a6117c7cb222df4c18258dded66f05e883 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682677 TryBot-Bypass: David Chase Reviewed-by: Junyang Shao --- internal/simdgen/gen_utility.go | 12 ++++++++---- internal/simdgen/main.go | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index e3545fe0..88e6c068 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -510,7 +510,7 @@ func capitalizeFirst(s string) string { // and [writeSIMDSSA], please be careful when updating these constraints. func overwrite(ops []Operation) error { hasClassOverwrite := false - overwrite := func(op []Operand, idx int) error { + overwrite := func(op []Operand, idx int, o Operation) error { if op[idx].OverwriteClass != nil { if op[idx].OverwriteBase == nil { return fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx]) @@ -536,15 +536,19 @@ func overwrite(ops []Operation) error { *op[idx].Base = oBase } if op[idx].OverwriteElementBits != nil { + if op[idx].ElemBits == nil { + panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) + } *op[idx].ElemBits = *op[idx].OverwriteElementBits *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Bits / *op[idx].ElemBits) + } return nil } - for i := range ops { + for i, o := range ops { hasClassOverwrite = false for j := range ops[i].In { - if err := overwrite(ops[i].In, j); err != nil { + if err := overwrite(ops[i].In, j, o); err != nil { return err } if hasClassOverwrite { @@ -552,7 +556,7 @@ func overwrite(ops []Operation) error { } } for j := range ops[i].Out { - if err := overwrite(ops[i].Out, j); err != nil { + if err := overwrite(ops[i].Out, j, o); err != nil { return err } } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 14bf9b8f..f1c9dc8b 100644 --- a/internal/simdgen/main.go +++ 
b/internal/simdgen/main.go @@ -260,6 +260,7 @@ func validate(cl unify.Closure, required map[*unify.Value]struct{}) { // at least say "it doesn't unify with anything in x.yaml". That's a lot // of work, but if we have trouble debugging unification failure it may // be worth it. - fmt.Fprintf(os.Stderr, "%s: def required, but did not unify\n", def.PosString()) + fmt.Fprintf(os.Stderr, "%s: def required, but did not unify (%v)\n", + def.PosString(), def) } } From 09e5d8adfaa9ea1ec8deea023a394d10926eb9af Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 18 Jun 2025 13:22:44 -0400 Subject: [PATCH 098/200] internal/simdgen: add some support for scalar args This is not complete; the SSA op change needs to be paired with a change to the compiler, so it will be small and separate. Change-Id: Iee0523152f0f9b158c6bfaa6403ee73c71a99665 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682678 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 17 +++++-- internal/simdgen/gen_utility.go | 14 +++--- internal/simdgen/godefs.go | 2 +- internal/simdgen/types.yaml | 82 ++++++++++++++++++------------- 4 files changed, 69 insertions(+), 46 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 4966dae7..d5ba1267 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -192,7 +192,8 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { return } seen[*arg.Go] = struct{}{} - lanes := *arg.Bits / *arg.ElemBits + + lanes := *arg.Lanes base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits) tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes) tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits) @@ -227,7 +228,8 @@ func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { for _, ts := range typeMap { for i, tsrc := range ts { for j, tdst := range ts { - if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" { + if i != j && tsrc.Type == tdst.Type && 
tsrc.Type == "vreg" && + tsrc.Lanes > 1 && tdst.Lanes > 1 { v = append(v, simdTypePair{tsrc, tdst}) } } @@ -254,7 +256,9 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { m := []simdType{} for _, ts := range typeMap { for _, tsrc := range ts { - m = append(m, tsrc) + if tsrc.Lanes > 1 { + m = append(m, tsrc) + } } } slices.SortFunc(m, compareSimdTypes) @@ -277,10 +281,17 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { sort.Ints(sizes) for _, size := range sizes { + if size <= 64 { + // these are scalar + continue + } if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil { panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err)) } for _, typeDef := range typeMap[size] { + if typeDef.Lanes == 1 { + continue + } if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 88e6c068..1aab1d6e 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -128,7 +128,7 @@ const ( // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
-func (op *Operation) shape() (shapeIn, shapeOut, maskType, immTyppe int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { +func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { if len(op.Out) > 1 { err = fmt.Errorf("simdgen only supports 1 output: %s", op) return @@ -211,18 +211,18 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immTyppe int, opNoImm removeImm(&opNoImmConstMask) if op.In[0].Const != nil { if op.In[0].ImmOffset != nil { - immTyppe = ConstVarImm + immType = ConstVarImm } else { - immTyppe = ConstImm + immType = ConstImm } } else if op.In[0].ImmOffset != nil { - immTyppe = VarImm + immType = VarImm } else { err = fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op) return } } else { - immTyppe = NoImm + immType = NoImm } if maskCount == 0 { if iConstMask == -1 { @@ -317,8 +317,8 @@ func (op *Operation) regShape() (string, error) { // sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. // TODO: verify that this is a safe assumption of the prog strcture. -// from my observation looks like in asm, imms are always the first, masks are always the last, with -// vreg in betwee... +// from my observation looks like in asm, imms are always the first, +// masks are always the last, with vreg in between. func (op *Operation) sortOperand() { priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} sort.SliceStable(op.In, func(i, j int) bool { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 1650fa2d..a8dd9791 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -124,7 +124,7 @@ type Operand struct { // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt // field of the operation. 
ImmOffset *string - Lanes *int // Lanes should equal Bits/ElemBits + Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 // If non-nil, it means the [Class] field is overwritten here, right now this is used to // overwrite the results of AVX2 compares to masks. OverwriteClass *string diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index c8b3660e..ec087ffd 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -7,48 +7,60 @@ in: !repeat - !sum &types - - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} + - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} + - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: 
Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} + - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} + - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256, lanes: 16} - - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} + - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} + - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} + - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - 
{class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512, lanes: 32} - {class: vreg, go: Int32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} - - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} + - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} + - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} + - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 
128, lanes: 4} + - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} + - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} + - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} + - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} + - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} + - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} + - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} + - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} + - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} + + - {class: vreg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} + - {class: vreg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} + - {class: vreg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} + - {class: vreg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} + - {class: vreg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: vreg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: vreg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} + - {class: vreg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} + - {class: vreg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: vreg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. 
out: !repeat - *types From b9b711eaf5b36f070d135c447ad8d18a0612b3a3 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 20 Jun 2025 15:34:17 -0400 Subject: [PATCH 099/200] internal/simdgen: add VPINSR[BWDQ] includes adjustments to register mask and code generation helper generation. Paired with dev.simd CL 683035 Change-Id: Ibfd42bac14596601f81190535ecf6095dfb41123 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683055 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 5 +++ internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdssa.go | 5 +-- internal/simdgen/gen_utility.go | 36 ++++++++++++++++------ internal/simdgen/go.yaml | 13 ++++++++ internal/simdgen/ops/Moves/categories.yaml | 6 ++++ internal/simdgen/ops/Moves/go.yaml | 14 +++++++++ 7 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 internal/simdgen/ops/Moves/categories.yaml create mode 100644 internal/simdgen/ops/Moves/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index c33a62e6..d8081bc4 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -613,6 +613,11 @@ extension: "AVX.*" documentation: !string |- // MaskedMin computes the minimum of corresponding elements. +- go: SetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // SetElem sets a single constant-indexed element's value. 
- go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 0d357305..f251e2e6 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -70,7 +70,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { panic(err) } if _, ok := regInfoSet[regInfo]; !ok { - panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s", regInfo)) + panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. Op is %s", regInfo, op)) } var outType string if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index e606b69c..a7305c9b 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -75,6 +75,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { "fp2k1k1Imm8", "fp31ResultInArg0", "fp3k1fp1ResultInArg0", + "fp1gp1fp1Imm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -82,7 +83,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { } seen := map[string]struct{}{} - allUnseen := map[string]struct{}{} + allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() @@ -114,7 +115,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { regShape += "Imm8" } if _, ok := regInfoSet[regShape]; !ok { - allUnseen[regShape] = struct{}{} + allUnseen[regShape] = append(allUnseen[regShape], op) } regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 1aab1d6e..eb9e82b6 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -277,10 +277,14 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType 
int, opNoImm func (op *Operation) regShape() (string, error) { _, _, _, _, _, _, gOp, _ := op.shape() var regInfo string - var vRegInCnt, kMaskInCnt, vRegOutCnt, kMaskOutCnt int + var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { if in.Class == "vreg" { - vRegInCnt++ + if *in.Lanes == 1 { + gRegInCnt++ + } else { + vRegInCnt++ + } } else if in.Class == "mask" { kMaskInCnt++ } @@ -288,29 +292,41 @@ func (op *Operation) regShape() (string, error) { for _, out := range gOp.Out { // If class overwrite is happening, that's not really a mask but a vreg. if out.Class == "vreg" || out.OverwriteClass != nil { - vRegOutCnt++ + if out.Lanes != nil && *out.Lanes == 1 { + gRegOutCnt++ + } else { + vRegOutCnt++ + } } else if out.Class == "mask" { kMaskOutCnt++ } } - var vRegInS, kMaskInS, vRegOutS, kMaskOutS string + var inRegs, inMasks, outRegs, outMasks string if vRegInCnt > 0 { - vRegInS = fmt.Sprintf("fp%d", vRegInCnt) + inRegs = fmt.Sprintf("fp%d", vRegInCnt) + } + if gRegInCnt > 0 { + inRegs += fmt.Sprintf("gp%d", gRegInCnt) } if kMaskInCnt > 0 { - kMaskInS = fmt.Sprintf("k%d", kMaskInCnt) + inMasks = fmt.Sprintf("k%d", kMaskInCnt) } if vRegOutCnt > 0 { - vRegOutS = fmt.Sprintf("fp%d", vRegOutCnt) + outRegs = fmt.Sprintf("fp%d", vRegOutCnt) + } + if gRegOutCnt > 0 { + outRegs += fmt.Sprintf("gp%d", gRegOutCnt) } if kMaskOutCnt > 0 { - kMaskOutS = fmt.Sprintf("k%d", kMaskOutCnt) + outMasks = fmt.Sprintf("k%d", kMaskOutCnt) } - if kMaskInCnt == 0 && kMaskOutCnt == 0 { + if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt == 0 && gRegOutCnt == 0 { // For pure fp we can abbreviate it as fp%d%d. 
regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) + } else if kMaskInCnt == 0 && kMaskOutCnt == 0 { + regInfo = fmt.Sprintf("%s%s", inRegs, outRegs) } else { - regInfo = fmt.Sprintf("%s%s%s%s", vRegInS, kMaskInS, vRegOutS, kMaskOutS) + regInfo = fmt.Sprintf("%s%s%s%s", inRegs, inMasks, outRegs, outMasks) } return regInfo, nil } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 20bd9d57..e36fc350 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -734,6 +734,19 @@ asm: "V?MINP[SD]" in: *1mask2float out: *1float +- go: SetElem + asm: "VPINSR[BWDQ]" + in: + - &t + class: vreg + base: $b + - class: vreg + base: $b + lanes: 1 # Scalar, darn it! + - class: immediate + immOffset: 0 + out: + - *t # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml new file mode 100644 index 00000000..26a1aa7d --- /dev/null +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -0,0 +1,6 @@ +!sum +- go: SetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // SetElem sets a single constant-indexed element's value diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml new file mode 100644 index 00000000..f015395e --- /dev/null +++ b/internal/simdgen/ops/Moves/go.yaml @@ -0,0 +1,14 @@ +!sum +- go: SetElem + asm: "VPINSR[BWDQ]" + in: + - &t + class: vreg + base: $b + - class: vreg + base: $b + lanes: 1 # Scalar, darn it! 
+ - class: immediate + immOffset: 0 + out: + - *t From 738b605eb97c90d324cb7b39f287714fa13ddf4b Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 20 Jun 2025 17:11:30 -0400 Subject: [PATCH 100/200] internal/simdgen: make simd regmask naming more like existing conventions Paired with dev.simd CL 683115 Change-Id: Ic1e8332480dbd5d7858912c603d35ec17032239e Reviewed-on: https://go-review.googlesource.com/c/arch/+/682937 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdMachineOps.go | 4 +-- internal/simdgen/gen_simdssa.go | 20 +++++++------- internal/simdgen/gen_utility.go | 36 ++++++++++++++------------ 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f251e2e6..a5ab8f27 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1, fp1gp1fp1 regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k1": true, "fp2k1fp1": true, "fp2k1k1": true, "fp1k1fp1": true, "fp31": true, "fp3k1fp1": true, "fp1gp1fp1": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index a7305c9b..dc121507 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -62,20 +62,20 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { regInfoKeys := []string{ "fp11", "fp21", - "fp2k1", - "fp2k1fp1", - "fp2k1k1", - "fp1k1fp1", + "fp2k", + "fp2kfp", + "fp2kk", + "fpkfp", "fp31", - "fp3k1fp1", + "fp3kfp", "fp11Imm8", - "fp1k1fp1Imm8", + "fpkfpImm8", "fp21Imm8", - "fp2k1Imm8", - "fp2k1k1Imm8", + "fp2kImm8", + "fp2kkImm8", "fp31ResultInArg0", - "fp3k1fp1ResultInArg0", - "fp1gp1fp1Imm8", + "fp3kfpResultInArg0", + "fpgpfpImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index eb9e82b6..b1ff4347 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -302,24 +302,26 @@ func (op *Operation) regShape() (string, error) { } } var inRegs, inMasks, outRegs, 
outMasks string - if vRegInCnt > 0 { - inRegs = fmt.Sprintf("fp%d", vRegInCnt) - } - if gRegInCnt > 0 { - inRegs += fmt.Sprintf("gp%d", gRegInCnt) - } - if kMaskInCnt > 0 { - inMasks = fmt.Sprintf("k%d", kMaskInCnt) - } - if vRegOutCnt > 0 { - outRegs = fmt.Sprintf("fp%d", vRegOutCnt) - } - if gRegOutCnt > 0 { - outRegs += fmt.Sprintf("gp%d", gRegOutCnt) - } - if kMaskOutCnt > 0 { - outMasks = fmt.Sprintf("k%d", kMaskOutCnt) + + rmAbbrev := func(s string, i int) string { + if i == 0 { + return "" + } + if i == 1 { + return s + } + return fmt.Sprintf("%s%d", s, i) + } + + inRegs = rmAbbrev("fp", vRegInCnt) + inRegs += rmAbbrev("gp", gRegInCnt) + inMasks = rmAbbrev("k", kMaskInCnt) + + outRegs = rmAbbrev("fp", vRegOutCnt) + outRegs += rmAbbrev("gp", gRegOutCnt) + outMasks = rmAbbrev("k", kMaskOutCnt) + if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt == 0 && gRegOutCnt == 0 { // For pure fp we can abbreviate it as fp%d%d. regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) From 71e8be6bb281f5a0612bc03c64d1711b4f70deef Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 23 Jun 2025 11:05:30 -0400 Subject: [PATCH 101/200] internal/simdgen: fix priority This reorders so that the order numbers ascending and the comparision is "<", which conforms to comparison function conventions and will allow attending another operand class (memory) without counting down into negative numbers. 
Change-Id: Ib8d229dd68c018c072f29ebd02424868004aa94b Reviewed-on: https://go-review.googlesource.com/c/arch/+/683335 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index b1ff4347..24d42106 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -338,12 +338,12 @@ func (op *Operation) regShape() (string, error) { // from my observation looks like in asm, imms are always the first, // masks are always the last, with vreg in between. func (op *Operation) sortOperand() { - priority := map[string]int{"immediate": 2, "vreg": 1, "mask": 0} + priority := map[string]int{"immediate": 0, "vreg": 1, "mask": 2} sort.SliceStable(op.In, func(i, j int) bool { pi := priority[op.In[i].Class] pj := priority[op.In[j].Class] if pi != pj { - return pi > pj + return pi < pj } return op.In[i].AsmPos < op.In[j].AsmPos }) From 2389045c982e0529eff298f84c7de93d1c47350f Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 23 Jun 2025 11:58:09 -0400 Subject: [PATCH 102/200] internal/arch: add separate "greg" operand class For operations with scalar operands Change-Id: I4849e6aec623787b07e5ebb26b053631c0ba5abe Reviewed-on: https://go-review.googlesource.com/c/arch/+/683375 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 20 ++++------ internal/simdgen/go.yaml | 2 +- internal/simdgen/godefs.go | 2 +- internal/simdgen/ops/Moves/go.yaml | 2 +- internal/simdgen/types.yaml | 20 +++++----- internal/simdgen/xed.go | 64 ++++++++++++++++++++++++------ 6 files changed, 73 insertions(+), 37 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 24d42106..5b3d2052 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -280,11 +280,9 @@ func (op *Operation) regShape() (string, error) { 
var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { if in.Class == "vreg" { - if *in.Lanes == 1 { - gRegInCnt++ - } else { - vRegInCnt++ - } + vRegInCnt++ + } else if in.Class == "greg" { + gRegInCnt++ } else if in.Class == "mask" { kMaskInCnt++ } @@ -292,11 +290,9 @@ func (op *Operation) regShape() (string, error) { for _, out := range gOp.Out { // If class overwrite is happening, that's not really a mask but a vreg. if out.Class == "vreg" || out.OverwriteClass != nil { - if out.Lanes != nil && *out.Lanes == 1 { - gRegOutCnt++ - } else { - vRegOutCnt++ - } + vRegOutCnt++ + } else if out.Class == "greg" { + gRegOutCnt++ } else if out.Class == "mask" { kMaskOutCnt++ } @@ -334,11 +330,11 @@ func (op *Operation) regShape() (string, error) { } // sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. -// TODO: verify that this is a safe assumption of the prog strcture. +// TODO: verify that this is a safe assumption of the prog structure. // from my observation looks like in asm, imms are always the first, // masks are always the last, with vreg in between. func (op *Operation) sortOperand() { - priority := map[string]int{"immediate": 0, "vreg": 1, "mask": 2} + priority := map[string]int{"immediate": 0, "vreg": 1, "greg": 1, "mask": 2} sort.SliceStable(op.In, func(i, j int) bool { pi := priority[op.In[i].Class] pj := priority[op.In[j].Class] diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index e36fc350..572f02ed 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -740,7 +740,7 @@ - &t class: vreg base: $b - - class: vreg + - class: greg base: $b lanes: 1 # Scalar, darn it! 
- class: immediate diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index a8dd9791..d9d0c20f 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -109,7 +109,7 @@ func compareOperands(x, y *Operand) int { } type Operand struct { - Class string // One of "mask", "immediate", "vreg" and "mem" + Class string // One of "mask", "immediate", "vreg", "greg", and "mem" Go *string // Go type of this operand AsmPos int // Position of this operand in the assembly instruction diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index f015395e..cdcb0ee8 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -5,7 +5,7 @@ - &t class: vreg base: $b - - class: vreg + - class: greg base: $b lanes: 1 # Scalar, darn it! - class: immediate diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index ec087ffd..765ae2e0 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -50,16 +50,16 @@ in: !repeat - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} - - {class: vreg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} - - {class: vreg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} - - {class: vreg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} - - {class: vreg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: vreg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: vreg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} - - {class: vreg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} - - {class: vreg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 
1} # bits: 32 is from XED for at least one instruction - - {class: vreg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} + - {class: greg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} + - {class: greg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} + - {class: greg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} + - {class: greg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} + - {class: greg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} + - {class: greg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. out: !repeat diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 44360435..e46e1be4 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -16,6 +16,12 @@ import ( "gopkg.in/yaml.v3" ) +const ( + NOT_REG_CLASS = 0 // not a register + VREG_CLASS = 1 // classify as a vector register; see + GREG_CLASS = 2 // classify as a general register +) + // TODO: Doc. Returns Values with Def domains. func loadXED(xedPath string) []*unify.Value { // TODO: Obviously a bunch more to do here. 
@@ -102,6 +108,12 @@ type operandVReg struct { // Vector register elemBaseType scalarBaseType } +type operandGReg struct { // Vector register + operandCommon + vecShape + elemBaseType scalarBaseType +} + // operandMask is a vector mask. // // Regardless of the actual mask representation, the [vecShape] of this operand @@ -155,6 +167,22 @@ func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { return } +func (o operandGReg) toValue() (fields []string, vals []*unify.Value) { + baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) + if err != nil { + panic("parsing baseRe: " + err.Error()) + } + fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ + strVal("greg"), + strVal(o.bits), + unify.NewValue(baseDomain)} + if o.elemBits != o.bits { + fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + } + // otherwise it means the vector could be any shape. + return +} + func (o operandMask) toValue() (fields []string, vals []*unify.Value) { return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} } @@ -211,8 +239,8 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { operandCommon: common, }, nil } else { - regBits, ok := decodeReg(op) - if !ok { + class, regBits := decodeReg(op) + if class == NOT_REG_CLASS { return nil, fmt.Errorf("failed to decode register %q", operand) } baseType, elemBits, ok := decodeType(op) @@ -220,11 +248,20 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { return nil, fmt.Errorf("failed to decode register width %q", operand) } shape := vecShape{elemBits: elemBits, bits: regBits} - return operandVReg{ + if class == VREG_CLASS { + return operandVReg{ + operandCommon: common, + vecShape: shape, + elemBaseType: baseType, + }, nil + } + // general register + return operandGReg{ operandCommon: common, vecShape: shape, elemBaseType: baseType, }, nil + } } else if 
strings.HasPrefix(lhs, "IMM") { _, bits, ok := decodeType(op) @@ -395,7 +432,10 @@ func singular[T comparable](xs []T) (T, bool) { return xs[0], true } -func decodeReg(op *xeddata.Operand) (w int, ok bool) { +// decodeReg returns class (NOT_REG_CLASS, VREG_CLASS, GREG_CLASS), +// and width in bits. If the operand cannot be decided as a register, +// then the clas is NOT_REG_CLASS. +func decodeReg(op *xeddata.Operand) (class, width int) { // op.Width tells us the total width, e.g.,: // // dq => 128 bits (XMM) @@ -408,27 +448,27 @@ func decodeReg(op *xeddata.Operand) (w int, ok bool) { // Hence, we dig into the register sets themselves. if !strings.HasPrefix(op.NameLHS(), "REG") { - return 0, false + return NOT_REG_CLASS, 0 } // TODO: We shouldn't be relying on the macro naming conventions. We should // use all-dec-patterns.txt, but xeddata doesn't support that table right now. rhs := op.NameRHS() if !strings.HasSuffix(rhs, "()") { - return 0, false + return NOT_REG_CLASS, 0 } switch { case strings.HasPrefix(rhs, "XMM_"): - return 128, true + return VREG_CLASS, 128 case strings.HasPrefix(rhs, "YMM_"): - return 256, true + return VREG_CLASS, 256 case strings.HasPrefix(rhs, "ZMM_"): - return 512, true + return VREG_CLASS, 512 case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"): - return 64, true + return GREG_CLASS, 64 case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"): - return 32, true + return GREG_CLASS, 32 } - return 0, false + return NOT_REG_CLASS, 0 } var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) From 1a0b84a45921bab571f64a62d4083f44a1c7474f Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 20 Jun 2025 05:54:38 +0000 Subject: [PATCH 103/200] internal/simdgen: add test wrapper generation Thic CL generates CL 683015. 
Change-Id: Idc7e8656835942aaefdb670c0de98e07c2cde8e1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/682995 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 162 ++++++++++++++++++++++++++++++ internal/simdgen/godefs.go | 1 + 2 files changed, 163 insertions(+) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d5ba1267..f43cc268 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -177,6 +177,168 @@ func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} {{end}} ` +const simdTestsWrapperTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. + +//go:build goexperiment.simd + +package simd_test + +import ( + "simd" + "testing" +) +{{end}} +{{define "op"}} +func test{{.OpShape}}(t *testing.T, {{.BaseArgDefList}}, want []{{.ResBaseType}}, which string) { + t.Helper() + var gotv simd.{{.ResVecType}} + got := make([]{{.ResBaseType}}, len(want)){{range $i, $a := .ArgVecTypes}} + vec{{$i}} := simd.Load{{$a}}Slice(v{{$i}}){{end}} + switch which { +{{range .Ops}}case "{{.}}": + gotv = vec0.{{.}}({{$.VecArgList}}){{$.OptionalMaskToInt}} +{{end}} + default: + t.Errorf("Unknown method: {{.Arg0VecType}}.%s", which) + } + gotv.StoreSlice(got) + for i := range len(want) { + if got[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} +{{end}} +` + +// writeSIMDTestsWrapper generates the test wrappers and writes it to simd_amd64_testwrappers.go +// within the specified directory. 
+func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { + t := templateOf(simdTestsWrapperTmpl, "simdTestWrappers") + buffer := new(bytes.Buffer) + + if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) + } + + // The comment shows an example of Uint8x64.Add + type opData struct { + OpShape string // "Uint8x64Uint8x64Uint8x64" + BaseArgDefList string // "v0 uint8[], v1 uint8[]" + VecArgList string // "vec1" + ResBaseType string // "uint8" + ResVecType string // "Uint8x64" + Arg0VecType string // "Uint8x64" + ArgVecTypes []string // ["Uint8x64", "Uint8x64"] + OptionalMaskToInt string // ".AsInt8x64()" or "" + Ops []string // ["Add", "Sub"] + } + + opsByShape := make(map[string]opData) + + for _, o := range ops { + _, _, _, immType, _, _, gOp, err := o.shape() + if err != nil { + panic(err) + } + if immType == VarImm || immType == ConstVarImm { + // Operations with variable immediates should be called directly + // instead of through wrappers. + continue + } + var shape string + var baseArgDefList []string + var vecArgList []string + var argVecTypes []string + var vec string + allSameVec := true + masked := strings.HasPrefix(gOp.Go, "Masked") + skippedMaskCnt := 0 + vecCnt := 0 + for i, in := range gOp.In { + baseArgDefList = append(baseArgDefList, fmt.Sprintf("v%d []%s%d", i, *in.Base, *in.ElemBits)) + if i != 0 { + maskConversion := "" + if in.Class == "mask" { + maskConversion = fmt.Sprintf(".As%s()", *in.Go) + } + vecArgList = append(vecArgList, fmt.Sprintf("vec%d%s", i, maskConversion)) + } + // gOp will only have either mask or vreg operand, so the following check + // is sufficient to detect whether it's a pure vreg or masked pure vreg operation + // with all the same vectors. 
+ if in.Class == "mask" { + if masked && skippedMaskCnt == 0 { + skippedMaskCnt++ + } else { + allSameVec = false + } + } else { + if len(vec) > 0 { + if vec != *in.Go { + allSameVec = false + } + } + vecCnt++ + vec = *in.Go + } + shape += *in.Go + argVecTypes = append(argVecTypes, strings.ReplaceAll(*in.Go, "Mask", "Int")) + } + if *gOp.Out[0].Go != vec { + allSameVec = false + } + shape += *gOp.Out[0].Go + if allSameVec { + numToName := map[int]string{1: "Unary", 2: "Binary", 3: "Ternary"} + if _, ok := numToName[vecCnt]; !ok { + panic(fmt.Errorf("unknown shape: %s", shape)) + } + shape = vec + numToName[vecCnt] + if masked { + shape = "Masked" + shape + } + } + optionalMaskToInt := "" + if gOp.Out[0].Class == "mask" { + optionalMaskToInt = fmt.Sprintf(".As%s()", strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int")) + } + if _, ok := opsByShape[shape]; !ok { + opsByShape[shape] = opData{ + OpShape: shape, + BaseArgDefList: strings.Join(baseArgDefList, ", "), + VecArgList: strings.Join(vecArgList, ", "), + ResBaseType: fmt.Sprintf("%s%d", *gOp.Out[0].Base, *gOp.Out[0].ElemBits), + ResVecType: strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int"), + Arg0VecType: *gOp.In[0].Go, + ArgVecTypes: argVecTypes, + OptionalMaskToInt: optionalMaskToInt, + } + } + data := opsByShape[shape] + data.Ops = append(data.Ops, gOp.Go) + opsByShape[shape] = data + } + + compareOpData := func(x, y opData) int { + return strings.Compare(x.OpShape, y.OpShape) + } + data := make([]opData, 0) + for _, d := range opsByShape { + slices.SortFunc(d.Ops, strings.Compare) + data = append(data, d) + } + slices.SortFunc(data, compareOpData) + + for _, d := range data { + if err := t.ExecuteTemplate(buffer, "op", d); err != nil { + panic(fmt.Errorf("failed to execute op template for op shape %s: %w", d.OpShape, err)) + } + } + + return buffer +} + // parseSIMDTypes groups go simd types by their vector sizes, and // returns a map whose key is the vector size, value is the simd type. 
func parseSIMDTypes(ops []Operation) simdTypeMap { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index d9d0c20f..15cce7dd 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -187,6 +187,7 @@ func writeGoDefs(path string, cl unify.Closure) error { formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/stubs_amd64.go") + formatWriteAndClose(writeSIMDTestsWrapper(deduped), path, "src/"+simdPackage+"/simd_wrapped_test.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") From 9468e52a75e4ade23b53e06a15aa72b8b97b52d5 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 23 Jun 2025 19:17:39 +0000 Subject: [PATCH 104/200] internal/simdgen: change test wrapper names Makes ($vector [, $vector]*) => $mask operations to be under wrapper test$(vector)(Unary|Binary|Ternary)Compare. 
Change-Id: I2194053d54f38e7f55b2822ced7fc7702b34f54f Reviewed-on: https://go-review.googlesource.com/c/arch/+/683455 Auto-Submit: Junyang Shao Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index f43cc268..01c5b503 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -251,6 +251,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { var vecArgList []string var argVecTypes []string var vec string + var vecOp Operand allSameVec := true masked := strings.HasPrefix(gOp.Go, "Masked") skippedMaskCnt := 0 @@ -281,12 +282,24 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } vecCnt++ vec = *in.Go + vecOp = in } shape += *in.Go argVecTypes = append(argVecTypes, strings.ReplaceAll(*in.Go, "Mask", "Int")) } - if *gOp.Out[0].Go != vec { - allSameVec = false + isCompare := false + isWiden := false + outOp := gOp.Out[0] + if *outOp.Go != vec { + if allSameVec && outOp.Class == "mask" && *outOp.Bits == *vecOp.Bits && *outOp.Lanes == *vecOp.Lanes { + isCompare = true + } + if allSameVec && outOp.Class == "vreg" && *outOp.Bits == *vecOp.Bits && *outOp.Base == *vecOp.Base && *outOp.Lanes == *vecOp.Lanes/2 { + isWiden = true + } + if !isCompare && !isWiden { + allSameVec = false + } } shape += *gOp.Out[0].Go if allSameVec { @@ -296,7 +309,17 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } shape = vec + numToName[vecCnt] if masked { - shape = "Masked" + shape + shape += "Masked" + } + if isCompare { + if vecCnt == 2 { + // Remove "Binary" + shape = strings.ReplaceAll(shape, "Binary", "") + } + shape += "Compare" + } + if isWiden { + shape += "Widen" } } optionalMaskToInt := "" From d0b1dcac0887bbb854c914107d92d9b37b46c94a Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 23 Jun 2025 15:50:46 -0400 
Subject: [PATCH 105/200] internal/simdgen: corrected type size confusion mistakes were made through the magic of unification. This seems to set the scalar type sizes in a way that works better. Before, weirdly, 32-bit scalars were claimed to have only 8 bits of width. This seems to make that right. Change-Id: Ia89261e80e1529c757e6e26ca337523f76244a18 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683495 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/ops/Moves/categories.yaml | 2 +- internal/simdgen/types.yaml | 20 ++++++++++---------- internal/simdgen/xed.go | 2 ++ 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 01c5b503..bf238f58 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -369,7 +369,7 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { ret := map[int][]simdType{} seen := map[string]struct{}{} processArg := func(arg Operand) { - if arg.Class == "immediate" { + if arg.Class == "immediate" || arg.Class == "greg" { // Immediates are not encoded as vector types. return } diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 26a1aa7d..9cf443a2 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -3,4 +3,4 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // SetElem sets a single constant-indexed element's value + // SetElem sets a single constant-indexed element's value. 
diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 765ae2e0..5178a216 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -50,16 +50,16 @@ in: !repeat - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: greg, go: float64, base: "float", elemBits: 64, bits: 64, lanes: 1} - - {class: greg, go: float32, base: "float", elemBits: 32, bits: 32, lanes: 1} - - {class: greg, go: int64, base: "int", elemBits: 64, bits: 64, lanes: 1} - - {class: greg, go: int32, base: "int", elemBits: 32, bits: 32, lanes: 1} - - {class: greg, go: int16, base: "int", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: greg, go: int8, base: "int", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: greg, go: uint64, base: "uint", elemBits: 64, bits: 64, lanes: 1} - - {class: greg, go: uint32, base: "uint", elemBits: 32, bits: 32, lanes: 1} - - {class: greg, go: uint16, base: "uint", elemBits: 16, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction - - {class: greg, go: uint8, base: "uint", elemBits: 8, bits: 32, lanes: 1} # bits: 32 is from XED for at least one instruction + - {class: greg, go: float64, base: "float", bits: 64, lanes: 1} + - {class: greg, go: float32, base: "float", bits: 32, lanes: 1} + - {class: greg, go: int64, base: "int", bits: 64, lanes: 1} + - {class: greg, go: int32, base: "int", bits: 32, lanes: 1} + - {class: greg, go: int16, base: "int", bits: 16, lanes: 1} + - {class: greg, go: int8, base: "int", bits: 8, lanes: 1} + - {class: greg, go: uint64, base: "uint", bits: 64, lanes: 1} + - {class: greg, go: uint32, base: "uint", bits: 32, lanes: 1} + - {class: greg, go: uint16, base: "uint", bits: 16, lanes: 1} + - {class: greg, go: uint8, base: "uint", bits: 8, lanes: 1} - {class: immediate, go: 
Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. out: !repeat diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index e46e1be4..1c26e1d1 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -256,6 +256,8 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { }, nil } // general register + m := min(shape.bits, shape.elemBits) + shape.bits, shape.elemBits = m, m return operandGReg{ operandCommon: common, vecShape: shape, From 38b7fb13370c8364f7d3350bff021e892c98c669 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 12:31:30 -0400 Subject: [PATCH 106/200] internal/simdgen: convert return-error to panics. Panics are more helpful for debugging, and this is not end-user code. Change-Id: I965acf50f0e13b3e2b71ba509195df11e6b75e63 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683855 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 5 +- internal/simdgen/gen_simdMachineOps.go | 8 +-- internal/simdgen/gen_simdTypes.go | 8 +-- internal/simdgen/gen_simdrules.go | 6 +- internal/simdgen/gen_simdssa.go | 6 +- internal/simdgen/gen_utility.go | 83 +++++++++++++------------- 6 files changed, 52 insertions(+), 64 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 6f8b16b7..f34cf9a1 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -43,10 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { - _, _, _, immType, _, _, gOp, err := op.shape() - if err != nil { - panic(err) - } + _, _, _, immType, _, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { diff --git 
a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index a5ab8f27..f1de3104 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -50,15 +50,15 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { - shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() - if err != nil { - panic(err) - } + shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + asm := gOp.Asm if maskType == OneMask { asm += "Masked" } + asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy // one here with a name suffix "Merging". The rewrite rules will need them. if _, ok := seen[asm]; ok { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index bf238f58..b4ec0206 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -237,10 +237,8 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) for _, o := range ops { - _, _, _, immType, _, _, gOp, err := o.shape() - if err != nil { - panic(err) - } + _, _, _, immType, _, _, gOp := o.shape() + if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly // instead of through wrappers. 
@@ -504,7 +502,7 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) } if err := t.ExecuteTemplate(buffer, s, op); err != nil { - panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) + panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err)) } } else { diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 651ae382..00ef8568 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -66,10 +66,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, immType, _, _, gOp, err := opr.shape() - if err != nil { - panic(err) - } + opInShape, opOutShape, maskType, immType, _, _, gOp := opr.shape() + vregInCnt := len(gOp.In) asm := gOp.Asm if maskType == OneMask { diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index dc121507..2993f27a 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -86,10 +86,8 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, _, gOp, err := op.shape() - if err != nil { - panic(err) - } + shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + if maskType == 2 { asm += "Masked" } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 5b3d2052..af7b984f 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -128,10 +128,9 @@ const ( // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
-func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation, err error) { +func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation) { if len(op.Out) > 1 { - err = fmt.Errorf("simdgen only supports 1 output: %s", op) - return + panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } var outputReg int if len(op.Out) == 1 { @@ -141,15 +140,13 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } else if op.Out[0].Class == "mask" { shapeOut = OneKmaskOut } else { - err = fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op) - return + panic(fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op)) } } else { shapeOut = NoOut // TODO: are these only Load/Stores? // We manually supported two Load and Store, are those enough? - err = fmt.Errorf("simdgen only supports 1 output: %s", op) - return + panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } hasImm := false maskCount := 0 @@ -160,31 +157,28 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { shapeOut = OneVregOutAtIn } else { - err = fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op) - return + panic(fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op)) } } if in.Class == "immediate" { // A manual check on XED data found that AMD64 SIMD instructions at most // have 1 immediates. So we don't need to check this here. 
if *in.Bits != 8 { - err = fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op) - return + panic(fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op)) } hasImm = true } else if in.Class == "mask" { if in.Const != nil { if *in.Const == "K0" { if iConstMask != -1 { - err = fmt.Errorf("simdgen only supports one const mask in inputs: %s", op) - return + panic(fmt.Errorf("simdgen only supports one const mask in inputs: %s", op)) } iConstMask = i // Const mask should be invisible in ssa and prog, so we don't treat it as a mask. // More specifically in prog, it's optional: when missing the assembler will default it to K0). // TODO: verify the above assumption is safe. } else { - err = fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op) + panic(fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op)) } } else { maskCount++ @@ -218,8 +212,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } else if op.In[0].ImmOffset != nil { immType = VarImm } else { - err = fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op) - return + panic(fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op)) } } else { immType = NoImm @@ -235,16 +228,13 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } checkPureMask := func() bool { if hasImm { - err = fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op) - return true + panic(fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op)) } if iConstMask != -1 { - err = fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op) - return true + panic(fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op)) } if hasVreg { - err = fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op) - return true + panic(fmt.Errorf("simdgen does 
not support more than 1 masks in non-pure mask operations: %s", op)) } return false } @@ -275,7 +265,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm // regShape returns a string representation of the register shape. func (op *Operation) regShape() (string, error) { - _, _, _, _, _, _, gOp, _ := op.shape() + _, _, _, _, _, _, gOp := op.shape() var regInfo string var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { @@ -349,14 +339,27 @@ func (op Operation) ResultType() string { return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) } +// GoType returns the Go type returned by this operation (relative to the simd package), +// for example "int32" or "Int8x16". This is used in a template. +func (op Operation) GoType() string { + if op.Out[0].Class == "greg" { + if op.Go == "GetElem" { + at := 0 // proper value of at depends on whether immediate was stripped or not + if op.In[at].Class == "immediate" { + at++ + } + return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) + } + panic(fmt.Errorf("Implement this case for %v", op)) + } + return *op.Out[0].Go +} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. // The classification string is in the regular expression set "op[1234](Imm8)?" 
func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, _, opNoConstMask, gOp, err := op.shape() - if err != nil { - return "", op, err - } + _, _, _, immType, _, opNoConstMask, gOp := op.shape() if immType == VarImm || immType == ConstVarImm { switch len(opNoConstMask.In) { @@ -415,10 +418,8 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _, _, err := op.shape() - if err != nil { - return nil, err - } + shapeIn, _, _, _, _, _, _ := op.shape() + if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op op2.In = slices.Clone(op.In) @@ -447,10 +448,8 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, _, gOp, err := op.shape() - if err != nil { - return nil, err - } + _, _, _, _, _, _, gOp := op.shape() + genericNames := gOp.Go + *gOp.In[0].Go seen[genericNames] = append(seen[genericNames], op) } @@ -493,10 +492,8 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - _, _, _, immType, _, _, _, err := op.shape() - if err != nil { - return err - } + _, _, _, immType, _, _, _ := op.shape() + if immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm } @@ -527,18 +524,18 @@ func overwrite(ops []Operation) error { overwrite := func(op []Operand, idx int, o Operation) error { if op[idx].OverwriteClass != nil { if op[idx].OverwriteBase == nil { - return fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx]) + panic(fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx])) } oBase := *op[idx].OverwriteBase oClass := *op[idx].OverwriteClass if oClass != "mask" { - return fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx]) + panic(fmt.Errorf("simdgen: [Class] 
overwrite only supports overwritting to mask: %s", op[idx])) } if oBase != "int" { - return fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx]) + panic(fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx])) } if op[idx].Class != "vreg" { - return fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx]) + panic(fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx])) } hasClassOverwrite = true *op[idx].Base = oBase From 9a776a7966febb277a303d90f42e8ef642c54207 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 15:10:04 -0400 Subject: [PATCH 107/200] internal/simdgen: mute the not-normally-useful errors from XED All they do is blow context off the screen Change-Id: Id1b4d9cd487f568a161d9353b3c04d5e6630a115 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683857 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/godefs.go | 21 ++++++++++++++++----- internal/simdgen/main.go | 10 ++++++++++ internal/simdgen/xed.go | 7 ++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 15cce7dd..8dc928f8 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -159,30 +159,41 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. 
deduped := dedup(ops) - log.Printf("dedup len: %d\n", len(ops)) + + if *Verbose { + log.Printf("dedup len: %d\n", len(ops)) + } var err error if err = overwrite(deduped); err != nil { return err } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } if !*FlagNoSplitMask { if deduped, err = splitMask(deduped); err != nil { return err } } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } if !*FlagNoDedup { if deduped, err = dedupGodef(deduped); err != nil { return err } } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } if !*FlagNoConstImmPorting { if err = copyConstImm(deduped); err != nil { return err } } - log.Printf("dedup len: %d\n", len(deduped)) + if *Verbose { + log.Printf("dedup len: %d\n", len(deduped)) + } typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index f1c9dc8b..db77d8c3 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -111,6 +111,8 @@ var ( FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") FlagArch = flag.String("arch", "amd64", "the target architecture") + Verbose = flag.Bool("v", false, "verbose") + flagDebugXED = flag.Bool("debug-xed", false, "show XED instructions") flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") @@ -201,6 +203,14 @@ func main() { } } + if !*Verbose { + if operandRemarks == 0 { + fmt.Printf("XED decoding generated no errors, which is unusual.\n") + } else { + fmt.Printf("XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) + } + } + // Validate 
results. // // Don't validate if this is a command-line query because that tends to diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 1c26e1d1..387db08a 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -22,6 +22,8 @@ const ( GREG_CLASS = 2 // classify as a general register ) +var operandRemarks int + // TODO: Doc. Returns Values with Def domains. func loadXED(xedPath string) []*unify.Value { // TODO: Obviously a bunch more to do here. @@ -49,7 +51,10 @@ func loadXED(xedPath string) []*unify.Value { ins, outs, err := decodeOperands(db, strings.Fields(inst.Operands)) if err != nil { - log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) + operandRemarks++ + if *Verbose { + log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) + } return } // TODO: "feature" From 85ea620b1467dd69facea30c096c269be8aaf76a Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 14:51:19 -0400 Subject: [PATCH 108/200] internal/simdgen: cleanups, and prep for VPEXTR* Adding VPEXTR* requires a non-vector output, which required some changes. 
There's at least two more follow-on CLs, one that will pair with a dev.simd glue update, and then one to add VPEXTR* Change-Id: I06ed9eb8b74304e39e0dc1356d726bae35295c79 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683856 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 16 +++++----- internal/simdgen/gen_simdMachineOps.go | 6 ++-- internal/simdgen/gen_simdTypes.go | 16 +++++----- internal/simdgen/gen_simdrules.go | 5 ++- internal/simdgen/gen_simdssa.go | 3 +- internal/simdgen/gen_utility.go | 43 ++++++++++++++++++++------ internal/simdgen/godefs.go | 15 +++++++++ 7 files changed, 75 insertions(+), 29 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 14a5d41a..3fea1568 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -25,21 +25,21 @@ const simdPackage = "` + simdPackage + `" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{end}} -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", 
opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.ResultType}}), sys.AMD64) +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.ResultType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f1de3104..4525ac85 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ 
b/internal/simdgen/gen_simdMachineOps.go @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true} + regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true, "fpgp": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -57,7 +57,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { asm += "Masked" } - asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy // one here with a name suffix "Merging". The rewrite rules will need them. @@ -76,6 +76,8 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { // If class overwrite is happening, that's not really a mask but a vreg. 
outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) + } else if shapeOut == OneGregOut { + outType = gOp.GoType() // this is a straight Go type, not a VecNNN type } else if shapeOut == OneKmaskOut { outType = "Mask" } else { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index b4ec0206..c5e7d2fa 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -107,56 +107,56 @@ package simd {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} {{end}} {{define "op2"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{.GoType}} {{end}} {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{.GoType}} {{end}} {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{.GoType}} {{end}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: 
{{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{.GoType}} {{end}} {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{.GoType}} {{end}} {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{.GoType}} {{end}} {{define "vectorConversion"}} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 00ef8568..ad260829 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -74,7 +74,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { asm += "Masked" vregInCnt-- } - asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) data := tplRuleData{ GoOp: gOp.Go, @@ -126,6 +126,9 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { case PureKmaskIn: panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) } + } else if opOutShape == OneGregOut { + tplName = "pureVreg" // TODO this will be wrong + data.GoType = *gOp.In[0].Go } else { // OneKmaskOut case data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 
2993f27a..35a061bf 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -76,6 +76,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { "fp31ResultInArg0", "fp3kfpResultInArg0", "fpgpfpImm8", + "fpgpImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -91,7 +92,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { if maskType == 2 { asm += "Masked" } - asm = fmt.Sprintf("%s%d", asm, *gOp.Out[0].Bits) + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) if _, ok := seen[asm]; ok { continue } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index af7b984f..83a3e982 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -72,6 +72,7 @@ const ( InvalidOut int = iota NoOut OneVregOut + OneGregOut OneKmaskOut OneVregOutAtIn ) @@ -137,6 +138,8 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm outputReg = op.Out[0].AsmPos if op.Out[0].Class == "vreg" { shapeOut = OneVregOut + } else if op.Out[0].Class == "greg" { + shapeOut = OneGregOut } else if op.Out[0].Class == "mask" { shapeOut = OneKmaskOut } else { @@ -335,7 +338,36 @@ func (op *Operation) sortOperand() { }) } -func (op Operation) ResultType() string { +// goNormalType returns the Go type name for the result of an Op that +// does not return a vector, i.e., that returns a result in a general +// register. Currently there's only one family of Ops in Go's simd library +// that does this (GetElem), and so this is specialized to work for that, +// but the problem (mismatch betwen hardware register width and Go type +// width) seems likely to recur if there are any other cases. +func (op Operation) goNormalType() string { + if op.Go == "GetElem" { + // GetElem returns an element of the vector into a general register + // but as far as the hardware is concerned, that result is either 32 + // or 64 bits wide, no matter what the vector element width is. 
+ // This is not "wrong" but it is not the right answer for Go source code. + // To get the Go type right, combine the base type ("int", "uint", "float"), + // with the input vector element width in bits (8,16,32,64). + + at := 0 // proper value of at depends on whether immediate was stripped or not + if op.In[at].Class == "immediate" { + at++ + } + return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) + } + panic(fmt.Errorf("Implement goNormalType for %v", op)) +} + +// SSAType returns the string for the type reference in SSA generation, +// for example in the intrinsics generating template. +func (op Operation) SSAType() string { + if op.Out[0].Class == "greg" { + return fmt.Sprintf("types.Types[types.T%s]", strings.ToUpper(op.goNormalType())) + } return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) } @@ -343,14 +375,7 @@ func (op Operation) ResultType() string { // for example "int32" or "Int8x16". This is used in a template. func (op Operation) GoType() string { if op.Out[0].Class == "greg" { - if op.Go == "GetElem" { - at := 0 // proper value of at depends on whether immediate was stripped or not - if op.In[at].Class == "immediate" { - at++ - } - return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) - } - panic(fmt.Errorf("Implement this case for %v", op)) + return op.goNormalType() } return *op.Out[0].Go } diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 8dc928f8..6b30dee2 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -5,6 +5,7 @@ package main import ( + "fmt" "log" "slices" "strings" @@ -32,6 +33,20 @@ type Operation struct { Masked *string } +func (o *Operation) VectorWidth() int { + out := o.Out[0] + if out.Class == "vreg" { + return *out.Bits + } else if out.Class == "greg" || out.Class == "mask" { + for i := range o.In { + if o.In[i].Class == "vreg" { + return *o.In[i].Bits + } + } + } + panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o)) +} + 
func compareStringPointers(x, y *string) int { if x != nil && y != nil { return strings.Compare(*x, *y) From 8c668448498d5d2a28619f706c0f29e3e4ecff54 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 16:21:58 -0400 Subject: [PATCH 109/200] internal/simdgen: changes to generated code, for VPEXTR These changes generate dev.simd CL 683816 which should be submitted after this CL. Change-Id: I9e26bc8aec74199d8e5dea9aca2520d455818a46 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683858 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdMachineOps.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 4525ac85..f879791d 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp regInfo) []opData { +func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, fpgp regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, From 7f641766c6c3fd78bfe5b218703d96afa7f32ebf Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 23 Jun 2025 19:33:46 +0000 Subject: [PATCH 110/200] internal/simdgen: add shift and rotate operations Change-Id: Id593b325b4585010488e1cadc91c7f14637bc4cd Reviewed-on: https://go-review.googlesource.com/c/arch/+/683475 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 141 ++++++++++++ internal/simdgen/gen_simdTypes.go | 57 ++++- internal/simdgen/gen_simdssa.go | 16 ++ internal/simdgen/gen_utility.go | 78 +++++++ internal/simdgen/go.yaml | 208 +++++++++++++++++ internal/simdgen/godefs.go | 6 + .../simdgen/ops/ShiftRotate/categories.yaml | 142 ++++++++++++ internal/simdgen/ops/ShiftRotate/go.yaml | 209 ++++++++++++++++++ 8 files changed, 856 insertions(+), 1 deletion(-) create mode 100644 internal/simdgen/ops/ShiftRotate/categories.yaml create mode 100644 internal/simdgen/ops/ShiftRotate/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index d8081bc4..b349fc51 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -673,3 +673,144 @@ // MaskedMulLow multiplies elements and stores the low part of the result, masked. docUnmasked: !string |- // MulLow multiplies elements and stores the low part of the result. +- go: ShiftAllLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
+- go: MaskedShiftAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +- go: ShiftAllRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +- go: MaskedShiftAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +- go: ShiftAllRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: MaskedShiftAllRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + +- go: ShiftLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +- go: MaskedShiftLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+- go: ShiftRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: MaskedShiftRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: ShiftRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +- go: MaskedShiftRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + +- go: MaskedRotateAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +- go: MaskedRotateLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: MaskedRotateAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
+- go: MaskedRotateRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. + +- go: MaskedShiftAllLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +- go: MaskedShiftAllRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +- go: MaskedShiftLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +- go: MaskedShiftRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index c5e7d2fa..864e0b7f 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -124,6 +124,20 @@ func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{.GoType}} func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{.GoType}} {{end}} +{{define "op2VecAsScalar"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} +{{end}} + +{{define "op3VecAsScalar"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +{{end}} + {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -209,6 +223,11 @@ func test{{.OpShape}}(t *testing.T, {{.BaseArgDefList}}, want []{{.ResBaseType}} } } {{end}} +{{define "untestedOpHeader"}} +/* The operations below cannot be tested via wrappers, please test them directly */ +{{end}} +{{define "untestedOp"}} +// {{.}}{{end}} ` // writeSIMDTestsWrapper generates the test wrappers and writes it to simd_amd64_testwrappers.go @@ -235,15 +254,24 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } opsByShape := make(map[string]opData) - + opsSkipped := map[string]struct{}{} for _, o := range ops { _, _, _, immType, _, _, gOp := o.shape() if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly // instead of through wrappers. + opsSkipped[o.Go] = struct{}{} continue } + if vasIdx, err := checkVecAsScalar(o); err != nil { + panic(err) + } else if vasIdx != -1 { + // TODO: these could be tested via wrappers, implement this. 
+ opsSkipped[o.Go] = struct{}{} + continue + } + var shape string var baseArgDefList []string var vecArgList []string @@ -357,6 +385,22 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } } + if len(opsSkipped) != 0 { + if err := t.ExecuteTemplate(buffer, "untestedOpHeader", nil); err != nil { + panic(fmt.Errorf("failed to execute untestedOpHeader")) + } + opsK := []string{} + for k := range opsSkipped { + opsK = append(opsK, k) + } + slices.SortFunc(opsK, strings.Compare) + for _, k := range opsK { + if err := t.ExecuteTemplate(buffer, "untestedOp", k); err != nil { + panic(fmt.Errorf("failed to execute untestedOp")) + } + } + } + return buffer } @@ -497,7 +541,18 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { slices.SortFunc(ops, compareOperations) for i, op := range ops { + idxVecAsScalar, err := checkVecAsScalar(op) + if err != nil { + panic(err) + } if s, op, err := classifyOp(op); err == nil { + if idxVecAsScalar != -1 { + if s == "op2" || s == "op3" { + s += "VecAsScalar" + } else { + panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize")) + } + } if i == 0 || op.Go != ops[i-1].Go { fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index 35a061bf..ffb172a6 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -75,8 +75,11 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { "fp2kkImm8", "fp31ResultInArg0", "fp3kfpResultInArg0", + "fpXfp", + "fpXkfp", "fpgpfpImm8", "fpgpImm8", + "fp2kfpImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -113,6 +116,19 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { regShape += "Imm8" } + idx, err := checkVecAsScalar(op) + if err != nil { + panic(err) + } + if idx != -1 { + if regShape == "fp21" { + regShape = "fpXfp" + } else if regShape == "fp2kfp" { + regShape = 
"fpXkfp" + } else { + panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regShape, op)) + } + } if _, ok := regInfoSet[regShape]; !ok { allUnseen[regShape] = append(allUnseen[regShape], op) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 83a3e982..cba608e0 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -417,6 +417,33 @@ func classifyOp(op Operation) (string, Operation, error) { } } +func checkVecAsScalar(op Operation) (idx int, err error) { + idx = -1 + sSize := 0 + for i, o := range op.In { + if o.TreatLikeAScalarOfSize != nil { + if idx == -1 { + idx = i + sSize = *o.TreatLikeAScalarOfSize + } else { + err = fmt.Errorf("simdgen only supports one TreatLikeAScalarOfSize in the arg list: %s", op) + return + } + } + } + if idx >= 0 { + if idx != 1 { + err = fmt.Errorf("simdgen only supports TreatLikeAScalarOfSize at the 2nd arg of the arg list: %s", op) + return + } + if sSize != 8 && sSize != 16 && sSize != 32 && sSize != 64 { + err = fmt.Errorf("simdgen does not recognize this uint size: %d, %s", sSize, op) + return + } + } + return +} + // dedup is deduping operations in the full structure level. func dedup(ops []Operation) (deduped []Operation) { for _, op := range ops { @@ -607,6 +634,51 @@ func overwrite(ops []Operation) error { return nil } +// reportXEDInconsistency reports potential XED inconsistencies. +// We can add more fields to [Operation] to enable more checks and implement it here. +// Supported checks: +// [NameAndSizeCheck]: NAME[BWDQ] should set the elemBits accordingly. +// This check is useful to find inconsistencies, then we can add overwrite fields to +// those defs to correct them manually. 
+func reportXEDInconsistency(ops []Operation) error { + for _, o := range ops { + if o.NameAndSizeCheck != nil { + suffixSizeMap := map[byte]int{'B': 8, 'W': 16, 'D': 32, 'Q': 64} + checkOperand := func(opr Operand) error { + if opr.ElemBits == nil { + return fmt.Errorf("simdgen expects elemBits to be set when performing NameAndSizeCheck") + } + if v, ok := suffixSizeMap[o.Asm[len(o.Asm)-1]]; !ok { + return fmt.Errorf("simdgen expects asm to end with [BWDQ] when performing NameAndSizeCheck") + } else { + if v != *opr.ElemBits { + return fmt.Errorf("simdgen finds NameAndSizeCheck inconsistency in def: %s", o) + } + } + return nil + } + for _, in := range o.In { + if in.Class != "vreg" && in.Class != "mask" { + continue + } + if in.TreatLikeAScalarOfSize != nil { + // This is an irregular operand, don't check it. + continue + } + if err := checkOperand(in); err != nil { + return err + } + } + for _, out := range o.Out { + if err := checkOperand(out); err != nil { + return err + } + } + } + } + return nil +} + func (o Operation) String() string { var sb strings.Builder sb.WriteString("Operation {\n") @@ -719,6 +791,12 @@ func (op Operand) String() string { sb.WriteString(" OverwriteElementBits: \n") } + if op.TreatLikeAScalarOfSize != nil { + sb.WriteString(fmt.Sprintf(" TreatLikeAScalarOfSize: %d\n", *op.TreatLikeAScalarOfSize)) + } else { + sb.WriteString(" TreatLikeAScalarOfSize: \n") + } + sb.WriteString(" }\n") return sb.String() } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 572f02ed..52fef3b7 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -862,3 +862,211 @@ - *int out: - *int2 +# Integers +# ShiftAll* +- go: ShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - &any + go: $t + - &vecAsScalar64 + treatLikeAScalarOfSize: 64 + go: Uint64x2 + out: + - *any +- go: MaskedShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRight + asm: "VPSRL[WDQ]" + in: + - *any + - 
*vecAsScalar64 + out: + - *any +- go: MaskedShiftAllRight + asm: "VPSRL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - &int + go: $t + base: int + - *vecAsScalar64 + out: + - *int +- go: MaskedShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - class: mask + - *int + - *vecAsScalar64 + out: + - *int + +# Shift* (variable) +- go: ShiftLeft + asm: "VPSLLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftLeft + asm: "VPSLLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite +# it to 64. +- go: ShiftLeft + asm: "VPSLLVQ" + in: + - &anyOverwriteElemBits + go: $t + overwriteElementBits: 64 + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftLeft + asm: "VPSLLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRight + asm: "VPSRLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRight + asm: "VPSRLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
+- go: ShiftRight + asm: "VPSRLVQ" + in: + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftRight + asm: "VPSRLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Rotate +- go: MaskedRotateAllLeft + asm: "VPROL[DQ]" + in: + - class: mask + - *any + - &pureImm + class: immediate + immOffset: 0 + out: + - *any +- go: MaskedRotateAllRight + asm: "VPROR[DQ]" + in: + - class: mask + - *any + - *pureImm + out: + - *any +- go: MaskedRotateLeft + asm: "VPROLV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any +- go: MaskedRotateRight + asm: "VPRORV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Bizzare shifts. +- go: MaskedShiftAllLeftAndFillUpperFrom + asm: "VPSHLD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftAllRightAndFillUpperFrom + asm: "VPSHRD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftLeftAndFillUpperFrom + asm: "VPSHLDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any +- go: MaskedShiftRightAndFillUpperFrom + asm: "VPSHRDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 6b30dee2..1dcd48ec 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -31,6 +31,8 @@ type Operation struct { // Masked indicates that this is a masked operation, this field has to be set for masked operations // otherwise simdgen won't recognize it in [splitMask]. Masked *string + // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. 
+ NameAndSizeCheck *string } func (o *Operation) VectorWidth() int { @@ -140,6 +142,10 @@ type Operand struct { // field of the operation. ImmOffset *string Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 + // TreatLikeAScalarOfSize means only the lower $TreatLikeAScalarOfSize bits of the vector + // is used, so at the API level we can make it just a scalar value of this size; Then we + // can overwrite it to a vector of the right size during intrinsics stage. + TreatLikeAScalarOfSize *int // If non-nil, it means the [Class] field is overwritten here, right now this is used to // overwrite the results of AVX2 compares to masks. OverwriteClass *string diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml new file mode 100644 index 00000000..91a0e3d0 --- /dev/null +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -0,0 +1,142 @@ +!sum +- go: ShiftAllLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +- go: MaskedShiftAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +- go: ShiftAllRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +- go: MaskedShiftAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
+- go: ShiftAllRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: MaskedShiftAllRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + +- go: ShiftLeft + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +- go: MaskedShiftLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +- go: ShiftRight + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: MaskedShiftRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +- go: ShiftRightSignExtended + nameAndSizeCheck: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. +- go: MaskedShiftRightSignExtended + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + +- go: MaskedRotateAllLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +- go: MaskedRotateLeft + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: MaskedRotateAllRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. +- go: MaskedRotateRight + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. + +- go: MaskedShiftAllLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+- go: MaskedShiftAllRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +- go: MaskedShiftLeftAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +- go: MaskedShiftRightAndFillUpperFrom + nameAndSizeCheck: "true" + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml new file mode 100644 index 00000000..7205bab3 --- /dev/null +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -0,0 +1,209 @@ +!sum +# Integers +# ShiftAll* +- go: ShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - &any + go: $t + - &vecAsScalar64 + treatLikeAScalarOfSize: 64 + go: Uint64x2 + out: + - *any +- go: MaskedShiftAllLeft + asm: "VPSLL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRight + asm: "VPSRL[WDQ]" + in: + - *any + - *vecAsScalar64 + out: + - *any +- go: MaskedShiftAllRight + asm: "VPSRL[WDQ]" + in: + - class: mask + - *any + - *vecAsScalar64 + out: + - *any +- go: ShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - &int + go: $t + base: int + - *vecAsScalar64 + out: + - *int +- go: MaskedShiftAllRightSignExtended + asm: "VPSRA[WDQ]" + in: + - class: mask + - *int + - *vecAsScalar64 + out: + - *int + +# Shift* (variable) +- go: ShiftLeft + asm: "VPSLLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftLeft + asm: "VPSLLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite +# it to 64. +- go: ShiftLeft + asm: "VPSLLVQ" + in: + - &anyOverwriteElemBits + go: $t + overwriteElementBits: 64 + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftLeft + asm: "VPSLLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRight + asm: "VPSRLV[WD]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRight + asm: "VPSRLV[WD]" + in: + - class: mask + - *any + - *any + out: + - *any +# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
+- go: ShiftRight + asm: "VPSRLVQ" + in: + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: MaskedShiftRight + asm: "VPSRLVQ" + in: + - class: mask + - *anyOverwriteElemBits + - *anyOverwriteElemBits + out: + - *anyOverwriteElemBits +- go: ShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - *any + - *any + out: + - *any +- go: MaskedShiftRightSignExtended + asm: "VPSRAV[WDQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Rotate +- go: MaskedRotateAllLeft + asm: "VPROL[DQ]" + in: + - class: mask + - *any + - &pureImm + class: immediate + immOffset: 0 + out: + - *any +- go: MaskedRotateAllRight + asm: "VPROR[DQ]" + in: + - class: mask + - *any + - *pureImm + out: + - *any +- go: MaskedRotateLeft + asm: "VPROLV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any +- go: MaskedRotateRight + asm: "VPRORV[DQ]" + in: + - class: mask + - *any + - *any + out: + - *any + +# Bizzare shifts. +- go: MaskedShiftAllLeftAndFillUpperFrom + asm: "VPSHLD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftAllRightAndFillUpperFrom + asm: "VPSHRD[WDQ]" + in: + - class: mask + - *any + - *any + - *pureImm + out: + - *any +- go: MaskedShiftLeftAndFillUpperFrom + asm: "VPSHLDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any +- go: MaskedShiftRightAndFillUpperFrom + asm: "VPSHRDV[WDQ]" + in: + - *any + - class: mask + - *any + - *any + out: + - *any \ No newline at end of file From 11b9f365aeeaca3d5de7d4c6b56eba2d2e35f36b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 26 Jun 2025 04:07:24 +0000 Subject: [PATCH 111/200] internal/simdgen: add galois field instructions This CL generates CL 684175. 
Change-Id: I1b327fd1d3d3aa15cd23523371f186ceef37db76 Reviewed-on: https://go-review.googlesource.com/c/arch/+/684155 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 26 ++++++++++++++++ internal/simdgen/go.yaml | 30 ++++++++++++++++++ .../simdgen/ops/GaloisField/categories.yaml | 27 ++++++++++++++++ internal/simdgen/ops/GaloisField/go.yaml | 31 +++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 internal/simdgen/ops/GaloisField/categories.yaml create mode 100644 internal/simdgen/ops/GaloisField/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index b349fc51..4b21d5a6 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -455,6 +455,32 @@ extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. +- go: MaskedGaloisFieldAffineTransform + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. +- go: MaskedGaloisFieldAffineTransformInversed + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. 
+- go: MaskedGaloisFieldMul + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 52fef3b7..2d1038da 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -431,6 +431,36 @@ - *fp out: - *fp +- go: MaskedGaloisFieldAffineTransform + asm: VGF2P8AFFINEQB + in: &AffineArgs + - class: mask + - &uint8 + go: $t + base: uint + - &uint8x8 + go: $t2 + base: uint + - &pureImmVar + class: immediate + immOffset: 0 + out: + - *uint8 + +- go: MaskedGaloisFieldAffineTransformInversed + asm: VGF2P8AFFINEINVQB + in: *AffineArgs + out: + - *uint8 + +- go: MaskedGaloisFieldMul + asm: VGF2P8MULB + in: + - class: mask + - *uint8 + - *uint8 + out: + - *uint8 # Average (unsigned byte, unsigned word) # Instructions: VPAVGB, VPAVGW - go: Average diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml new file mode 100644 index 00000000..915d3ec1 --- /dev/null +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -0,0 +1,27 @@ +!sum +- go: MaskedGaloisFieldAffineTransform + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. 
+- go: MaskedGaloisFieldAffineTransformInversed + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; + // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // corresponding to a group of 8 elements in x. +- go: MaskedGaloisFieldMul + masked: "true" + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml new file mode 100644 index 00000000..9008ab28 --- /dev/null +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -0,0 +1,31 @@ +!sum +- go: MaskedGaloisFieldAffineTransform + asm: VGF2P8AFFINEQB + in: &AffineArgs + - class: mask + - &uint8 + go: $t + base: uint + - &uint8x8 + go: $t2 + base: uint + - &pureImmVar + class: immediate + immOffset: 0 + out: + - *uint8 + +- go: MaskedGaloisFieldAffineTransformInversed + asm: VGF2P8AFFINEINVQB + in: *AffineArgs + out: + - *uint8 + +- go: MaskedGaloisFieldMul + asm: VGF2P8MULB + in: + - class: mask + - *uint8 + - *uint8 + out: + - *uint8 \ No newline at end of file From 0d73a5dacbc176103b28aeb254a961af60b206c5 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 18:28:10 -0400 Subject: [PATCH 112/200] arch/internal: add VPEXTR* instructions This CL generates dev.simd CL 683797 and this CL should be submitted before that one. 
Change-Id: I3d2e292df2bed94aeb7c710a47c5e3c99c868b58 Reviewed-on: https://go-review.googlesource.com/c/arch/+/683836 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 6 ++++++ internal/simdgen/go.yaml | 12 ++++++++++++ internal/simdgen/ops/Moves/categories.yaml | 6 ++++++ internal/simdgen/ops/Moves/go.yaml | 12 ++++++++++++ 4 files changed, 36 insertions(+) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 4b21d5a6..bb4492ed 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -644,6 +644,12 @@ extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. +- go: GetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GetElem retrieves a single constant-indexed element's value. + - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 2d1038da..76321579 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -777,6 +777,18 @@ immOffset: 0 out: - *t +- go: GetElem + asm: "VPEXTR[BWDQ]" + in: + - class: vreg + base: $b + elemBits: $e + - class: immediate + immOffset: 0 + out: + - class: greg + base: $b + bits: $e # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 9cf443a2..d0d4a304 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -4,3 +4,9 @@ extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. +- go: GetElem + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // GetElem retrieves a single constant-indexed element's value. 
+ diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index cdcb0ee8..20d4a053 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -12,3 +12,15 @@ immOffset: 0 out: - *t +- go: GetElem + asm: "VPEXTR[BWDQ]" + in: + - class: vreg + base: $b + elemBits: $e + - class: immediate + immOffset: 0 + out: + - class: greg + base: $b + bits: $e From 9b12b481df57f2ceb86aae447134afa1cadc7e7a Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 25 Jun 2025 15:35:23 -0400 Subject: [PATCH 113/200] internal/simdgen: modify sorting so it is prettier int8 < int16 < int32 etc Paired with dev.simd CL 684076 This CL should submit first. Change-Id: I1a6e80e06eef61f99556d0da13aa9e37dfd5285a Reviewed-on: https://go-review.googlesource.com/c/arch/+/684056 Reviewed-by: Junyang Shao TryBot-Bypass: David Chase Commit-Queue: David Chase Reviewed-by: Cherry Mui --- internal/simdgen/gen_simdTypes.go | 8 ++-- internal/simdgen/gen_simdrules.go | 7 ++-- internal/simdgen/godefs.go | 66 +++++++++++++++++++++++++++++-- internal/simdgen/sort_test.go | 37 +++++++++++++++++ 4 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 internal/simdgen/sort_test.go diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 864e0b7f..a87586aa 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -25,13 +25,13 @@ type simdType struct { func compareSimdTypes(x, y simdType) int { // "mask" then "vreg" - if c := strings.Compare(x.Type, y.Type); c != 0 { + if c := compareNatural(x.Type, y.Type); c != 0 { return c } // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), // not "int16" < "int32" < "int64" < "int8") // so limit comparison to first 3 bytes in string. 
- if c := strings.Compare(x.Base[:3], y.Base[:3]); c != 0 { + if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 { return c } // base type size, 8 < 16 < 32 < 64 @@ -370,11 +370,11 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { } compareOpData := func(x, y opData) int { - return strings.Compare(x.OpShape, y.OpShape) + return compareNatural(x.OpShape, y.OpShape) } data := make([]opData, 0) for _, d := range opsByShape { - slices.SortFunc(d.Ops, strings.Compare) + slices.SortFunc(d.Ops, compareNatural) data = append(data, d) } slices.SortFunc(data, compareOpData) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index ad260829..c3686a56 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -9,7 +9,6 @@ import ( "fmt" "io" "slices" - "strings" "text/template" ) @@ -39,13 +38,13 @@ type tplRuleData struct { func compareTplRuleData(x, y tplRuleData) int { // TODO should MaskedXYZ compare just after XYZ? - if c := strings.Compare(x.GoOp, y.GoOp); c != 0 { + if c := compareNatural(x.GoOp, y.GoOp); c != 0 { return c } - if c := strings.Compare(x.GoType, y.GoType); c != 0 { + if c := compareNatural(x.GoType, y.GoType); c != 0 { return c } - if c := strings.Compare(x.Args, y.Args); c != 0 { + if c := compareNatural(x.Args, y.Args); c != 0 { return c } return 0 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 1dcd48ec..36e2409b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -8,6 +8,7 @@ import ( "fmt" "log" "slices" + "strconv" "strings" "golang.org/x/arch/internal/unify" @@ -51,7 +52,7 @@ func (o *Operation) VectorWidth() int { func compareStringPointers(x, y *string) int { if x != nil && y != nil { - return strings.Compare(*x, *y) + return compareNatural(*x, *y) } if x == nil && y == nil { return 0 @@ -76,7 +77,7 @@ func compareIntPointers(x, y *int) int { } func compareOperations(x, y Operation) int { - if c := 
strings.Compare(x.Go, y.Go); c != 0 { + if c := compareNatural(x.Go, y.Go); c != 0 { return c } xIn, yIn := x.In, y.In @@ -109,7 +110,7 @@ func compareOperations(x, y Operation) int { } func compareOperands(x, y *Operand) int { - if c := strings.Compare(x.Class, y.Class); c != 0 { + if c := compareNatural(x.Class, y.Class); c != 0 { return c } if x.Class == "immediate" { @@ -158,6 +159,65 @@ type Operand struct { OverwriteElementBits *int } +// isDigit returns true if the byte is an ASCII digit. +func isDigit(b byte) bool { + return b >= '0' && b <= '9' +} + +// compareNatural performs a "natural sort" comparison of two strings. +// It compares non-digit sections lexicographically and digit sections +// numerically. In the case of string-unequal "equal" strings like +// "a01b" and "a1b", strings.Compare breaks the tie. +// +// It returns: +// +// -1 if s1 < s2 +// 0 if s1 == s2 +// +1 if s1 > s2 +func compareNatural(s1, s2 string) int { + i, j := 0, 0 + len1, len2 := len(s1), len(s2) + + for i < len1 && j < len2 { + // Find a non-digit segment or a number segment in both strings. + if isDigit(s1[i]) && isDigit(s2[j]) { + // Number segment comparison. + numStart1 := i + for i < len1 && isDigit(s1[i]) { + i++ + } + num1, _ := strconv.Atoi(s1[numStart1:i]) + + numStart2 := j + for j < len2 && isDigit(s2[j]) { + j++ + } + num2, _ := strconv.Atoi(s2[numStart2:j]) + + if num1 < num2 { + return -1 + } + if num1 > num2 { + return 1 + } + // If numbers are equal, continue to the next segment. + } else { + // Non-digit comparison. + if s1[i] < s2[j] { + return -1 + } + if s1[i] > s2[j] { + return 1 + } + i++ + j++ + } + } + + // deal with a01b vs a1b; there needs to be an order. 
+ return strings.Compare(s1, s2) +} + func writeGoDefs(path string, cl unify.Closure) error { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) diff --git a/internal/simdgen/sort_test.go b/internal/simdgen/sort_test.go new file mode 100644 index 00000000..43a9fd64 --- /dev/null +++ b/internal/simdgen/sort_test.go @@ -0,0 +1,37 @@ +package main + +import "testing" + +func TestSort(t *testing.T) { + testCases := []struct { + s1, s2 string + want int + }{ + {"a1", "a2", -1}, + {"a11a", "a11b", -1}, + {"a01a1", "a1a01", -1}, + {"a2", "a1", 1}, + {"a10", "a2", 1}, + {"a1", "a10", -1}, + {"z11", "z2", 1}, + {"z2", "z11", -1}, + {"abc", "abd", -1}, + {"123", "45", 1}, + {"file1", "file1", 0}, + {"file", "file1", -1}, + {"file1", "file", 1}, + {"a01", "a1", -1}, + {"a1a", "a1b", -1}, + } + + for _, tc := range testCases { + got := compareNatural(tc.s1, tc.s2) + result := "✅" + if got != tc.want { + result = "❌" + t.Errorf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) + } else { + t.Logf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) + } + } +} From 025062f86f69c0d424f82bdf8d1a04c78ae14a2b Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 27 Jun 2025 13:47:37 -0400 Subject: [PATCH 114/200] internal/simdgen: possible way to configure parameter names includes some name+type shorthand to make templates a little less verbose. 
changes appear in dev.simd CL 684775 Change-Id: I83945f681729a6d97cc8acaccb7b2f35744811d9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/684655 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 16 ++-- internal/simdgen/gen_utility.go | 100 +++++++++++++++++++++++ internal/simdgen/go.yaml | 2 + internal/simdgen/godefs.go | 3 +- internal/simdgen/ops/GaloisField/go.yaml | 2 + 5 files changed, 114 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index a87586aa..45f41bbf 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -114,14 +114,14 @@ func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}) {{.GoType}} +func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}) {{.GoType}} +func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} {{define "op2VecAsScalar"}} @@ -135,42 +135,42 @@ func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSi {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, z {{(index .In 2).Go}}) {{(index .Out 0).Go}} +func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} {{end}} {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 0).Go}}) 
{{.Go}}(y {{(index .In 1).Go}}, z {{(index .In 2).Go}}, u {{(index .In 3).Go}}) {{.GoType}} +func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm8 uint8) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.Extension}} -func (x {{(index .In 1).Go}}) {{.Go}}(imm uint8, y {{(index .In 2).Go}}, z {{(index .In 3).Go}}, u {{(index .In 4).Go}}) {{.GoType}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} {{define "vectorConversion"}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index cba608e0..5ea475f1 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -5,6 +5,7 @@ package main import ( + "bufio" "bytes" "fmt" "go/format" @@ -44,6 +45,9 @@ func createPath(goroot string, file string) (*os.File, error) { func formatWriteAndClose(out *bytes.Buffer, goroot string, file string) { 
b, err := format.Source(out.Bytes()) if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + fmt.Fprintf(os.Stderr, "%s\n", numberLines(out.Bytes())) + fmt.Fprintf(os.Stderr, "%v\n", err) panic(err) } else { writeAndClose(b, goroot, file) @@ -59,6 +63,18 @@ func writeAndClose(b []byte, goroot string, file string) { ofile.Close() } +// numberLines takes a slice of bytes, and returns a string where each line +// is numbered, starting from 1. +func numberLines(data []byte) string { + var buf bytes.Buffer + r := bytes.NewReader(data) + s := bufio.NewScanner(r) + for i := 1; s.Scan(); i++ { + fmt.Fprintf(&buf, "%d: %s\n", i, s.Text()) + } + return buf.String() +} + const ( InvalidIn int = iota PureVregIn @@ -380,6 +396,84 @@ func (op Operation) GoType() string { return *op.Out[0].Go } +// ImmName returns the name to use for an operation's immediate operand. +// This can be overriden in the yaml with "name" on an operand, +// otherwise, for now, it is "imm" but +// TODO come up with a better default immediate parameter name. +func (op Operation) ImmName() string { + return op.Op0Name("imm") +} + +func (o Operand) OpName(s string) string { + if n := o.Name; n != nil { + return *n + } + return s +} + +func (o Operand) OpNameAndType(s string) string { + return o.OpName(s) + " " + *o.Go +} + +// Op0Name returns the name to use for the 0 operand, +// if any is present, otherwise the parameter is used. +func (op Operation) Op0Name(s string) string { + return op.In[0].OpName(s) +} + +// Op1Name returns the name to use for the 1 operand, +// if any is present, otherwise the parameter is used. +func (op Operation) Op1Name(s string) string { + return op.In[1].OpName(s) +} + +// Op2Name returns the name to use for the 2 operand, +// if any is present, otherwise the parameter is used. +func (op Operation) Op2Name(s string) string { + return op.In[2].OpName(s) +} + +// Op3Name returns the name to use for the 3 operand, +// if any is present, otherwise the parameter is used. 
+func (op Operation) Op3Name(s string) string { + return op.In[3].OpName(s) +} + +// Op0NameAndType returns the name and type to use for +// the 0 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op0NameAndType(s string) string { + return op.In[0].OpNameAndType(s) +} + +// Op1NameAndType returns the name and type to use for +// the 1 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op1NameAndType(s string) string { + return op.In[1].OpNameAndType(s) +} + +// Op2NameAndType returns the name and type to use for +// the 2 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op2NameAndType(s string) string { + return op.In[2].OpNameAndType(s) +} + +// Op3NameAndType returns the name and type to use for +// the 3 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op3NameAndType(s string) string { + return op.In[3].OpNameAndType(s) +} + +// Op4NameAndType returns the name and type to use for +// the 4 operand, if a name is provided, otherwise +// the parameter value is used as the default. +func (op Operation) Op4NameAndType(s string) string { + return op.In[4].OpNameAndType(s) +} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. // The classification string is in the regular expression set "op[1234](Imm8)?" 
@@ -773,6 +867,12 @@ func (op Operand) String() string { sb.WriteString(" Lanes: \n") } + if op.Name != nil { + sb.WriteString(fmt.Sprintf(" Name: %s\n", *op.Name)) + } else { + sb.WriteString(" Name: \n") + } + if op.OverwriteClass != nil { sb.WriteString(fmt.Sprintf(" OverwriteClass: %s\n", *op.OverwriteClass)) } else { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 76321579..925cc842 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -435,6 +435,7 @@ asm: VGF2P8AFFINEQB in: &AffineArgs - class: mask + name: m - &uint8 go: $t base: uint @@ -444,6 +445,7 @@ - &pureImmVar class: immediate immOffset: 0 + name: b out: - *uint8 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 36e2409b..d76ee58b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -142,7 +142,8 @@ type Operand struct { // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt // field of the operation. ImmOffset *string - Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 + Name *string // optional name in the Go intrinsic declaration + Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 // TreatLikeAScalarOfSize means only the lower $TreatLikeAScalarOfSize bits of the vector // is used, so at the API level we can make it just a scalar value of this size; Then we // can overwrite it to a vector of the right size during intrinsics stage. 
diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 9008ab28..159bfb1f 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -3,6 +3,7 @@ asm: VGF2P8AFFINEQB in: &AffineArgs - class: mask + name: m - &uint8 go: $t base: uint @@ -12,6 +13,7 @@ - &pureImmVar class: immediate immOffset: 0 + name: b out: - *uint8 From 992047bbee528297556496f56fb3d34382e3e195 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 27 Jun 2025 15:33:48 -0400 Subject: [PATCH 115/200] internal/simdgen: possible way to configure parameter orders This allows specification of an ad hoc (just a name) parameter reordering that must be recognized by the Go declaration and intrinsic mapping templates as a suffix ("_2I" in this case) to an existing "shape", and that will probably also need to be paired with a new helper function in ssagen/intrinsics.go generated and glue code changes appear in dev.simd CL 684776 Change-Id: I95e107bc5c2684c5fa5b11c05169718d5d680148 Reviewed-on: https://go-review.googlesource.com/c/arch/+/684019 Reviewed-by: Cherry Mui Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 4 +- internal/simdgen/gen_simdIntrinsics.go | 4 ++ internal/simdgen/gen_simdTypes.go | 16 +++++++ internal/simdgen/gen_utility.go | 42 ++++++++++--------- internal/simdgen/go.yaml | 2 + internal/simdgen/godefs.go | 5 ++- .../simdgen/ops/GaloisField/categories.yaml | 4 +- internal/simdgen/ops/GaloisField/go.yaml | 2 + 8 files changed, 54 insertions(+), 25 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index bb4492ed..7d0c526d 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -462,7 +462,7 @@ documentation: !string |- // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 
1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: MaskedGaloisFieldAffineTransformInversed masked: "true" @@ -472,7 +472,7 @@ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: MaskedGaloisFieldMul masked: "true" diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 3fea1568..3c40856b 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -37,8 +37,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} +{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} {{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} +{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{end}} {{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 45f41bbf..9b57e472 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -159,6 +159,14 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} +{{define "op2Imm8_2I"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} +{{end}} + + {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -166,6 +174,14 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} +{{define "op3Imm8_2I"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +func (x {{(index .In 
1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} +{{end}} + + {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 5ea475f1..a01bc0c4 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -474,40 +474,44 @@ func (op Operation) Op4NameAndType(s string) string { return op.In[4].OpNameAndType(s) } +var immClasses []string = []string{"BAD0Imm", "BAD1Imm", "op1Imm8", "op2Imm8", "op3Imm8", "op4Imm8"} +var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} + // classifyOp returns a classification string, modified operation, and perhaps error based // on the stub and intrinsic shape for the operation. -// The classification string is in the regular expression set "op[1234](Imm8)?" +// The classification string is in the regular expression set "op[1234](Imm8)?(_)?" +// where the "" suffix is optionally attached to the Operation in its input yaml. +// The classification string is used to select a template or a clause of a template +// for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
func classifyOp(op Operation) (string, Operation, error) { _, _, _, immType, _, opNoConstMask, gOp := op.shape() + var class string + if immType == VarImm || immType == ConstVarImm { - switch len(opNoConstMask.In) { + switch l := len(opNoConstMask.In); l { case 1: return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) - case 2: - return "op1Imm8", opNoConstMask, nil - case 3: - return "op2Imm8", opNoConstMask, nil - case 4: - return "op3Imm8", opNoConstMask, nil - case 5: - return "op4Imm8", opNoConstMask, nil + case 2, 3, 4, 5: + class = immClasses[l] default: return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) } + if order := op.OperandOrder; order != nil { + class += "_" + *order + } + return class, opNoConstMask, nil } else { - switch len(gOp.In) { - case 1: - return "op1", gOp, nil - case 2: - return "op2", gOp, nil - case 3: - return "op3", gOp, nil - case 4: - return "op4", gOp, nil + switch l := len(gOp.In); l { + case 1, 2, 3, 4: + class = classes[l] default: return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) } + if order := op.OperandOrder; order != nil { + class += "_" + *order + } + return class, gOp, nil } } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 925cc842..5b3aa6a0 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -433,6 +433,7 @@ - *fp - go: MaskedGaloisFieldAffineTransform asm: VGF2P8AFFINEQB + operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - class: mask name: m @@ -451,6 +452,7 @@ - go: MaskedGaloisFieldAffineTransformInversed asm: VGF2P8AFFINEINVQB + operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index d76ee58b..4ac4a9a1 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ 
-17,8 +17,9 @@ import ( type Operation struct { Go string // Go method name - GoArch string // GOARCH for this definition - Asm string // Assembly mnemonic + GoArch string // GOARCH for this definition + Asm string // Assembly mnemonic + OperandOrder *string // optional Operand order for better Go declarations In []Operand // Arguments Out []Operand // Results diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 915d3ec1..2515893b 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -6,7 +6,7 @@ documentation: !string |- // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: MaskedGaloisFieldAffineTransformInversed masked: "true" @@ -16,7 +16,7 @@ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // imm is an 8-bit vector. The affine transformation is y * x + imm, with each element of y + // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
- go: MaskedGaloisFieldMul masked: "true" diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 159bfb1f..c4d02e17 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -1,6 +1,7 @@ !sum - go: MaskedGaloisFieldAffineTransform asm: VGF2P8AFFINEQB + operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - class: mask name: m @@ -19,6 +20,7 @@ - go: MaskedGaloisFieldAffineTransformInversed asm: VGF2P8AFFINEINVQB + operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 From 203f3d62f49b8b27488de7216051da8a339a6cbf Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 30 Jun 2025 18:34:53 +0000 Subject: [PATCH 116/200] internal/simdgen: cleanup unnecessary APIs, documentations This CL cleans up: 1. Removes instructions that suppress exceptions, defaults to use MXCSR instead. 2. Remove "Const Immediate" from documentations. 3. Correct the documentation for masked operations. Change-Id: Ic53db59252093ec0132e99b6f73039bcaf20a614 Reviewed-on: https://go-review.googlesource.com/c/arch/+/685035 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 121 +----------------- internal/simdgen/go.yaml | 1 - internal/simdgen/ops/Compares/categories.yaml | 17 --- .../simdgen/ops/FPonlyArith/categories.yaml | 85 ------------ .../simdgen/ops/GaloisField/categories.yaml | 6 +- internal/simdgen/ops/MLOps/categories.yaml | 3 +- internal/simdgen/ops/Mul/categories.yaml | 9 -- internal/simdgen/ops/main.go | 4 - 8 files changed, 8 insertions(+), 238 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 7d0c526d..fbfd6613 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,5 +1,4 @@ !sum -# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. 
- go: Add commutative: "true" extension: "AVX.*" @@ -127,49 +126,42 @@ extension: "AVX.*" documentation: !string |- // Equal compares for equality. - // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" documentation: !string |- // Less compares for less than. - // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - // Const Immediate = 6. - go: MaskedEqual constImm: 0 @@ -178,10 +170,6 @@ extension: "AVX.*" documentation: !string |- // MaskedEqual compares for equality, masked. - // Const Immediate = 0. - docUnmasked: !string |- - // Equal compares for equality. - // Const Immediate = 0. - go: MaskedLess constImm: 1 masked: "true" @@ -189,7 +177,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLess compares for less than. - // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" @@ -197,7 +184,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLessEqual compares for less than or equal. - // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" @@ -205,7 +191,6 @@ extension: "AVX.*" documentation: !string |- // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. 
- go: MaskedNotEqual constImm: 4 masked: "true" @@ -213,7 +198,6 @@ extension: "AVX.*" documentation: !string |- // MaskedNotEqual compares for inequality. - // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" @@ -221,7 +205,6 @@ extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" @@ -229,7 +212,6 @@ extension: "AVX.*" documentation: !string |- // MaskedGreater compares for greater than. - // Const Immediate = 6. - go: Div commutative: "false" extension: "AVX.*" @@ -287,7 +269,6 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -295,15 +276,6 @@ masked: "true" documentation: !string |- // MaskedRoundWithPrecision rounds elements with specified precision. - // Const Immediate = 0. -- go: MaskedRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. - // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -311,15 +283,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. - // Const Immediate = 0. -- go: MaskedDiffWithRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. - // Const Immediate = 8. 
- go: Floor commutative: "false" @@ -327,7 +290,6 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -335,21 +297,6 @@ masked: "true" documentation: !string |- // MaskedFloorWithPrecision rounds elements down with specified precision, masked. - // Const Immediate = 1. - docUnmasked: !string |- - // FloorWithPrecision rounds elements down with specified precision. - // Const Immediate = 1. -- go: MaskedFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. - // Const Immediate = 9. - docUnmasked: !string |- - // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. - // Const Immediate = 9. - go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -357,15 +304,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. - // Const Immediate = 1. -- go: MaskedDiffWithFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. - // Const Immediate = 9. - go: Ceil commutative: "false" @@ -373,7 +311,6 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -381,18 +318,6 @@ masked: "true" documentation: !string |- // MaskedCeilWithPrecision rounds elements up with specified precision, masked. 
- // Const Immediate = 2. - docUnmasked: !string |- - // CeilWithPrecision rounds elements up with specified precision. - // Const Immediate = 2. -- go: MaskedCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. - // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -400,15 +325,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. - // Const Immediate = 2. -- go: MaskedDiffWithCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. - // Const Immediate = 10. - go: Trunc commutative: "false" @@ -416,7 +332,6 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -424,15 +339,6 @@ masked: "true" documentation: !string |- // MaskedTruncWithPrecision truncates elements with specified precision. - // Const Immediate = 3. -- go: MaskedTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -440,15 +346,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. - // Const Immediate = 3. 
-- go: MaskedDiffWithTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: AddSub commutative: "false" @@ -460,7 +357,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -469,7 +366,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -479,7 +376,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average commutative: "true" @@ -554,7 +451,6 @@ extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. - // Const Immediate = 127. 
- go: UnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" @@ -576,7 +472,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" @@ -677,8 +573,6 @@ extension: "AVX.*" documentation: !string |- // MaskedMul multiplies corresponding elements of two vectors, masked. - docUnmasked: !string |- - // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" @@ -686,25 +580,18 @@ documentation: !string |- // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. - docUnmasked: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulHigh multiplies elements and stores the high part of the result, masked. - docUnmasked: !string |- - // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulLow multiplies elements and stores the low part of the result, masked. - docUnmasked: !string |- - // MulLow multiplies elements and stores the low part of the result. 
- go: ShiftAllLeft nameAndSizeCheck: "true" commutative: "false" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 5b3aa6a0..f91bafac 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,5 +1,4 @@ !sum -# TODO: remove the "Const Immediate" from the documentation field, it's there only for debug purposes. # Add - go: Add asm: "VPADD[BWDQ]|VADDP[SD]" diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index bd4d8c76..08b153c7 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -12,49 +12,42 @@ extension: "AVX.*" documentation: !string |- // Equal compares for equality. - // Const Immediate = 0. - go: Less constImm: 1 commutative: "false" extension: "AVX.*" documentation: !string |- // Less compares for less than. - // Const Immediate = 1. - go: LessEqual constImm: 2 commutative: "false" extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - // Const Immediate = 2. - go: IsNan # For float only. constImm: 3 commutative: "true" extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. - go: NotEqual constImm: 4 commutative: "true" extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - // Const Immediate = 4. - go: GreaterEqual constImm: 5 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: Greater constImm: 6 commutative: "false" extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - // Const Immediate = 6. - go: MaskedEqual constImm: 0 @@ -63,10 +56,6 @@ extension: "AVX.*" documentation: !string |- // MaskedEqual compares for equality, masked. - // Const Immediate = 0. - docUnmasked: !string |- - // Equal compares for equality. - // Const Immediate = 0. 
- go: MaskedLess constImm: 1 masked: "true" @@ -74,7 +63,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLess compares for less than. - // Const Immediate = 1. - go: MaskedLessEqual constImm: 2 masked: "true" @@ -82,7 +70,6 @@ extension: "AVX.*" documentation: !string |- // MaskedLessEqual compares for less than or equal. - // Const Immediate = 2. - go: MaskedIsNan # For float only. constImm: 3 masked: "true" @@ -90,7 +77,6 @@ extension: "AVX.*" documentation: !string |- // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). - // Const Immediate = 3. - go: MaskedNotEqual constImm: 4 masked: "true" @@ -98,7 +84,6 @@ extension: "AVX.*" documentation: !string |- // MaskedNotEqual compares for inequality. - // Const Immediate = 4. - go: MaskedGreaterEqual constImm: 5 masked: "true" @@ -106,7 +91,6 @@ extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - // Const Immediate = 5. - go: MaskedGreater constImm: 6 masked: "true" @@ -114,4 +98,3 @@ extension: "AVX.*" documentation: !string |- // MaskedGreater compares for greater than. - // Const Immediate = 6. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index c00d43d6..51dfd04d 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -56,7 +56,6 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - // Const Immediate = 0. - go: MaskedRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -64,15 +63,6 @@ masked: "true" documentation: !string |- // MaskedRoundWithPrecision rounds elements with specified precision. - // Const Immediate = 0. 
-- go: MaskedRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedRoundSuppressExceptionWithPrecision rounds elements with specified precision, suppressing exceptions. - // Const Immediate = 8. - go: MaskedDiffWithRoundWithPrecision commutative: "false" extension: "AVX.*" @@ -80,15 +70,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. - // Const Immediate = 0. -- go: MaskedDiffWithRoundSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 8 - masked: "true" - documentation: !string |- - // MaskedDiffWithRoundSuppressExceptionWithPrecision computes the difference after rounding with specified precision, suppressing exceptions. - // Const Immediate = 8. - go: Floor commutative: "false" @@ -96,7 +77,6 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - // Const Immediate = 1. - go: MaskedFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -104,21 +84,6 @@ masked: "true" documentation: !string |- // MaskedFloorWithPrecision rounds elements down with specified precision, masked. - // Const Immediate = 1. - docUnmasked: !string |- - // FloorWithPrecision rounds elements down with specified precision. - // Const Immediate = 1. -- go: MaskedFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedFloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions, masked. - // Const Immediate = 9. - docUnmasked: !string |- - // FloorSuppressExceptionWithPrecision rounds elements down with specified precision, suppressing exceptions. - // Const Immediate = 9. 
- go: MaskedDiffWithFloorWithPrecision commutative: "false" extension: "AVX.*" @@ -126,15 +91,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. - // Const Immediate = 1. -- go: MaskedDiffWithFloorSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 9 - masked: "true" - documentation: !string |- - // MaskedDiffWithFloorSuppressExceptionWithPrecision computes the difference after flooring with specified precision, suppressing exceptions. - // Const Immediate = 9. - go: Ceil commutative: "false" @@ -142,7 +98,6 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - // Const Immediate = 2. - go: MaskedCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -150,18 +105,6 @@ masked: "true" documentation: !string |- // MaskedCeilWithPrecision rounds elements up with specified precision, masked. - // Const Immediate = 2. - docUnmasked: !string |- - // CeilWithPrecision rounds elements up with specified precision. - // Const Immediate = 2. -- go: MaskedCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedCeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions. - // Const Immediate = 10. - go: MaskedDiffWithCeilWithPrecision commutative: "false" extension: "AVX.*" @@ -169,15 +112,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. - // Const Immediate = 2. -- go: MaskedDiffWithCeilSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 10 - masked: "true" - documentation: !string |- - // MaskedDiffWithCeilSuppressExceptionWithPrecision computes the difference after ceiling with specified precision, suppressing exceptions. 
- // Const Immediate = 10. - go: Trunc commutative: "false" @@ -185,7 +119,6 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - // Const Immediate = 3. - go: MaskedTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -193,15 +126,6 @@ masked: "true" documentation: !string |- // MaskedTruncWithPrecision truncates elements with specified precision. - // Const Immediate = 3. -- go: MaskedTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedTruncSuppressExceptionWithPrecision truncates elements with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: MaskedDiffWithTruncWithPrecision commutative: "false" extension: "AVX.*" @@ -209,15 +133,6 @@ masked: "true" documentation: !string |- // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. - // Const Immediate = 3. -- go: MaskedDiffWithTruncSuppressExceptionWithPrecision - commutative: "false" - extension: "AVX.*" - constImm: 11 - masked: "true" - documentation: !string |- - // MaskedDiffWithTruncSuppressExceptionWithPrecision computes the difference after truncating with specified precision, suppressing exceptions. - // Const Immediate = 11. - go: AddSub commutative: "false" diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 2515893b..0b3978a4 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -4,7 +4,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -13,7 +13,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // MaskedGaloisFieldAffineTransformInversed computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -23,5 +23,5 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldMul computes element-wise GF(2^8) multiplication with + // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6923dd37..54911b16 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -32,7 +32,6 @@ extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. - // Const Immediate = 127. - go: UnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" @@ -54,7 +53,7 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
- go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index def502f3..c0f87beb 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -26,8 +26,6 @@ extension: "AVX.*" documentation: !string |- // MaskedMul multiplies corresponding elements of two vectors, masked. - docUnmasked: !string |- - // Mul multiplies corresponding elements of two vectors. - go: MaskedMulEvenWiden masked: "true" commutative: "true" @@ -35,22 +33,15 @@ documentation: !string |- // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. - docUnmasked: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. - go: MaskedMulHigh masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulHigh multiplies elements and stores the high part of the result, masked. - docUnmasked: !string |- - // MulHigh multiplies elements and stores the high part of the result. - go: MaskedMulLow masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- // MaskedMulLow multiplies elements and stores the low part of the result, masked. - docUnmasked: !string |- - // MulLow multiplies elements and stores the low part of the result. 
diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go index c71f3ff6..7e462bf7 100644 --- a/internal/simdgen/ops/main.go +++ b/internal/simdgen/ops/main.go @@ -32,10 +32,6 @@ func mergeYamlFiles(targetFileName string) error { if err != nil { return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) } - _, err = writer.WriteString("# TODO: remove the \"Const Immediate\" from the documentation field, it's there only for debug purposes.\n") - if err != nil { - return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) - } entries, err := os.ReadDir(baseDir) if err != nil { From bce7f6b5c889f86561d0586349fef0bcb8d22a18 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 30 Jun 2025 14:51:58 -0400 Subject: [PATCH 117/200] arch/internal: cleanup {Operation,Operand}.String() replaced repeated open-coded if+formatting with a small number of local functions, consolidated all the nil values at the end, added some missing fields. Change-Id: I5f261ba34626f173789d4c5e138345c16e1b341c Reviewed-on: https://go-review.googlesource.com/c/arch/+/685116 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 157 +++++++++++++------------------- 1 file changed, 65 insertions(+), 92 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index a01bc0c4..10c5af7c 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -779,48 +779,47 @@ func reportXEDInconsistency(ops []Operation) error { func (o Operation) String() string { var sb strings.Builder + var nils string + + optStr := func(field string, ps *string) { + if ps != nil { + fmt.Fprintf(&sb, " %s: %s\n", field, *ps) + } else { + nils += " " + field + } + } + + // two spaces then field: value + str := func(field string, value string) { + fmt.Fprintf(&sb, " %s: %s\n", field, value) + } + sb.WriteString("Operation {\n") - sb.WriteString(fmt.Sprintf(" Go: %s\n", o.Go)) - 
sb.WriteString(fmt.Sprintf(" GoArch: %s\n", o.GoArch)) - sb.WriteString(fmt.Sprintf(" Asm: %s\n", o.Asm)) + str("Go", o.Go) + str("GoArch", o.GoArch) + str("Asm", o.Asm) + str("Commutative", o.Commutative) + str("Extension", o.Extension) + optStr("ConstImm", o.ConstImm) + optStr("Masked", o.Masked) + optStr("Zeroing", o.Zeroing) + optStr("OperandOrder", o.OperandOrder) sb.WriteString(" In: [\n") for _, op := range o.In { - sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + fmt.Fprintf(&sb, " %s,\n", op.String()) } sb.WriteString(" ]\n") sb.WriteString(" Out: [\n") for _, op := range o.Out { - sb.WriteString(fmt.Sprintf(" %s,\n", op.String())) + fmt.Fprintf(&sb, " %s,\n", op.String()) } sb.WriteString(" ]\n") - sb.WriteString(fmt.Sprintf(" Commutative: %s\n", o.Commutative)) - sb.WriteString(fmt.Sprintf(" Extension: %s\n", o.Extension)) - - if o.Zeroing != nil { - sb.WriteString(fmt.Sprintf(" Zeroing: %s\n", *o.Zeroing)) - } else { - sb.WriteString(" Zeroing: \n") - } - - if o.Documentation != nil { - sb.WriteString(fmt.Sprintf(" Documentation: %s\n", *o.Documentation)) - } else { - sb.WriteString(" Documentation: \n") - } - - if o.ConstImm != nil { - sb.WriteString(fmt.Sprintf(" ConstImm: %s\n", *o.ConstImm)) - } else { - sb.WriteString(" ConstImm: \n") - } - - if o.Masked != nil { - sb.WriteString(fmt.Sprintf(" Masked: %s\n", *o.Masked)) - } else { - sb.WriteString(" Masked: \n") + optStr("Documentation", o.Documentation) + if len(nils) != 0 { + sb.WriteString(" nils = " + nils[1:] + "\n") } sb.WriteString("}\n") @@ -830,75 +829,49 @@ func (o Operation) String() string { // String returns a string representation of the Operand. 
func (op Operand) String() string { var sb strings.Builder - sb.WriteString("Operand {\n") - sb.WriteString(fmt.Sprintf(" Class: %s\n", op.Class)) - - if op.Go != nil { - sb.WriteString(fmt.Sprintf(" Go: %s\n", *op.Go)) - } else { - sb.WriteString(" Go: \n") - } - - sb.WriteString(fmt.Sprintf(" AsmPos: %d\n", op.AsmPos)) - - if op.Base != nil { - sb.WriteString(fmt.Sprintf(" Base: %s\n", *op.Base)) - } else { - sb.WriteString(" Base: \n") - } - - if op.ElemBits != nil { - sb.WriteString(fmt.Sprintf(" ElemBits: %d\n", *op.ElemBits)) - } else { - sb.WriteString(" ElemBits: \n") - } - - if op.Bits != nil { - sb.WriteString(fmt.Sprintf(" Bits: %d\n", *op.Bits)) - } else { - sb.WriteString(" Bits: \n") - } - - if op.Const != nil { - sb.WriteString(fmt.Sprintf(" Const: %s\n", *op.Const)) - } else { - sb.WriteString(" Const: \n") - } - - if op.Lanes != nil { - sb.WriteString(fmt.Sprintf(" Lanes: %d\n", *op.Lanes)) - } else { - sb.WriteString(" Lanes: \n") - } + var nils string - if op.Name != nil { - sb.WriteString(fmt.Sprintf(" Name: %s\n", *op.Name)) - } else { - sb.WriteString(" Name: \n") + optStr := func(field string, ps *string) { + if ps != nil { + fmt.Fprintf(&sb, " %s: %s\n", field, *ps) + } else { + nils += " " + field + } } - if op.OverwriteClass != nil { - sb.WriteString(fmt.Sprintf(" OverwriteClass: %s\n", *op.OverwriteClass)) - } else { - sb.WriteString(" OverwriteClass: \n") + optNum := func(field string, pi *int) { + if pi != nil { + fmt.Fprintf(&sb, " %s: %d\n", field, *pi) + } else { + nils += " " + field + } } - if op.OverwriteBase != nil { - sb.WriteString(fmt.Sprintf(" OverwriteBase: %s\n", *op.OverwriteBase)) - } else { - sb.WriteString(" OverwriteBase: \n") + // four spaces then field: value + str := func(field string, value string) { + fmt.Fprintf(&sb, " %s: %s\n", field, value) } - - if op.OverwriteElementBits != nil { - sb.WriteString(fmt.Sprintf(" OverwriteElementBits: %d\n", *op.OverwriteElementBits)) - } else { - sb.WriteString(" 
OverwriteElementBits: \n") + num := func(field string, value int) { + fmt.Fprintf(&sb, " %s: %d\n", field, value) } - - if op.TreatLikeAScalarOfSize != nil { - sb.WriteString(fmt.Sprintf(" TreatLikeAScalarOfSize: %d\n", *op.TreatLikeAScalarOfSize)) - } else { - sb.WriteString(" TreatLikeAScalarOfSize: \n") + sb.WriteString("Operand {\n") + str("Class", op.Class) + optStr("Go", op.Go) + num("AsmPos", op.AsmPos) + optStr("Base", op.Base) + optNum("ElemBits", op.ElemBits) + optNum("Bits", op.Bits) + optStr("Const", op.Const) + optStr("ImmOffset", op.ImmOffset) + optNum("Lanes", op.Lanes) + optStr("Name", op.Name) + optNum("TreatLikeAScalarOfSize", op.TreatLikeAScalarOfSize) + optStr("OverwriteClass", op.OverwriteClass) + optStr("OverwriteBase", op.OverwriteBase) + optNum("OverwriteElementBits", op.OverwriteElementBits) + + if len(nils) != 0 { + sb.WriteString(" nils = " + nils[1:] + "\n") } sb.WriteString(" }\n") From e3aff9d88cb120ff00c221d85384ea5464912345 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 2 Jul 2025 15:07:19 -0400 Subject: [PATCH 118/200] internal/simdgen: make types order really invariant type declarations had an input-dependent ordering. Not as bad as map iteration, but spurious changes could still happen. Also tweaks the type ordering to make it slightly "better" (masks after vectors, not before). 
Generates dev.simd CL 685615 Change-Id: I77af136e8817415c1465707575a222bed9ce88be Reviewed-on: https://go-review.googlesource.com/c/arch/+/685595 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 9b57e472..6aeb4ee8 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -24,8 +24,8 @@ type simdType struct { } func compareSimdTypes(x, y simdType) int { - // "mask" then "vreg" - if c := compareNatural(x.Type, y.Type); c != 0 { + // "vreg" then "mask" + if c := -compareNatural(x.Type, y.Type); c != 0 { return c } // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), @@ -518,7 +518,8 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { } sizes := make([]int, 0, len(typeMap)) - for size := range typeMap { + for size, types := range typeMap { + slices.SortFunc(types, compareSimdTypes) sizes = append(sizes, size) } sort.Ints(sizes) From df255ae7a1bae09254fc506851ea829f128208ec Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 2 Jul 2025 14:54:25 -0400 Subject: [PATCH 119/200] internal/simdgen: handle K0 by deleting operand This removes some special cases from the code. 
Change-Id: I69a25446a17fd4e50f202aa323bed3a6f3bb90bc Reviewed-on: https://go-review.googlesource.com/c/arch/+/685596 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/gen_simdrules.go | 2 +- internal/simdgen/gen_simdssa.go | 2 +- internal/simdgen/gen_utility.go | 65 +++++++------------------- 6 files changed, 21 insertions(+), 54 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index f34cf9a1..1be01810 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -43,7 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { - _, _, _, immType, _, _, gOp := op.shape() + _, _, _, immType, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f879791d..7e4f1d1a 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -50,7 +50,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { - shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, _, gOp := op.shape() asm := gOp.Asm if maskType == OneMask { diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 6aeb4ee8..16286736 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -272,7 +272,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) opsSkipped := map[string]struct{}{} for _, o := 
range ops { - _, _, _, immType, _, _, gOp := o.shape() + _, _, _, immType, _, gOp := o.shape() if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c3686a56..6f84b912 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -65,7 +65,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, immType, _, _, gOp := opr.shape() + opInShape, opOutShape, maskType, immType, _, gOp := opr.shape() vregInCnt := len(gOp.In) asm := gOp.Asm diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index ffb172a6..ee30c8eb 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -90,7 +90,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, _, gOp := op.shape() if maskType == 2 { asm += "Masked" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 10c5af7c..b9206cda 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -97,7 +97,6 @@ const ( InvalidMask int = iota NoMask OneMask - OneConstMask AllMasks ) @@ -130,7 +129,6 @@ const ( // InvalidMask: unknown, with err set to the error message // NoMask: no mask // OneMask: with mask (K1 to K7) -// OneConstMask: with const mask K0 // AllMasks: it's a K mask instruction // // immType: @@ -145,7 +143,7 @@ const ( // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
-func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoConstMask Operation, opNoImmConstMask Operation) { +func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoImmConstMask Operation) { if len(op.Out) > 1 { panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } @@ -169,9 +167,8 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } hasImm := false maskCount := 0 - iConstMask := -1 hasVreg := false - for i, in := range op.In { + for _, in := range op.In { if in.AsmPos == outputReg { if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { shapeOut = OneVregOutAtIn @@ -187,35 +184,14 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm } hasImm = true } else if in.Class == "mask" { - if in.Const != nil { - if *in.Const == "K0" { - if iConstMask != -1 { - panic(fmt.Errorf("simdgen only supports one const mask in inputs: %s", op)) - } - iConstMask = i - // Const mask should be invisible in ssa and prog, so we don't treat it as a mask. - // More specifically in prog, it's optional: when missing the assembler will default it to K0). - // TODO: verify the above assumption is safe. - } else { - panic(fmt.Errorf("simdgen only supports const mask K0 in inputs: %s", op)) - } - } else { - maskCount++ - } + maskCount++ } else { hasVreg = true } } opNoImm = *op - opNoConstMask = *op opNoImmConstMask = *op - removeConstMask := func(o *Operation) { - o.In = append(o.In[:iConstMask], o.In[iConstMask+1:]...) 
- } - if iConstMask != -1 { - removeConstMask(&opNoConstMask) - removeConstMask(&opNoImmConstMask) - } + removeImm := func(o *Operation) { o.In = o.In[1:] } @@ -237,11 +213,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm immType = NoImm } if maskCount == 0 { - if iConstMask == -1 { - maskType = NoMask - } else { - maskType = OneConstMask - } + maskType = NoMask } else { maskType = OneMask } @@ -249,9 +221,6 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm if hasImm { panic(fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op)) } - if iConstMask != -1 { - panic(fmt.Errorf("simdgen does not support const mask in pure mask operations: %s", op)) - } if hasVreg { panic(fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op)) } @@ -284,7 +253,7 @@ func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm // regShape returns a string representation of the register shape. func (op *Operation) regShape() (string, error) { - _, _, _, _, _, _, gOp := op.shape() + _, _, _, _, _, gOp := op.shape() var regInfo string var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { @@ -484,29 +453,29 @@ var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} // The classification string is used to select a template or a clause of a template // for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, _, opNoConstMask, gOp := op.shape() + _, _, _, immType, _, gOp := op.shape() var class string if immType == VarImm || immType == ConstVarImm { - switch l := len(opNoConstMask.In); l { + switch l := len(op.In); l { case 1: return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) case 2, 3, 4, 5: class = immClasses[l] default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) } if order := op.OperandOrder; order != nil { class += "_" + *order } - return class, opNoConstMask, nil + return class, op, nil } else { switch l := len(gOp.In); l { case 1, 2, 3, 4: class = classes[l] default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(opNoConstMask.In), op) + return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) } if order := op.OperandOrder; order != nil { class += "_" + *order @@ -568,14 +537,12 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _, _ := op.shape() + shapeIn, _, _, _, _, _ := op.shape() if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op - op2.In = slices.Clone(op.In) - constMask := "K0" - // The ops should be sorted when calling this function, the mask is in the end. 
- op2.In[len(op2.In)-1].Const = &constMask + // The ops should be sorted when calling this function, the mask is in the end, drop the mask + op2.In = slices.Clone(op.In)[:len(op.In)-1] if !strings.HasPrefix(op2.Go, "Masked") { return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) } @@ -598,7 +565,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, _, gOp := op.shape() + _, _, _, _, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go seen[genericNames] = append(seen[genericNames], op) @@ -642,7 +609,7 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - _, _, _, immType, _, _, _ := op.shape() + _, _, _, immType, _, _ := op.shape() if immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm From 19fdaf8b68a218640532eefdc430218ab2afb21b Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 25 Jun 2025 13:24:30 -0400 Subject: [PATCH 120/200] internal/simdgen: add INSERT[IF]128 instructions Paired with dev.simd CL 684077 This CL should submit first. 
Change-Id: Ia9a0abce2d92b79db087256ca1bf17838e0b2dbb Reviewed-on: https://go-review.googlesource.com/c/arch/+/684055 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 6 +- internal/simdgen/gen_utility.go | 4 +- internal/simdgen/go.yaml | 106 +++++++++++++++++++++ internal/simdgen/godefs.go | 5 +- internal/simdgen/ops/Moves/categories.yaml | 6 +- internal/simdgen/ops/Moves/go.yaml | 106 +++++++++++++++++++++ internal/simdgen/types.yaml | 21 ++++ 7 files changed, 249 insertions(+), 5 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index fbfd6613..6f3db7a0 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -545,7 +545,11 @@ extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - +- go: Set128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. 
- go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index b9206cda..fa9920dd 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -668,8 +668,8 @@ func overwrite(ops []Operation) error { panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) } *op[idx].ElemBits = *op[idx].OverwriteElementBits - *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Bits / *op[idx].ElemBits) - + *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits + *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) } return nil } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index f91bafac..f8b27d85 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -792,6 +792,112 @@ - class: greg base: $b bits: $e + +- go: Set128 + asm: "VINSERTI128" + in: + - &i8x32 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 8 + - &i8x16 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 8 + - class: immediate + immOffset: 0 + out: + - *i8x32 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i16x16 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 16 + - &i16x8 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 16 + - class: immediate + immOffset: 0 + out: + - *i16x16 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i32x8 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 32 + - &i32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *i32x8 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i64x4 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 64 + - &i64x2 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 64 + - class: immediate + immOffset: 0 + out: + - *i64x4 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f32x8 + class: vreg + base: $t + 
bits: 256 + OverwriteElementBits: 32 + - &f32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *f32x8 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f64x4 + class: vreg + base: $t + bits: 256 + - &f64x2 + class: vreg + base: $t + bits: 128 + - class: immediate + immOffset: 0 + out: + - *f64x4 # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 4ac4a9a1..e5ad5b82 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -123,7 +123,10 @@ func compareOperands(x, y *Operand) int { if c := compareIntPointers(x.ElemBits, y.ElemBits); c != 0 { return c } - return compareIntPointers(x.Bits, y.Bits) + if c := compareIntPointers(x.Bits, y.Bits); c != 0 { + return c + } + return 0 } } diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index d0d4a304..3ebb24f3 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -9,4 +9,8 @@ extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - +- go: Set128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. 
diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 20d4a053..89bd612c 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -24,3 +24,109 @@ - class: greg base: $b bits: $e + +- go: Set128 + asm: "VINSERTI128" + in: + - &i8x32 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 8 + - &i8x16 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 8 + - class: immediate + immOffset: 0 + out: + - *i8x32 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i16x16 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 16 + - &i16x8 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 16 + - class: immediate + immOffset: 0 + out: + - *i16x16 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i32x8 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 32 + - &i32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *i32x8 + +- go: Set128 + asm: "VINSERTI128" + in: + - &i64x4 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 64 + - &i64x2 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 64 + - class: immediate + immOffset: 0 + out: + - *i64x4 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f32x8 + class: vreg + base: $t + bits: 256 + OverwriteElementBits: 32 + - &f32x4 + class: vreg + base: $t + bits: 128 + OverwriteElementBits: 32 + - class: immediate + immOffset: 0 + out: + - *f32x8 + +- go: Set128 + asm: "VINSERTF128" + in: + - &f64x4 + class: vreg + base: $t + bits: 256 + - &f64x2 + class: vreg + base: $t + bits: 128 + - class: immediate + immOffset: 0 + out: + - *f64x4 diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 5178a216..17f5be55 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -50,6 +50,7 @@ in: !repeat - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - {class: mask, go: Mask64x8, base: 
"int", elemBits: 64, bits: 512, lanes: 8} + - {class: greg, go: float64, base: "float", bits: 64, lanes: 1} - {class: greg, go: float32, base: "float", bits: 32, lanes: 1} - {class: greg, go: int64, base: "int", bits: 64, lanes: 1} @@ -61,6 +62,26 @@ in: !repeat - {class: greg, go: uint16, base: "uint", bits: 16, lanes: 1} - {class: greg, go: uint8, base: "uint", bits: 8, lanes: 1} +# Special shapes just to make INSERT[IF]128 work. +# The elemBits field of these shapes are wrong, it would be overwritten by overwriteElemBits. + - {class: vreg, go: Int8x16, base: "int", elemBits: 128, bits: 128, lanes: 16} + - {class: vreg, go: Uint8x16, base: "uint", elemBits: 128, bits: 128, lanes: 16} + - {class: vreg, go: Int16x8, base: "int", elemBits: 128, bits: 128, lanes: 8} + - {class: vreg, go: Uint16x8, base: "uint", elemBits: 128, bits: 128, lanes: 8} + - {class: vreg, go: Int32x4, base: "int", elemBits: 128, bits: 128, lanes: 4} + - {class: vreg, go: Uint32x4, base: "uint", elemBits: 128, bits: 128, lanes: 4} + - {class: vreg, go: Int64x2, base: "int", elemBits: 128, bits: 128, lanes: 2} + - {class: vreg, go: Uint64x2, base: "uint", elemBits: 128, bits: 128, lanes: 2} + + - {class: vreg, go: Int8x32, base: "int", elemBits: 128, bits: 256, lanes: 32} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 128, bits: 256, lanes: 32} + - {class: vreg, go: Int16x16, base: "int", elemBits: 128, bits: 256, lanes: 16} + - {class: vreg, go: Uint16x16, base: "uint", elemBits: 128, bits: 256, lanes: 16} + - {class: vreg, go: Int32x8, base: "int", elemBits: 128, bits: 256, lanes: 8} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 128, bits: 256, lanes: 8} + - {class: vreg, go: Int64x4, base: "int", elemBits: 128, bits: 256, lanes: 4} + - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4} + - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. 
out: !repeat - *types From 36da9b0a515c938c425536fb5317df5934b3336f Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 25 Jun 2025 18:19:15 -0400 Subject: [PATCH 121/200] internal/simdgen: add EXTRACT[IF]128 instructions This generates dev.simd CL 684115 Change-Id: Ibb8e77e40c426b2cf3dd73c996e5118d5fd5afff Reviewed-on: https://go-review.googlesource.com/c/arch/+/684080 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 5 ++ internal/simdgen/go.yaml | 82 ++++++++++++++++++---- internal/simdgen/ops/Moves/categories.yaml | 5 ++ internal/simdgen/ops/Moves/go.yaml | 82 ++++++++++++++++++---- 4 files changed, 146 insertions(+), 28 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 6f3db7a0..d5cf67a5 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -550,6 +550,11 @@ extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +- go: Get128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index f8b27d85..18c937ac 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -776,23 +776,25 @@ - class: greg base: $b lanes: 1 # Scalar, darn it! 
- - class: immediate + - &imm + class: immediate immOffset: 0 out: - *t + - go: GetElem asm: "VPEXTR[BWDQ]" in: - class: vreg base: $b elemBits: $e - - class: immediate - immOffset: 0 + - *imm out: - class: greg base: $b bits: $e + - go: Set128 asm: "VINSERTI128" in: @@ -806,11 +808,20 @@ base: $t bits: 128 OverwriteElementBits: 8 - - class: immediate + - &imm01 # This immediate should be only 0 or 1 + class: immediate immOffset: 0 out: - *i8x32 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i8x32 + - *imm01 + out: + - *i8x16 + - go: Set128 asm: "VINSERTI128" in: @@ -824,11 +835,18 @@ base: $t bits: 128 OverwriteElementBits: 16 - - class: immediate - immOffset: 0 + - *imm01 out: - *i16x16 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i16x16 + - *imm01 + out: + - *i16x8 + - go: Set128 asm: "VINSERTI128" in: @@ -842,11 +860,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *i32x8 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i32x8 + - *imm01 + out: + - *i32x4 + - go: Set128 asm: "VINSERTI128" in: @@ -860,11 +885,18 @@ base: $t bits: 128 OverwriteElementBits: 64 - - class: immediate - immOffset: 0 + - *imm01 out: - *i64x4 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i64x4 + - *imm01 + out: + - *i64x2 + - go: Set128 asm: "VINSERTF128" in: @@ -878,11 +910,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *f32x8 +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f32x8 + - *imm01 + out: + - *f32x4 + - go: Set128 asm: "VINSERTF128" in: @@ -894,10 +933,25 @@ class: vreg base: $t bits: 128 - - class: immediate - immOffset: 0 + - *imm01 out: - *f64x4 + +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f64x4 + - *imm01 + out: + - *f64x2 + + + + + + + + # "Normal" multiplication is only available for floats. # This only covers the single and double precision. 
- go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 3ebb24f3..db36efd4 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -14,3 +14,8 @@ extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +- go: Get128 + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 89bd612c..e6cd40f6 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -8,23 +8,25 @@ - class: greg base: $b lanes: 1 # Scalar, darn it! - - class: immediate + - &imm + class: immediate immOffset: 0 out: - *t + - go: GetElem asm: "VPEXTR[BWDQ]" in: - class: vreg base: $b elemBits: $e - - class: immediate - immOffset: 0 + - *imm out: - class: greg base: $b bits: $e + - go: Set128 asm: "VINSERTI128" in: @@ -38,11 +40,20 @@ base: $t bits: 128 OverwriteElementBits: 8 - - class: immediate + - &imm01 # This immediate should be only 0 or 1 + class: immediate immOffset: 0 out: - *i8x32 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i8x32 + - *imm01 + out: + - *i8x16 + - go: Set128 asm: "VINSERTI128" in: @@ -56,11 +67,18 @@ base: $t bits: 128 OverwriteElementBits: 16 - - class: immediate - immOffset: 0 + - *imm01 out: - *i16x16 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i16x16 + - *imm01 + out: + - *i16x8 + - go: Set128 asm: "VINSERTI128" in: @@ -74,11 +92,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *i32x8 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i32x8 + - *imm01 + out: + - *i32x4 + - go: Set128 asm: "VINSERTI128" in: @@ 
-92,11 +117,18 @@ base: $t bits: 128 OverwriteElementBits: 64 - - class: immediate - immOffset: 0 + - *imm01 out: - *i64x4 +- go: Get128 + asm: "VEXTRACTI128" + in: + - *i64x4 + - *imm01 + out: + - *i64x2 + - go: Set128 asm: "VINSERTF128" in: @@ -110,11 +142,18 @@ base: $t bits: 128 OverwriteElementBits: 32 - - class: immediate - immOffset: 0 + - *imm01 out: - *f32x8 +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f32x8 + - *imm01 + out: + - *f32x4 + - go: Set128 asm: "VINSERTF128" in: @@ -126,7 +165,22 @@ class: vreg base: $t bits: 128 - - class: immediate - immOffset: 0 + - *imm01 out: - *f64x4 + +- go: Get128 + asm: "VEXTRACTF128" + in: + - *f64x4 + - *imm01 + out: + - *f64x2 + + + + + + + + From d0672853d4341b23d0beba9977f7aaa7c137281e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 02:42:34 +0000 Subject: [PATCH 122/200] internal/simdgen: make all compares between NaNs false This CL updates the immediate predicate of Equal, GreaterEqual and Greater. This CL generates CL 686235. Change-Id: Iffabd0704e9f8f5c8800d81a688367bda5642416 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686215 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 8 ++++---- internal/simdgen/ops/Compares/categories.yaml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index d5cf67a5..2384923b 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -151,13 +151,13 @@ documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual - constImm: 5 + constImm: 13 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater - constImm: 6 + constImm: 14 commutative: "false" extension: "AVX.*" documentation: !string |- @@ -199,14 +199,14 @@ documentation: !string |- // MaskedNotEqual compares for inequality. 
- go: MaskedGreaterEqual - constImm: 5 + constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - go: MaskedGreater - constImm: 6 + constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 08b153c7..3c607c76 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -37,13 +37,13 @@ documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual - constImm: 5 + constImm: 13 commutative: "false" extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater - constImm: 6 + constImm: 14 commutative: "false" extension: "AVX.*" documentation: !string |- @@ -85,14 +85,14 @@ documentation: !string |- // MaskedNotEqual compares for inequality. - go: MaskedGreaterEqual - constImm: 5 + constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // MaskedGreaterEqual compares for greater than or equal. - go: MaskedGreater - constImm: 6 + constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" From 8237fc3b7f473cdbc602e0a3cf86cf68cd5e246e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 17:27:23 +0000 Subject: [PATCH 123/200] internal/simdgen: remove FP bitwise logic operations. This CL generates CL 686496. 
Change-Id: I22ee7df6de59c11d00e041dfa56e2b1c442d82fa Reviewed-on: https://go-review.googlesource.com/c/arch/+/686555 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 70 ---------------------- internal/simdgen/ops/BitwiseLogic/go.yaml | 72 +---------------------- 2 files changed, 1 insertion(+), 141 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 18c937ac..c4e648cf 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -201,76 +201,6 @@ - *any out: - *any - -# FP operations. -# Set the [base] to be "int" to not include duplicates(excluding "uint"). -# [base] is not used when [overwriteBase] is present. -- go: And - asm: "VANDP[SD]" - in: - - &intToFloat - go: $t - base: int - overwriteBase: float - - *intToFloat - out: - - *intToFloat -- go: MaskedAnd - asm: "VANDP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: AndNot - asm: "VANDNP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedAndNot - asm: "VANDNP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Or - asm: "VORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedOr - asm: "VORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Xor - asm: "VXORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedXor - asm: "VXORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 7e7adf7a..79d149ca 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -76,74 +76,4 @@ - *any - *any out: - - *any - -# FP operations. -# Set the [base] to be "int" to not include duplicates(excluding "uint"). 
-# [base] is not used when [overwriteBase] is present. -- go: And - asm: "VANDP[SD]" - in: - - &intToFloat - go: $t - base: int - overwriteBase: float - - *intToFloat - out: - - *intToFloat -- go: MaskedAnd - asm: "VANDP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: AndNot - asm: "VANDNP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedAndNot - asm: "VANDNP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Or - asm: "VORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedOr - asm: "VORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat - -- go: Xor - asm: "VXORP[SD]" - in: - - *intToFloat - - *intToFloat - out: - - *intToFloat -- go: MaskedXor - asm: "VXORP[SD]" - in: - - class: mask - - *intToFloat - - *intToFloat - out: - - *intToFloat \ No newline at end of file + - *any \ No newline at end of file From dea0129702c94fb714f87a0bfe1e11e4b5bca696 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 8 Jul 2025 12:32:59 -0400 Subject: [PATCH 124/200] internal/simdgen: doc/type cleanup around 'shape' and its return values. No changes to the generated code, this helps a bit with IDE tool tips. 
Change-Id: I6aa286a3cf5b8f562c9149c943524b666345b643 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686377 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 82 ++++++++++++--------------------- 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index fa9920dd..371132ed 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -75,75 +75,53 @@ func numberLines(data []byte) string { return buf.String() } +type inShape uint8 +type outShape uint8 +type maskShape uint8 +type immShape uint8 + const ( - InvalidIn int = iota - PureVregIn - OneKmaskIn - OneImmIn - OneKmaskImmIn - PureKmaskIn + InvalidIn inShape = iota + PureVregIn // vector register input only + OneKmaskIn // vector and kmask input + OneImmIn // vector and immediate input + OneKmaskImmIn // vector, kmask, and immediate inputs + PureKmaskIn // only mask inputs. ) const ( - InvalidOut int = iota - NoOut - OneVregOut - OneGregOut - OneKmaskOut - OneVregOutAtIn + InvalidOut outShape = iota + NoOut // no output + OneVregOut // (one) vector register output + OneGregOut // (one) general register output + OneKmaskOut // mask output + OneVregOutAtIn // the first input is also the output ) const ( - InvalidMask int = iota - NoMask - OneMask - AllMasks + InvalidMask maskShape = iota + NoMask // no mask + OneMask // with mask (K1 to K7) + AllMasks // a K mask instruction (K0-K7) ) const ( - InvalidImm int = iota - NoImm - ConstImm - VarImm - ConstVarImm + InvalidImm immShape = iota + NoImm // no immediate + ConstImm // const only immediate + VarImm // pure imm argument provided by the users + ConstVarImm // a combination of user arg and const ) -// opShape returns the an int denoting the shape of the operation: -// -// shapeIn: -// InvalidIn: unknown, with err set to the error message -// PureVregIn: pure vreg operation -// OneKmaskIn: operation with one k mask 
input (TODO: verify if it's always opmask predicate) -// OneImmIn: operation with one imm input -// OneKmaskImmIn: operation with one k mask input and one imm input -// PureKmaskIn: it's a K mask instruction (which can use K0) -// -// shapeOut: -// InvalidOut: unknown, with err set to the error message -// NoOut: no outputs, this is invalid now. -// OneVregOut: one vreg output -// OneKmaskOut: one mask output -// OneVregOutAtIn: one vreg output, it's at the same time the first input -// -// maskType: -// InvalidMask: unknown, with err set to the error message -// NoMask: no mask -// OneMask: with mask (K1 to K7) -// AllMasks: it's a K mask instruction -// -// immType: -// InvalidImm: unrecognize immediate structure -// NoImm: no immediate -// ConstImm: const only immediate -// VarImm: pure imm argument provided by the users -// ConstVarImm: a combination of user arg and const +// opShape returns the several integers describing the shape of the operation, +// and modified versions of the op: // // opNoImm is op with its inputs excluding the const imm. -// opNoConstMask is op with its inputs excluding the const mask. // opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. -func (op *Operation) shape() (shapeIn, shapeOut, maskType, immType int, opNoImm Operation, opNoImmConstMask Operation) { +func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskShape, immType immShape, + opNoImm Operation, opNoImmConstMask Operation) { if len(op.Out) > 1 { panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } From f7578004bf0cfb0bc4e1f8dd80f425dcf09a63aa Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 18:06:02 +0000 Subject: [PATCH 125/200] internal/simdgen: change op name Masked$OP to $(OP)Masked This CL generates CL 686516. 
Change-Id: Ifa5320c656806b0e4aea921b27c0eb54671c9f36 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686575 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 252 +++++++++--------- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/gen_utility.go | 9 +- internal/simdgen/go.yaml | 130 ++++----- internal/simdgen/godefs.go | 2 +- internal/simdgen/ops/AddSub/categories.yaml | 16 +- internal/simdgen/ops/AddSub/go.yaml | 12 +- .../simdgen/ops/BitwiseLogic/categories.yaml | 16 +- internal/simdgen/ops/BitwiseLogic/go.yaml | 8 +- internal/simdgen/ops/Compares/categories.yaml | 28 +- internal/simdgen/ops/Compares/go.yaml | 10 +- .../simdgen/ops/FPonlyArith/categories.yaml | 52 ++-- internal/simdgen/ops/FPonlyArith/go.yaml | 14 +- .../simdgen/ops/GaloisField/categories.yaml | 12 +- internal/simdgen/ops/GaloisField/go.yaml | 6 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 12 +- internal/simdgen/ops/IntOnlyArith/go.yaml | 6 +- internal/simdgen/ops/MLOps/categories.yaml | 36 +-- internal/simdgen/ops/MLOps/go.yaml | 18 +- internal/simdgen/ops/MinMax/categories.yaml | 8 +- internal/simdgen/ops/MinMax/go.yaml | 12 +- internal/simdgen/ops/Mul/categories.yaml | 16 +- internal/simdgen/ops/Mul/go.yaml | 12 +- .../simdgen/ops/ShiftRotate/categories.yaml | 56 ++-- internal/simdgen/ops/ShiftRotate/go.yaml | 32 +-- 25 files changed, 389 insertions(+), 388 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 2384923b..bfb0ff80 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -9,18 +9,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. -- go: MaskedAdd +- go: AddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAdd adds corresponding elements of two vectors. 
-- go: MaskedSaturatedAdd + // AddMasked adds corresponding elements of two vectors. +- go: SaturatedAddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. + // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" @@ -31,18 +31,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. -- go: MaskedSub +- go: SubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSub subtracts corresponding elements of two vectors. -- go: MaskedSaturatedSub + // SubMasked subtracts corresponding elements of two vectors. +- go: SaturatedSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. + // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: "false" extension: "AVX.*" @@ -72,45 +72,45 @@ extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. -- go: MaskedAnd +- go: AndMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAnd performs a masked bitwise AND operation between two vectors. + // AndMasked performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. -- go: MaskedOr +- go: OrMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedOr performs a masked bitwise OR operation between two vectors. + // OrMasked performs a masked bitwise OR operation between two vectors. 
- go: AndNot commutative: "false" extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise AND NOT operation between two vectors. -- go: MaskedAndNot +- go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. -- go: MaskedXor +- go: XorMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedXor performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. # const imm predicate(holds for both float and int|uint): @@ -163,105 +163,105 @@ documentation: !string |- // Greater compares for greater than. -- go: MaskedEqual +- go: EqualMasked constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedEqual compares for equality, masked. -- go: MaskedLess + // EqualMasked compares for equality, masked. +- go: LessMasked constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLess compares for less than. -- go: MaskedLessEqual + // LessMasked compares for less than. +- go: LessEqualMasked constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLessEqual compares for less than or equal. -- go: MaskedIsNan # For float only. + // LessEqualMasked compares for less than or equal. +- go: IsNanMasked # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedIsNan checks if elements are NaN. 
Use as x.IsNan(x). -- go: MaskedNotEqual + // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). +- go: NotEqualMasked constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedNotEqual compares for inequality. -- go: MaskedGreaterEqual + // NotEqualMasked compares for inequality. +- go: GreaterEqualMasked constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreaterEqual compares for greater than or equal. -- go: MaskedGreater + // GreaterEqualMasked compares for greater than or equal. +- go: GreaterMasked constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreater compares for greater than. + // GreaterMasked compares for greater than. - go: Div commutative: "false" extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. -- go: MaskedDiv +- go: DivMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedDiv divides elements of two vectors. + // DivMasked divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. -- go: MaskedSqrt +- go: SqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedSqrt computes the square root of each element. + // SqrtMasked computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocal computes an approximate reciprocal of each element. + // ApproximateReciprocalMasked computes an approximate reciprocal of each element. 
- go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. + // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. +- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulByPowOf2 multiplies elements by a power of 2. + // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round commutative: "false" @@ -269,20 +269,20 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. -- go: MaskedRoundWithPrecision +- go: RoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedRoundWithPrecision rounds elements with specified precision. -- go: MaskedDiffWithRoundWithPrecision + // RoundWithPrecisionMasked rounds elements with specified precision. +- go: DiffWithRoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - go: Floor commutative: "false" @@ -290,20 +290,20 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. 
-- go: MaskedFloorWithPrecision +- go: FloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedFloorWithPrecision rounds elements down with specified precision, masked. -- go: MaskedDiffWithFloorWithPrecision + // FloorWithPrecisionMasked rounds elements down with specified precision, masked. +- go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - go: Ceil commutative: "false" @@ -311,20 +311,20 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. -- go: MaskedCeilWithPrecision +- go: CeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedCeilWithPrecision rounds elements up with specified precision, masked. -- go: MaskedDiffWithCeilWithPrecision + // CeilWithPrecisionMasked rounds elements up with specified precision, masked. +- go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc commutative: "false" @@ -332,63 +332,63 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. -- go: MaskedTruncWithPrecision +- go: TruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedTruncWithPrecision truncates elements with specified precision. 
-- go: MaskedDiffWithTruncWithPrecision + // TruncWithPrecisionMasked truncates elements with specified precision. +- go: DiffWithTruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub commutative: "false" extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with + // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average commutative: "true" extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. -- go: MaskedAverage +- go: AverageMasked commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // MaskedAverage computes the rounded average of corresponding elements. + // AverageMasked computes the rounded average of corresponding elements. - go: Absolute commutative: "false" @@ -396,12 +396,12 @@ extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. -- go: MaskedAbsolute +- go: AbsoluteMasked commutative: "false" masked: "true" extension: "AVX512.*" documentation: !string |- - // MaskedAbsolute computes the absolute value of each element. + // AbsoluteMasked computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) @@ -412,24 +412,24 @@ // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: MaskedPopCount +- go: PopCountMasked commutative: "false" masked: "true" extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // MaskedPopCount counts the number of set bits in each element. + // PopCountMasked counts the number of set bits in each element. 
- go: PairDotProd commutative: "false" extension: "AVX.*" documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedPairDotProd +- go: PairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProd multiplies the elements and add the pairs together, + // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" @@ -437,12 +437,12 @@ documentation: !string |- // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. @@ -456,85 +456,85 @@ extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
+ // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. 
-- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. -- go: MaskedFusedMultiplyAddSub + // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. +- go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd + // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. + // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. - go: Max commutative: "true" extension: "AVX.*" documentation: !string |- // Max computes the maximum of corresponding elements. -- go: MaskedMax +- go: MaxMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMax computes the maximum of corresponding elements. + // MaxMasked computes the maximum of corresponding elements. - go: Min commutative: "true" extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. 
-- go: MaskedMin +- go: MinMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMin computes the minimum of corresponding elements. + // MinMasked computes the minimum of corresponding elements. - go: SetElem commutative: "false" extension: "AVX.*" @@ -576,70 +576,70 @@ extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the low part of the result. -- go: MaskedMul +- go: MulMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMul multiplies corresponding elements of two vectors, masked. -- go: MaskedMulEvenWiden + // MulMasked multiplies corresponding elements of two vectors, masked. +- go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MaskedMulHigh +- go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulHigh multiplies elements and stores the high part of the result, masked. -- go: MaskedMulLow + // MulHighMasked multiplies elements and stores the high part of the result, masked. +- go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulLow multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result, masked. - go: ShiftAllLeft nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
-- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: MaskedShiftAllRight +- go: ShiftAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
- go: ShiftLeft nameAndSizeCheck: "true" @@ -647,98 +647,98 @@ extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: MaskedShiftLeft +- go: ShiftLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: MaskedShiftRight +- go: ShiftRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
-- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -- go: MaskedRotateLeft + // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. +- go: RotateLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: MaskedRotateAllRight + // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: RotateAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. -- go: MaskedRotateRight + // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. +- go: RotateRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
+ // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 16286736..7dcbc145 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -295,7 +295,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { var vec string var vecOp Operand allSameVec := true - masked := strings.HasPrefix(gOp.Go, "Masked") + masked := strings.HasSuffix(gOp.Go, "Masked") skippedMaskCnt := 0 vecCnt := 0 for i, in := range gOp.In { diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 371132ed..42aab212 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -521,12 +521,13 @@ func splitMask(ops []Operation) ([]Operation, error) { op2 := op // The ops should be sorted when calling this function, the mask is in the end, drop the mask op2.In = slices.Clone(op.In)[:len(op.In)-1] - if !strings.HasPrefix(op2.Go, "Masked") { - return nil, fmt.Errorf("simdgen only recognizes masked operations with name starting with 'Masked': %s", op) + if !strings.HasSuffix(op2.Go, "Masked") { + return nil, fmt.Errorf("simdgen only recognizes masked operations with name ending with 'Masked': %s", op) } - op2.Go = strings.ReplaceAll(op2.Go, "Masked", "") + maskedOpName := op2.Go + op2.Go = strings.TrimSuffix(op2.Go, "Masked") if op2.Documentation != nil { - *op2.Documentation = strings.ReplaceAll(*op2.Documentation, "Masked", "") + 
*op2.Documentation = strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) } splited = append(splited, op2) } else { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index c4e648cf..16dbf1e6 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -8,7 +8,7 @@ - *any out: - *any -- go: MaskedAdd +- go: AddMasked asm: "VPADD[BWDQ]|VADDP[SD]" in: - class: mask @@ -35,7 +35,7 @@ - *uint out: - *uint -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -43,7 +43,7 @@ - *int out: - *int -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -60,7 +60,7 @@ - *any out: &1any - *any -- go: MaskedSub +- go: SubMasked asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask @@ -83,7 +83,7 @@ - *uint out: - *uint -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask @@ -91,7 +91,7 @@ - *int out: - *int -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask @@ -145,7 +145,7 @@ # Dword and Qword. # TODO: should we wildcard other smaller elemBits to VPANDQ or # VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. 
-- go: MaskedAnd +- go: AndMasked asm: "VPAND[DQ]" in: - class: mask @@ -161,7 +161,7 @@ - *any out: - *any -- go: MaskedAndNot +- go: AndNotMasked asm: "VPANDN[DQ]" in: - class: mask @@ -177,7 +177,7 @@ - *any out: - *any -- go: MaskedOr +- go: OrMasked asm: "VPOR[DQ]" in: - class: mask @@ -193,7 +193,7 @@ - *any out: - *any -- go: MaskedXor +- go: XorMasked asm: "VPXOR[DQ]" in: - class: mask @@ -219,7 +219,7 @@ in: *int2 out: - *anyvregToMask -- go: MaskedEqual +- go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: &maskint2 - class: mask @@ -227,14 +227,14 @@ - *int out: - class: mask -- go: MaskedGreater +- go: GreaterMasked asm: "V?PCMPGT[BWDQ]" in: *maskint2 out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMP[BWDQ]" in: - class: mask @@ -246,7 +246,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMPU[BWDQ]" in: - class: mask @@ -273,7 +273,7 @@ - go: $t # We still need the output to be the same shape as inputs. 
overwriteBase: int overwriteClass: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked asm: "VCMPP[SD]" in: - class: mask @@ -292,7 +292,7 @@ - *fp out: &1fp - *fp -- go: MaskedDiv +- go: DivMasked asm: "V?DIVP[SD]" in: &1mask2fp - class: mask @@ -303,13 +303,13 @@ asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: MaskedSqrt +- go: SqrtMasked asm: "V?SQRTP[SD]" in: &1mask1fp - class: mask - *fp out: *1fp -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked asm: "VRCP14P[SD]" in: *1mask1fp out: *1fp @@ -317,11 +317,11 @@ asm: "V?RSQRTPS" in: *1fp out: *1fp -- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked asm: "VRSQRT14P[SD]" in: *1mask1fp out: *1fp -- go: MaskedMulByPowOf2 +- go: MulByPowOf2Masked asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp @@ -334,7 +334,7 @@ const: 0 # place holder out: *1fp -- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VRNDSCALEP[SD]" in: - class: mask @@ -343,7 +343,7 @@ const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
out: *1fp -- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" in: - class: mask @@ -360,7 +360,7 @@ - *fp out: - *fp -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs @@ -379,14 +379,14 @@ out: - *uint8 -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked asm: VGF2P8MULB in: - class: mask @@ -405,7 +405,7 @@ - *uint_t out: - *uint_t -- go: MaskedAverage +- go: AverageMasked asm: "VPAVG[BW]" in: - class: mask @@ -424,7 +424,7 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: MaskedAbsolute +- go: AbsoluteMasked asm: "VPABS[BWDQ]" in: - class: mask @@ -446,7 +446,7 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: MaskedPopCount +- go: PopCountMasked asm: "VPOPCNT[BWDQ]" in: - class: mask @@ -465,7 +465,7 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: MaskedPairDotProd +- go: PairDotProdMasked asm: VPMADDWD in: - class: mask @@ -484,7 +484,7 @@ base: int out: - *int2 -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked asm: VPMADDUBSW in: - class: mask @@ -525,7 +525,7 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" in: - *qdpa_acc @@ -542,7 +542,7 @@ - *qdpa_src2 out: - *qdpa_acc -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" in: - *qdpa_acc @@ -568,7 +568,7 @@ 
overwriteElementBits: 16 out: - *pdpa_acc -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked asm: "VPDPWSSD" in: - *pdpa_acc @@ -585,7 +585,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" in: - *pdpa_acc @@ -594,7 +594,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd +- go: FusedMultiplyAddMasked asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op @@ -605,7 +605,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub +- go: FusedMultiplyAddSubMasked asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -614,7 +614,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplySubAdd +- go: FusedMultiplySubAddMasked asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op @@ -641,14 +641,14 @@ - *uint out: &1uint - *uint -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXS[BWDQ]" in: &1mask2int - class: mask - *int - *int out: *1int -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXU[BWDQ]" in: &1mask2uint - class: mask @@ -664,11 +664,11 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MaskedMin +- go: MinMasked asm: "V?PMINS[BWDQ]" in: *1mask2int out: *1int -- go: MaskedMin +- go: MinMasked asm: "V?PMINU[BWDQ]" in: *1mask2uint out: *1uint @@ -682,7 +682,7 @@ - *float out: &1float - *float -- go: MaskedMax +- go: MaxMasked asm: "V?MAXP[SD]" in: &1mask2float - class: mask @@ -693,7 +693,7 @@ asm: "V?MINP[SD]" in: *2float out: *1float -- go: MaskedMin +- go: MinMasked asm: "V?MINP[SD]" in: *1mask2float out: *1float @@ -893,7 +893,7 @@ - *fp out: - *fp -- go: MaskedMul +- go: MulMasked asm: "VMULP[SD]" in: - class: mask @@ -928,7 +928,7 @@ - &uint2 go: $t2 base: uint -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULDQ" in: - class: mask @@ -936,7 +936,7 @@ - *int out: - *int2 -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULUDQ" in: - class: mask @@ -962,7 +962,7 @@ - *uint out: - *uint2 -- go: MaskedMulHigh +- go: MulHighMasked 
asm: "VPMULHW" in: - class: mask @@ -970,7 +970,7 @@ - *int out: - *int2 -- go: MaskedMulHigh +- go: MulHighMasked asm: "VPMULHUW" in: - class: mask @@ -989,7 +989,7 @@ - *int out: - *int2 -- go: MaskedMulLow +- go: MulLowMasked asm: "VPMULL[WDQ]" in: - class: mask @@ -1009,7 +1009,7 @@ go: Uint64x2 out: - *any -- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked asm: "VPSLL[WDQ]" in: - class: mask @@ -1024,7 +1024,7 @@ - *vecAsScalar64 out: - *any -- go: MaskedShiftAllRight +- go: ShiftAllRightMasked asm: "VPSRL[WDQ]" in: - class: mask @@ -1041,7 +1041,7 @@ - *vecAsScalar64 out: - *int -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked asm: "VPSRA[WDQ]" in: - class: mask @@ -1058,7 +1058,7 @@ - *any out: - *any -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLV[WD]" in: - class: mask @@ -1077,7 +1077,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLVQ" in: - class: mask @@ -1092,7 +1092,7 @@ - *any out: - *any -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLV[WD]" in: - class: mask @@ -1108,7 +1108,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLVQ" in: - class: mask @@ -1123,7 +1123,7 @@ - *any out: - *any -- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked asm: "VPSRAV[WDQ]" in: - class: mask @@ -1133,7 +1133,7 @@ - *any # Rotate -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked asm: "VPROL[DQ]" in: - class: mask @@ -1143,7 +1143,7 @@ immOffset: 0 out: - *any -- go: MaskedRotateAllRight +- go: RotateAllRightMasked asm: "VPROR[DQ]" in: - class: mask @@ -1151,7 +1151,7 @@ - *pureImm out: - *any -- go: MaskedRotateLeft +- go: RotateLeftMasked asm: "VPROLV[DQ]" in: - class: mask @@ -1159,7 +1159,7 @@ - *any out: - *any -- go: MaskedRotateRight +- go: RotateRightMasked asm: "VPRORV[DQ]" in: - class: mask @@ -1169,7 +1169,7 @@ - *any # Bizzare shifts. 
-- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked asm: "VPSHLD[WDQ]" in: - class: mask @@ -1178,7 +1178,7 @@ - *pureImm out: - *any -- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked asm: "VPSHRD[WDQ]" in: - class: mask @@ -1187,7 +1187,7 @@ - *pureImm out: - *any -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked asm: "VPSHLDV[WDQ]" in: - *any @@ -1196,7 +1196,7 @@ - *any out: - *any -- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked asm: "VPSHRDV[WDQ]" in: - *any diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index e5ad5b82..52cfd1e8 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -283,7 +283,7 @@ func writeGoDefs(path string, cl unify.Closure) error { typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") - formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/stubs_amd64.go") + formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") formatWriteAndClose(writeSIMDTestsWrapper(deduped), path, "src/"+simdPackage+"/simd_wrapped_test.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index e87ead1d..95775bb8 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -9,18 +9,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. 
-- go: MaskedAdd +- go: AddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAdd adds corresponding elements of two vectors. -- go: MaskedSaturatedAdd + // AddMasked adds corresponding elements of two vectors. +- go: SaturatedAddMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedAdd adds corresponding elements of two vectors with saturation. + // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub commutative: "false" extension: "AVX.*" @@ -31,18 +31,18 @@ extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. -- go: MaskedSub +- go: SubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSub subtracts corresponding elements of two vectors. -- go: MaskedSaturatedSub + // SubMasked subtracts corresponding elements of two vectors. +- go: SaturatedSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedSub subtracts corresponding elements of two vectors with saturation. + // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. 
- go: PairwiseAdd commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index c2df1e2c..793bc489 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -8,7 +8,7 @@ - *any out: - *any -- go: MaskedAdd +- go: AddMasked asm: "VPADD[BWDQ]|VADDP[SD]" in: - class: mask @@ -35,7 +35,7 @@ - *uint out: - *uint -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -43,7 +43,7 @@ - *int out: - *int -- go: MaskedSaturatedAdd +- go: SaturatedAddMasked asm: "VPADDS[BWDQ]" in: - class: mask @@ -60,7 +60,7 @@ - *any out: &1any - *any -- go: MaskedSub +- go: SubMasked asm: "VPSUB[BWDQ]|VSUBP[SD]" in: - class: mask @@ -83,7 +83,7 @@ - *uint out: - *uint -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask @@ -91,7 +91,7 @@ - *int out: - *int -- go: MaskedSaturatedSub +- go: SaturatedSubMasked asm: "VPSUBS[BWDQ]" in: - class: mask diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 4d948364..1ef1d360 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -4,44 +4,44 @@ extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. -- go: MaskedAnd +- go: AndMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedAnd performs a masked bitwise AND operation between two vectors. + // AndMasked performs a masked bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. -- go: MaskedOr +- go: OrMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedOr performs a masked bitwise OR operation between two vectors. 
+ // OrMasked performs a masked bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise AND NOT operation between two vectors. -- go: MaskedAndNot +- go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedAndNot performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. - go: Xor commutative: "true" extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. -- go: MaskedXor +- go: XorMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedXor performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a masked bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 79d149ca..49e2dbc9 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -21,7 +21,7 @@ # Dword and Qword. # TODO: should we wildcard other smaller elemBits to VPANDQ or # VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. 
-- go: MaskedAnd +- go: AndMasked asm: "VPAND[DQ]" in: - class: mask @@ -37,7 +37,7 @@ - *any out: - *any -- go: MaskedAndNot +- go: AndNotMasked asm: "VPANDN[DQ]" in: - class: mask @@ -53,7 +53,7 @@ - *any out: - *any -- go: MaskedOr +- go: OrMasked asm: "VPOR[DQ]" in: - class: mask @@ -69,7 +69,7 @@ - *any out: - *any -- go: MaskedXor +- go: XorMasked asm: "VPXOR[DQ]" in: - class: mask diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 3c607c76..3b021e4c 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -49,52 +49,52 @@ documentation: !string |- // Greater compares for greater than. -- go: MaskedEqual +- go: EqualMasked constImm: 0 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedEqual compares for equality, masked. -- go: MaskedLess + // EqualMasked compares for equality, masked. +- go: LessMasked constImm: 1 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLess compares for less than. -- go: MaskedLessEqual + // LessMasked compares for less than. +- go: LessEqualMasked constImm: 2 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedLessEqual compares for less than or equal. -- go: MaskedIsNan # For float only. + // LessEqualMasked compares for less than or equal. +- go: IsNanMasked # For float only. constImm: 3 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedIsNan checks if elements are NaN. Use as x.IsNan(x). -- go: MaskedNotEqual + // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). +- go: NotEqualMasked constImm: 4 masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedNotEqual compares for inequality. -- go: MaskedGreaterEqual + // NotEqualMasked compares for inequality. 
+- go: GreaterEqualMasked constImm: 13 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreaterEqual compares for greater than or equal. -- go: MaskedGreater + // GreaterEqualMasked compares for greater than or equal. +- go: GreaterMasked constImm: 14 masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGreater compares for greater than. + // GreaterMasked compares for greater than. diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index 2fc1f225..8e46cdbd 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -17,7 +17,7 @@ in: *int2 out: - *anyvregToMask -- go: MaskedEqual +- go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: &maskint2 - class: mask @@ -25,14 +25,14 @@ - *int out: - class: mask -- go: MaskedGreater +- go: GreaterMasked asm: "V?PCMPGT[BWDQ]" in: *maskint2 out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMP[BWDQ]" in: - class: mask @@ -44,7 +44,7 @@ const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked asm: "VPCMPU[BWDQ]" in: - class: mask @@ -71,7 +71,7 @@ - go: $t # We still need the output to be the same shape as inputs. 
overwriteBase: int overwriteClass: mask -- go: Masked(Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked asm: "VCMPP[SD]" in: - class: mask diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 51dfd04d..356b06d3 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -4,51 +4,51 @@ extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. -- go: MaskedDiv +- go: DivMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedDiv divides elements of two vectors. + // DivMasked divides elements of two vectors. - go: Sqrt commutative: "false" extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. -- go: MaskedSqrt +- go: SqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedSqrt computes the square root of each element. + // SqrtMasked computes the square root of each element. - go: ApproximateReciprocal commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocal computes an approximate reciprocal of each element. + // ApproximateReciprocalMasked computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: "false" extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
-- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. -- go: MaskedMulByPowOf2 # This operation is all after AVX512, the unmasked version will be generated. + // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. +- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: "false" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulByPowOf2 multiplies elements by a power of 2. + // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round commutative: "false" @@ -56,20 +56,20 @@ constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. -- go: MaskedRoundWithPrecision +- go: RoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedRoundWithPrecision rounds elements with specified precision. -- go: MaskedDiffWithRoundWithPrecision + // RoundWithPrecisionMasked rounds elements with specified precision. +- go: DiffWithRoundWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 0 masked: "true" documentation: !string |- - // MaskedDiffWithRoundWithPrecision computes the difference after rounding with specified precision. + // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - go: Floor commutative: "false" @@ -77,20 +77,20 @@ constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. -- go: MaskedFloorWithPrecision +- go: FloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedFloorWithPrecision rounds elements down with specified precision, masked. 
-- go: MaskedDiffWithFloorWithPrecision + // FloorWithPrecisionMasked rounds elements down with specified precision, masked. +- go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 1 masked: "true" documentation: !string |- - // MaskedDiffWithFloorWithPrecision computes the difference after flooring with specified precision. + // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - go: Ceil commutative: "false" @@ -98,20 +98,20 @@ constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. -- go: MaskedCeilWithPrecision +- go: CeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedCeilWithPrecision rounds elements up with specified precision, masked. -- go: MaskedDiffWithCeilWithPrecision + // CeilWithPrecisionMasked rounds elements up with specified precision, masked. +- go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 2 masked: "true" documentation: !string |- - // MaskedDiffWithCeilWithPrecision computes the difference after ceiling with specified precision. + // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc commutative: "false" @@ -119,20 +119,20 @@ constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. -- go: MaskedTruncWithPrecision +- go: TruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedTruncWithPrecision truncates elements with specified precision. -- go: MaskedDiffWithTruncWithPrecision + // TruncWithPrecisionMasked truncates elements with specified precision. 
+- go: DiffWithTruncWithPrecisionMasked commutative: "false" extension: "AVX.*" constImm: 3 masked: "true" documentation: !string |- - // MaskedDiffWithTruncWithPrecision computes the difference after truncating with specified precision. + // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub commutative: "false" diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 48e071ec..29a7f43b 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -8,7 +8,7 @@ - *fp out: &1fp - *fp -- go: MaskedDiv +- go: DivMasked asm: "V?DIVP[SD]" in: &1mask2fp - class: mask @@ -19,13 +19,13 @@ asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: MaskedSqrt +- go: SqrtMasked asm: "V?SQRTP[SD]" in: &1mask1fp - class: mask - *fp out: *1fp -- go: MaskedApproximateReciprocal +- go: ApproximateReciprocalMasked asm: "VRCP14P[SD]" in: *1mask1fp out: *1fp @@ -33,11 +33,11 @@ asm: "V?RSQRTPS" in: *1fp out: *1fp -- go: MaskedApproximateReciprocalOfSqrt +- go: ApproximateReciprocalOfSqrtMasked asm: "VRSQRT14P[SD]" in: *1mask1fp out: *1fp -- go: MaskedMulByPowOf2 +- go: MulByPowOf2Masked asm: "VSCALEFP[SD]" in: *1mask2fp out: *1fp @@ -50,7 +50,7 @@ const: 0 # place holder out: *1fp -- go: "Masked(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VRNDSCALEP[SD]" in: - class: mask @@ -59,7 +59,7 @@ const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
out: *1fp -- go: "MaskedDiffWith(Round|Ceil|Floor|Trunc)(SuppressException)?WithPrecision" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" in: - class: mask diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 0b3978a4..84b64cc1 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,27 +1,27 @@ !sum -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8): + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldAffineTransform computes an affine transformation in GF(2^8), + // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedGaloisFieldMul computes element-wise GF(2^8) multiplication with + // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index c4d02e17..84dc1619 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -1,5 +1,5 @@ !sum -- go: MaskedGaloisFieldAffineTransform +- go: GaloisFieldAffineTransformMasked asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs @@ -18,14 +18,14 @@ out: - *uint8 -- go: MaskedGaloisFieldAffineTransformInversed +- go: GaloisFieldAffineTransformInversedMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: MaskedGaloisFieldMul +- go: GaloisFieldMulMasked asm: VGF2P8MULB in: - class: mask diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index b6c83bf3..96015d28 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -4,12 +4,12 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. -- go: MaskedAverage +- go: AverageMasked commutative: "true" masked: "true" extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // MaskedAverage computes the rounded average of corresponding elements. + // AverageMasked computes the rounded average of corresponding elements. 
- go: Absolute commutative: "false" @@ -17,12 +17,12 @@ extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. -- go: MaskedAbsolute +- go: AbsoluteMasked commutative: "false" masked: "true" extension: "AVX512.*" documentation: !string |- - // MaskedAbsolute computes the absolute value of each element. + // AbsoluteMasked computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) @@ -33,9 +33,9 @@ // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: MaskedPopCount +- go: PopCountMasked commutative: "false" masked: "true" extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // MaskedPopCount counts the number of set bits in each element. \ No newline at end of file + // PopCountMasked counts the number of set bits in each element. \ No newline at end of file diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml index e8aca3c6..3ccce6f0 100644 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -10,7 +10,7 @@ - *uint_t out: - *uint_t -- go: MaskedAverage +- go: AverageMasked asm: "VPAVG[BW]" in: - class: mask @@ -29,7 +29,7 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: MaskedAbsolute +- go: AbsoluteMasked asm: "VPABS[BWDQ]" in: - class: mask @@ -51,7 +51,7 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: MaskedPopCount +- go: PopCountMasked asm: "VPOPCNT[BWDQ]" in: - class: mask diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 54911b16..343b8f54 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ 
b/internal/simdgen/ops/MLOps/categories.yaml @@ -5,12 +5,12 @@ documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedPairDotProd +- go: PairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProd multiplies the elements and add the pairs together, + // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" @@ -18,12 +18,12 @@ documentation: !string |- // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. @@ -37,60 +37,60 @@ extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
+ // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. 
-- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedSaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -- go: MaskedFusedMultiplyAdd + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. +- go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAdd performs `(v1 * v2) + v3`. -- go: MaskedFusedMultiplyAddSub + // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. +- go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -- go: MaskedFusedMultiplySubAdd + // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +- go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedFusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. + // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index da894ac7..fb6b4fd1 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -10,7 +10,7 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: MaskedPairDotProd +- go: PairDotProdMasked asm: VPMADDWD in: - class: mask @@ -29,7 +29,7 @@ base: int out: - *int2 -- go: MaskedSaturatedUnsignedSignedPairDotProd +- go: SaturatedUnsignedSignedPairDotProdMasked asm: VPMADDUBSW in: - class: mask @@ -70,7 +70,7 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: MaskedUnsignedSignedQuadDotProdAccumulate +- go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" in: - *qdpa_acc @@ -87,7 +87,7 @@ - *qdpa_src2 out: - *qdpa_acc -- go: MaskedSaturatedUnsignedSignedQuadDotProdAccumulate +- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" in: - *qdpa_acc @@ -113,7 +113,7 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: MaskedPairDotProdAccumulate +- go: PairDotProdAccumulateMasked asm: "VPDPWSSD" in: - *pdpa_acc @@ -130,7 +130,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedSaturatedPairDotProdAccumulate +- go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" in: - *pdpa_acc @@ -139,7 +139,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: MaskedFusedMultiplyAdd +- go: FusedMultiplyAddMasked asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op @@ -150,7 +150,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplyAddSub +- go: FusedMultiplyAddSubMasked asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -159,7 +159,7 @@ - *fma_op out: - *fma_op -- go: MaskedFusedMultiplySubAdd +- go: FusedMultiplySubAddMasked asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index c64eb24e..33578ee4 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -4,20 +4,20 @@ extension: "AVX.*" 
documentation: !string |- // Max computes the maximum of corresponding elements. -- go: MaskedMax +- go: MaxMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMax computes the maximum of corresponding elements. + // MaxMasked computes the maximum of corresponding elements. - go: Min commutative: "true" extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. -- go: MaskedMin +- go: MinMasked commutative: "true" masked: "true" extension: "AVX.*" documentation: !string |- - // MaskedMin computes the minimum of corresponding elements. + // MinMasked computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml index f307e6b6..db4286f3 100644 --- a/internal/simdgen/ops/MinMax/go.yaml +++ b/internal/simdgen/ops/MinMax/go.yaml @@ -17,14 +17,14 @@ - *uint out: &1uint - *uint -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXS[BWDQ]" in: &1mask2int - class: mask - *int - *int out: *1int -- go: MaskedMax +- go: MaxMasked asm: "V?PMAXU[BWDQ]" in: &1mask2uint - class: mask @@ -40,11 +40,11 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MaskedMin +- go: MinMasked asm: "V?PMINS[BWDQ]" in: *1mask2int out: *1int -- go: MaskedMin +- go: MinMasked asm: "V?PMINU[BWDQ]" in: *1mask2uint out: *1uint @@ -58,7 +58,7 @@ - *float out: &1float - *float -- go: MaskedMax +- go: MaxMasked asm: "V?MAXP[SD]" in: &1mask2float - class: mask @@ -69,7 +69,7 @@ asm: "V?MINP[SD]" in: *2float out: *1float -- go: MaskedMin +- go: MinMasked asm: "V?MINP[SD]" in: *1mask2float out: *1float \ No newline at end of file diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index c0f87beb..34b3ab56 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -20,28 +20,28 @@ extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the 
low part of the result. -- go: MaskedMul +- go: MulMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMul multiplies corresponding elements of two vectors, masked. -- go: MaskedMulEvenWiden + // MulMasked multiplies corresponding elements of two vectors, masked. +- go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulEvenWiden multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MaskedMulHigh +- go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulHigh multiplies elements and stores the high part of the result, masked. -- go: MaskedMulLow + // MulHighMasked multiplies elements and stores the high part of the result, masked. +- go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MaskedMulLow multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result, masked. 
diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index a75f4188..9ae3a850 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -10,7 +10,7 @@ - *fp out: - *fp -- go: MaskedMul +- go: MulMasked asm: "VMULP[SD]" in: - class: mask @@ -45,7 +45,7 @@ - &uint2 go: $t2 base: uint -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULDQ" in: - class: mask @@ -53,7 +53,7 @@ - *int out: - *int2 -- go: MaskedMulEvenWiden +- go: MulEvenWidenMasked asm: "VPMULUDQ" in: - class: mask @@ -79,7 +79,7 @@ - *uint out: - *uint2 -- go: MaskedMulHigh +- go: MulHighMasked asm: "VPMULHW" in: - class: mask @@ -87,7 +87,7 @@ - *int out: - *int2 -- go: MaskedMulHigh +- go: MulHighMasked asm: "VPMULHUW" in: - class: mask @@ -106,7 +106,7 @@ - *int out: - *int2 -- go: MaskedMulLow +- go: MulLowMasked asm: "VPMULL[WDQ]" in: - class: mask diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 91a0e3d0..b8bcb28e 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -5,39 +5,39 @@ extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
-- go: MaskedShiftAllRight +- go: ShiftAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: "true" @@ -45,98 +45,98 @@ extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: MaskedShiftLeft +- go: ShiftLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: MaskedShiftRight +- go: ShiftRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightSignExtended nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -- go: MaskedRotateLeft + // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. 
+- go: RotateLeftMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: MaskedRotateAllRight + // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. +- go: RotateAllRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateAllRight rotates each element to the right by the number of bits specified by the immediate. -- go: MaskedRotateRight + // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. +- go: RotateRightMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedRotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. + // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
-- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the + // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // MaskedShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the + // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 7205bab3..a42241db 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -11,7 +11,7 @@ go: Uint64x2 out: - *any -- go: MaskedShiftAllLeft +- go: ShiftAllLeftMasked asm: "VPSLL[WDQ]" in: - class: mask @@ -26,7 +26,7 @@ - *vecAsScalar64 out: - *any -- go: MaskedShiftAllRight +- go: ShiftAllRightMasked asm: "VPSRL[WDQ]" in: - class: mask @@ -43,7 +43,7 @@ - *vecAsScalar64 out: - *int -- go: MaskedShiftAllRightSignExtended +- go: ShiftAllRightSignExtendedMasked asm: "VPSRA[WDQ]" in: - class: mask @@ -60,7 +60,7 @@ - *any out: - *any -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLV[WD]" in: - class: mask @@ -79,7 +79,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftLeft +- go: ShiftLeftMasked asm: "VPSLLVQ" in: - class: mask @@ -94,7 +94,7 @@ - *any out: - *any -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLV[WD]" in: - class: mask @@ -110,7 +110,7 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: MaskedShiftRight +- go: ShiftRightMasked asm: "VPSRLVQ" in: - class: mask @@ -125,7 +125,7 @@ - *any out: - *any -- go: MaskedShiftRightSignExtended +- go: ShiftRightSignExtendedMasked asm: "VPSRAV[WDQ]" in: - class: mask @@ -135,7 +135,7 @@ - *any # Rotate -- go: MaskedRotateAllLeft +- go: RotateAllLeftMasked asm: "VPROL[DQ]" in: - class: mask @@ -145,7 +145,7 @@ immOffset: 0 out: - *any -- go: MaskedRotateAllRight +- go: RotateAllRightMasked asm: "VPROR[DQ]" in: - class: mask @@ -153,7 +153,7 @@ - *pureImm out: - *any -- go: MaskedRotateLeft +- go: RotateLeftMasked asm: "VPROLV[DQ]" in: - class: mask @@ -161,7 +161,7 @@ - *any out: - *any -- go: MaskedRotateRight +- go: RotateRightMasked asm: "VPRORV[DQ]" in: - class: mask @@ -171,7 +171,7 @@ - *any # Bizzare shifts. 
-- go: MaskedShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftAndFillUpperFromMasked asm: "VPSHLD[WDQ]" in: - class: mask @@ -180,7 +180,7 @@ - *pureImm out: - *any -- go: MaskedShiftAllRightAndFillUpperFrom +- go: ShiftAllRightAndFillUpperFromMasked asm: "VPSHRD[WDQ]" in: - class: mask @@ -189,7 +189,7 @@ - *pureImm out: - *any -- go: MaskedShiftLeftAndFillUpperFrom +- go: ShiftLeftAndFillUpperFromMasked asm: "VPSHLDV[WDQ]" in: - *any @@ -198,7 +198,7 @@ - *any out: - *any -- go: MaskedShiftRightAndFillUpperFrom +- go: ShiftRightAndFillUpperFromMasked asm: "VPSHRDV[WDQ]" in: - *any From a3ce8a70821471585a478cd4d46099505dedc7f3 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 20:28:25 +0000 Subject: [PATCH 126/200] internal/simdgen: rename register mask fp to v This CL also makes a special case for VPSLL This CL generates CL 686476. Change-Id: I13b13901e44c123a02ad869ce0b84d052047b485 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686556 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdMachineOps.go | 17 +++++++-- internal/simdgen/gen_simdssa.go | 48 +++++++++++++------------- internal/simdgen/gen_utility.go | 8 ++--- 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 7e4f1d1a..509cafcf 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -13,7 +13,7 @@ import ( const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, fpgp regInfo) []opData { +func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +46,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"fp11": true, "fp21": true, "fp2k": true, "fp2kfp": true, "fp2kk": true, "fpkfp": true, "fp31": true, "fp3kfp": true, "fpgpfp": true, "fpgp": true} + regInfoSet := map[string]bool{"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -69,6 +69,19 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { if err != nil { panic(err) } + idx, err := checkVecAsScalar(op) + if err != nil { + panic(err) + } + if idx != -1 { + if regInfo == "v21" { + regInfo = "vfpv" + } else if regInfo == "v2kv" { + regInfo = "vfpkv" + } else { + panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)) + } + } if _, ok := regInfoSet[regInfo]; !ok { panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. 
Op is %s", regInfo, op)) } diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index ee30c8eb..d42b264b 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -60,26 +60,26 @@ type tplSSAData struct { func writeSIMDSSA(ops []Operation) *bytes.Buffer { var ZeroingMask []string regInfoKeys := []string{ - "fp11", - "fp21", - "fp2k", - "fp2kfp", - "fp2kk", - "fpkfp", - "fp31", - "fp3kfp", - "fp11Imm8", - "fpkfpImm8", - "fp21Imm8", - "fp2kImm8", - "fp2kkImm8", - "fp31ResultInArg0", - "fp3kfpResultInArg0", - "fpXfp", - "fpXkfp", - "fpgpfpImm8", - "fpgpImm8", - "fp2kfpImm8", + "v11", + "v21", + "v2k", + "v2kv", + "v2kk", + "vkv", + "v31", + "v3kv", + "v11Imm8", + "vkvImm8", + "v21Imm8", + "v2kImm8", + "v2kkImm8", + "v31ResultInArg0", + "v3kvResultInArg0", + "vfpv", + "vfpkv", + "vgpvImm8", + "vgpImm8", + "v2kvImm8", } regInfoSet := map[string][]string{} for _, key := range regInfoKeys { @@ -121,10 +121,10 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { panic(err) } if idx != -1 { - if regShape == "fp21" { - regShape = "fpXfp" - } else if regShape == "fp2kfp" { - regShape = "fpXkfp" + if regShape == "v21" { + regShape = "vfpv" + } else if regShape == "v2kv" { + regShape = "vfpkv" } else { panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regShape, op)) } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 42aab212..2f25d420 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -266,17 +266,17 @@ func (op *Operation) regShape() (string, error) { } - inRegs = rmAbbrev("fp", vRegInCnt) + inRegs = rmAbbrev("v", vRegInCnt) inRegs += rmAbbrev("gp", gRegInCnt) inMasks = rmAbbrev("k", kMaskInCnt) - outRegs = rmAbbrev("fp", vRegOutCnt) + outRegs = rmAbbrev("v", vRegOutCnt) outRegs += rmAbbrev("gp", gRegOutCnt) outMasks = rmAbbrev("k", kMaskOutCnt) if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt 
== 0 && gRegOutCnt == 0 { - // For pure fp we can abbreviate it as fp%d%d. - regInfo = fmt.Sprintf("fp%d%d", vRegInCnt, vRegOutCnt) + // For pure v we can abbreviate it as v%d%d. + regInfo = fmt.Sprintf("v%d%d", vRegInCnt, vRegOutCnt) } else if kMaskInCnt == 0 && kMaskOutCnt == 0 { regInfo = fmt.Sprintf("%s%s", inRegs, outRegs) } else { From 098cba1c797e6f9d663cccb864393a05bb0fa87a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 04:09:07 +0000 Subject: [PATCH 127/200] internal/simdgen: make AVX512 op use upper registers This CL generates CL 686695. Change-Id: I3397d2b63f80fb797778fbcb84f22a6c9e09a5a2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686775 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdMachineOps.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 509cafcf..05a191c5 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -8,12 +8,13 @@ import ( "bytes" "fmt" "sort" + "strings" ) const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main -func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv regInfo) []opData { +func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { return []opData{ {{- range .OpsData }} {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, @@ -46,7 +47,9 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { OpsDataImm []opData } seen := map[string]struct{}{} - regInfoSet := map[string]bool{"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true} + regInfoSet := map[string]bool{ + "v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true, + "w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true} opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { @@ -82,6 +85,10 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)) } } + // Makes AVX512 operations use upper registers + if strings.Contains(op.Extension, "AVX512") { + regInfo = strings.ReplaceAll(regInfo, "v", "w") + } if _, ok := regInfoSet[regInfo]; !ok { panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. Op is %s", regInfo, op)) } From c69f75918652a2d4ce0b652c65e7dfa948055631 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 16:23:00 +0000 Subject: [PATCH 128/200] internal/simdgen: Int64x2 Greater and Uint* Equals 1. 
Fix XED data error for Int64x2 Greater compare, add overwrite defs. 2. Uint* equals could just use Int* equals, relaxed the defs. This CL generates CL 686876. Change-Id: Ib110e2547246a4e197348912c77793ab2bfc9466 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686817 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 46 +++++++++++++++++++-------- internal/simdgen/ops/Compares/go.yaml | 46 +++++++++++++++++++-------- 2 files changed, 64 insertions(+), 28 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 16dbf1e6..dd61308f 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -204,32 +204,52 @@ # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" - in: &int2 - - &int + in: + - &any go: $t - base: int # Looks like PCMP is on signed integers - but for equals does it really matters? - - *int + - *any out: - &anyvregToMask - go: $t # We still need the output to be the same shape as inputs. + go: $t overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" - in: *int2 + in: + - &int + go: $t + base: int + - *int out: - *anyvregToMask +# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we +# believe this is an error, so add this definition to overwrite. +- go: Greater + asm: "VPCMPGTQ" + in: + - &int64 + go: $t + base: int + elemBits: 64 + - *int64 + out: + - base: int + elemBits: 32 + overwriteElementBits: 64 - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" - in: &maskint2 + in: - class: mask - - *int - - *int + - *any + - *any out: - class: mask - go: GreaterMasked asm: "V?PCMPGT[BWDQ]" - in: *maskint2 + in: + - class: mask + - *int + - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml @@ -238,9 +258,7 @@ asm: "VPCMP[BWDQ]" in: - class: mask - - &int - go: $t - base: int + - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. 
@@ -270,7 +288,7 @@ - class: immediate const: 0 out: - - go: $t # We still need the output to be the same shape as inputs. + - go: $t overwriteBase: int overwriteClass: mask - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index 8e46cdbd..d8bef2d9 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -2,32 +2,52 @@ # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" - in: &int2 - - &int + in: + - &any go: $t - base: int # Looks like PCMP is on signed integers - but for equals does it really matters? - - *int + - *any out: - &anyvregToMask - go: $t # We still need the output to be the same shape as inputs. + go: $t overwriteBase: int overwriteClass: mask - go: Greater asm: "V?PCMPGT[BWDQ]" - in: *int2 + in: + - &int + go: $t + base: int + - *int out: - *anyvregToMask +# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we +# believe this is an error, so add this definition to overwrite. +- go: Greater + asm: "VPCMPGTQ" + in: + - &int64 + go: $t + base: int + elemBits: 64 + - *int64 + out: + - base: int + elemBits: 32 + overwriteElementBits: 64 - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" - in: &maskint2 + in: - class: mask - - *int - - *int + - *any + - *any out: - class: mask - go: GreaterMasked asm: "V?PCMPGT[BWDQ]" - in: *maskint2 + in: + - class: mask + - *int + - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml @@ -36,9 +56,7 @@ asm: "VPCMP[BWDQ]" in: - class: mask - - &int - go: $t - base: int + - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. @@ -68,7 +86,7 @@ - class: immediate const: 0 out: - - go: $t # We still need the output to be the same shape as inputs. 
+ - go: $t overwriteBase: int overwriteClass: mask - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked From f999474202702cf6673d03edb26d8ceb28d9eb16 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 8 Jul 2025 12:54:02 -0400 Subject: [PATCH 129/200] internal/simdgen: emit more "..." rules for SSA rewriter This removes a bnunch of ssa rewrite generator warnings and also generates better (shorter and more efficient) rewrite rules. Paired with go.simd CL 686495 Change-Id: I14643670c4fba7ac26f309cdbc45ac8c59d58a08 Reviewed-on: https://go-review.googlesource.com/c/arch/+/686378 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdrules.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 6f84b912..c52ff50b 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -112,8 +112,6 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { case PureVregIn: tplName = "pureVreg" data.GoType = *gOp.In[0].Go - data.Args = "..." - data.ArgsOut = "..." case OneKmaskImmIn: fallthrough case OneKmaskIn: @@ -149,6 +147,11 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { } } + if tplName == "pureVreg" && data.Args == data.ArgsOut { + data.Args = "..." + data.ArgsOut = "..." 
+ } + data.tplName = tplName allData = append(allData, data) } From 24c76b94abdf3b44badf7a0d05dd38132af44a82 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 18:56:19 +0000 Subject: [PATCH 130/200] internal/simdgen: cleanup unneeded return value from shape There is no difference between opNoImm and opNoImmConstMask Change-Id: Ic4be860cf65d0b2f78ea39b7bcb3608267b42feb Reviewed-on: https://go-review.googlesource.com/c/arch/+/686956 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdTypes.go | 2 +- internal/simdgen/gen_simdrules.go | 2 +- internal/simdgen/gen_simdssa.go | 2 +- internal/simdgen/gen_utility.go | 15 ++++++--------- 6 files changed, 11 insertions(+), 14 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 1be01810..7fd04b7c 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -43,7 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { - _, _, _, immType, _, gOp := op.shape() + _, _, _, immType, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 05a191c5..cca7d945 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -53,7 +53,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsData := make([]opData, 0) opsDataImm := make([]opData, 0) for _, op := range ops { - shapeIn, shapeOut, maskType, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, gOp := op.shape() asm := gOp.Asm if maskType == OneMask { diff --git a/internal/simdgen/gen_simdTypes.go 
b/internal/simdgen/gen_simdTypes.go index 7dcbc145..552bf51d 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -272,7 +272,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) opsSkipped := map[string]struct{}{} for _, o := range ops { - _, _, _, immType, _, gOp := o.shape() + _, _, _, immType, gOp := o.shape() if immType == VarImm || immType == ConstVarImm { // Operations with variable immediates should be called directly diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c52ff50b..bddcab43 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -65,7 +65,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { - opInShape, opOutShape, maskType, immType, _, gOp := opr.shape() + opInShape, opOutShape, maskType, immType, gOp := opr.shape() vregInCnt := len(gOp.In) asm := gOp.Asm diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go index d42b264b..b664b0f4 100644 --- a/internal/simdgen/gen_simdssa.go +++ b/internal/simdgen/gen_simdssa.go @@ -90,7 +90,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { allUnseen := make(map[string][]Operation) for _, op := range ops { asm := op.Asm - shapeIn, shapeOut, maskType, _, _, gOp := op.shape() + shapeIn, shapeOut, maskType, _, gOp := op.shape() if maskType == 2 { asm += "Masked" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 2f25d420..d5c2492e 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -117,11 +117,10 @@ const ( // and modified versions of the op: // // opNoImm is op with its inputs excluding the const imm. -// opNoConstImmMask is op with its inputs excluding the const imm and mask. // // This function does not modify op. 
func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskShape, immType immShape, - opNoImm Operation, opNoImmConstMask Operation) { + opNoImm Operation) { if len(op.Out) > 1 { panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) } @@ -168,14 +167,12 @@ func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskS } } opNoImm = *op - opNoImmConstMask = *op removeImm := func(o *Operation) { o.In = o.In[1:] } if hasImm { removeImm(&opNoImm) - removeImm(&opNoImmConstMask) if op.In[0].Const != nil { if op.In[0].ImmOffset != nil { immType = ConstVarImm @@ -231,7 +228,7 @@ func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskS // regShape returns a string representation of the register shape. func (op *Operation) regShape() (string, error) { - _, _, _, _, _, gOp := op.shape() + _, _, _, _, gOp := op.shape() var regInfo string var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int for _, in := range gOp.In { @@ -431,7 +428,7 @@ var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} // The classification string is used to select a template or a clause of a template // for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, _, gOp := op.shape() + _, _, _, immType, gOp := op.shape() var class string @@ -515,7 +512,7 @@ func splitMask(ops []Operation) ([]Operation, error) { if op.Masked == nil || *op.Masked != "true" { continue } - shapeIn, _, _, _, _, _ := op.shape() + shapeIn, _, _, _, _ := op.shape() if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { op2 := op @@ -544,7 +541,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func dedupGodef(ops []Operation) ([]Operation, error) { seen := map[string][]Operation{} for _, op := range ops { - _, _, _, _, _, gOp := op.shape() + _, _, _, _, gOp := op.shape() genericNames := gOp.Go + *gOp.In[0].Go seen[genericNames] = append(seen[genericNames], op) @@ -588,7 +585,7 @@ func copyConstImm(ops []Operation) error { if op.ConstImm == nil { continue } - _, _, _, immType, _, _ := op.shape() + _, _, _, immType, _ := op.shape() if immType == ConstImm || immType == ConstVarImm { op.In[0].Const = op.ConstImm From 2de10e9ab058ee449ec2d60ee71e7f3910526c1e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 21:15:38 +0000 Subject: [PATCH 131/200] internal/simdgen: fix Int64x2 Greater output type to mask This CL generates CL 686998. 
Change-Id: I050a79b01a089102ff2e8b1d1f7340e3b8c83b1a Reviewed-on: https://go-review.googlesource.com/c/arch/+/686821 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_utility.go | 16 ++++++++-------- internal/simdgen/go.yaml | 2 ++ internal/simdgen/ops/Compares/go.yaml | 2 ++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index d5c2492e..75a8713f 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -615,6 +615,14 @@ func capitalizeFirst(s string) string { func overwrite(ops []Operation) error { hasClassOverwrite := false overwrite := func(op []Operand, idx int, o Operation) error { + if op[idx].OverwriteElementBits != nil { + if op[idx].ElemBits == nil { + panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) + } + *op[idx].ElemBits = *op[idx].OverwriteElementBits + *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits + *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) + } if op[idx].OverwriteClass != nil { if op[idx].OverwriteBase == nil { panic(fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx])) @@ -639,14 +647,6 @@ func overwrite(ops []Operation) error { *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) *op[idx].Base = oBase } - if op[idx].OverwriteElementBits != nil { - if op[idx].ElemBits == nil { - panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) - } - *op[idx].ElemBits = *op[idx].OverwriteElementBits - *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits - *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) - } return nil } for i, o := range ops { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index dd61308f..cc097f8f 100644 --- a/internal/simdgen/go.yaml +++ 
b/internal/simdgen/go.yaml @@ -236,6 +236,8 @@ - base: int elemBits: 32 overwriteElementBits: 64 + overwriteClass: mask + overwriteBase: int - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index d8bef2d9..c1ea2061 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -34,6 +34,8 @@ - base: int elemBits: 32 overwriteElementBits: 64 + overwriteClass: mask + overwriteBase: int - go: EqualMasked asm: "V?PCMPEQ[BWDQ]" in: From 8033e000cd3118dfd40e031dc3603b80f9ea1bae Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 02:10:18 +0000 Subject: [PATCH 132/200] internal/simdgen: fix documentations This CL fixes some errors of op name in the documentation, make sure they are consistent; This CL also fix the documentation for masked operations. This CL generates CL 687376. Change-Id: I272de4ae9043345f33c4417c92cc542abfbdc127 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687415 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 11 ++++++----- internal/simdgen/gen_utility.go | 5 ++--- internal/simdgen/go.yaml | 2 +- internal/simdgen/ops/GaloisField/categories.yaml | 6 +++--- internal/simdgen/ops/GaloisField/go.yaml | 2 +- internal/simdgen/ops/MLOps/categories.yaml | 5 +++-- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index bfb0ff80..942d4d41 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -361,13 +361,13 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), - // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -431,18 +431,19 @@ documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. +# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. 
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 75a8713f..dbd7d6aa 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -523,9 +523,8 @@ func splitMask(ops []Operation) ([]Operation, error) { } maskedOpName := op2.Go op2.Go = strings.TrimSuffix(op2.Go, "Masked") - if op2.Documentation != nil { - *op2.Documentation = strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) - } + op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) + op2.Documentation = &op2Doc splited = append(splited, op2) } else { return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index cc097f8f..4828bf01 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -399,7 +399,7 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 84b64cc1..3caa13cf 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -8,13 +8,13 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), - // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: + // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 84dc1619..68875d17 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -18,7 +18,7 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInversedMasked +- go: GaloisFieldAffineTransformInverseMasked asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 343b8f54..17c318a9 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -12,18 +12,19 @@ documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. +# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. 
- go: SaturatedUnsignedSignedPairDotProd commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. From 505fade1723f01045821dc090b32006ec40f7078 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 02:14:46 +0000 Subject: [PATCH 133/200] internal/simdgen: change imm param name to constant This CL generates CL 687377. 
Change-Id: I7d0c742aad91f2e6219ac90137a6e7adc6cd48bc Reviewed-on: https://go-review.googlesource.com/c/arch/+/687416 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdTypes.go | 12 ++++++++++++ internal/simdgen/gen_utility.go | 5 ++--- internal/simdgen/go.yaml | 5 +++++ internal/simdgen/ops/FPonlyArith/go.yaml | 2 ++ internal/simdgen/ops/Moves/go.yaml | 2 ++ internal/simdgen/ops/ShiftRotate/go.yaml | 1 + 6 files changed, 24 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 552bf51d..d8a4de63 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -148,6 +148,8 @@ func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} @@ -155,6 +157,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} @@ -162,6 +166,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} {{define "op2Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
+// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} {{end}} @@ -170,6 +176,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -177,6 +185,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} {{define "op3Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -185,6 +195,8 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} +// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// // Asm: {{.Asm}}, CPU Feature: {{.Extension}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index dbd7d6aa..fc5f296b 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -342,10 +342,9 @@ func (op Operation) GoType() string { // ImmName returns the name to use for an operation's immediate operand. 
// This can be overriden in the yaml with "name" on an operand, -// otherwise, for now, it is "imm" but -// TODO come up with a better default immediate parameter name. +// otherwise, for now, "constant" func (op Operation) ImmName() string { - return op.Op0Name("imm") + return op.Op0Name("constant") } func (o Operand) OpName(s string) string { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 4828bf01..b1401bde 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -362,6 +362,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + name: prec out: *1fp - go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" @@ -371,6 +372,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + name: prec out: *1fp - go: "AddSub" @@ -729,6 +731,7 @@ - &imm class: immediate immOffset: 0 + name: index out: - *t @@ -761,6 +764,7 @@ - &imm01 # This immediate should be only 0 or 1 class: immediate immOffset: 0 + name: index out: - *i8x32 @@ -1161,6 +1165,7 @@ - &pureImm class: immediate immOffset: 0 + name: shift out: - *any - go: RotateAllRightMasked diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 29a7f43b..d35610df 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -58,6 +58,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). + name: prec out: *1fp - go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" asm: "VREDUCEP[SD]" @@ -67,6 +68,7 @@ - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
+ name: prec out: *1fp - go: "AddSub" diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index e6cd40f6..dd9ae79d 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -11,6 +11,7 @@ - &imm class: immediate immOffset: 0 + name: index out: - *t @@ -43,6 +44,7 @@ - &imm01 # This immediate should be only 0 or 1 class: immediate immOffset: 0 + name: index out: - *i8x32 diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index a42241db..398047f2 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -143,6 +143,7 @@ - &pureImm class: immediate immOffset: 0 + name: shift out: - *any - go: RotateAllRightMasked From d3b287a03afd8f9f0d27bb4b7a20ca296468741a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 17:57:56 +0000 Subject: [PATCH 134/200] internal/simdgen: adjust Shift.* operations This CL does: 1. Removes ShiftRightSignExtended, default signed vectors to shift arithmetic, and unsigned to shift logical. 2. Add the missing Shifts which were left out by YAML error in the generator. This CL generates CL 687596. Change-Id: I42e21d12cb64e325fe15f44d732353fd6b3b0bf5 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687595 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 24 ++++--- internal/simdgen/go.yaml | 69 +++++++++++-------- .../simdgen/ops/ShiftRotate/categories.yaml | 24 ++++--- internal/simdgen/ops/ShiftRotate/go.yaml | 69 +++++++++++-------- 4 files changed, 116 insertions(+), 70 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 942d4d41..802dc9eb 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -616,31 +616,35 @@ documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. 
Emptied lower bits are zeroed. - go: ShiftAllRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRightSignExtended +- go: ShiftAllRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightSignExtendedMasked + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: ShiftAllRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: "true" @@ -656,31 +660,35 @@ documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
- go: ShiftRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRightSignExtended +- go: ShiftRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightSignExtendedMasked + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +- go: ShiftRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: "true" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index b1401bde..27c45900 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1029,8 +1029,8 @@ - &any go: $t - &vecAsScalar64 + go: "Uint.*" treatLikeAScalarOfSize: 64 - go: Uint64x2 out: - *any - go: ShiftAllLeftMasked @@ -1042,21 +1042,26 @@ out: - *any - go: ShiftAllRight + signed: false asm: "VPSRL[WDQ]" in: - - *any + - &uint + go: $t + base: uint - *vecAsScalar64 out: - - *any + - *uint - go: ShiftAllRightMasked + signed: false asm: "VPSRL[WDQ]" in: - class: mask - - *any + - *uint - *vecAsScalar64 out: - - *any -- go: ShiftAllRightSignExtended + - *uint +- go: ShiftAllRight + signed: true asm: "VPSRA[WDQ]" in: - &int @@ -1065,7 +1070,8 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightSignExtendedMasked +- go: ShiftAllRightMasked + signed: true asm: "VPSRA[WDQ]" in: - class: mask @@ -1110,51 +1116,60 @@ out: - *anyOverwriteElemBits - go: ShiftRight + signed: false asm: "VPSRLV[WD]" in: - - *any - - *any + - *uint + - *uint out: - - *any + - *uint - go: ShiftRightMasked + signed: false asm: "VPSRLV[WD]" in: - class: mask - - *any - - *any + - *uint + - *uint out: - - *any + - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
- go: ShiftRight + signed: false asm: "VPSRLVQ" in: - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - &uintOverwriteElemBits + go: $t + base: uint + overwriteElementBits: 64 + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits + - *uintOverwriteElemBits - go: ShiftRightMasked + signed: false asm: "VPSRLVQ" in: - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - *uintOverwriteElemBits + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits -- go: ShiftRightSignExtended + - *uintOverwriteElemBits +- go: ShiftRight + signed: true asm: "VPSRAV[WDQ]" in: - - *any - - *any + - *int + - *int out: - - *any -- go: ShiftRightSignExtendedMasked + - *int +- go: ShiftRightMasked + signed: true asm: "VPSRAV[WDQ]" in: - class: mask - - *any - - *any + - *int + - *int out: - - *any + - *int # Rotate - go: RotateAllLeftMasked diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index b8bcb28e..09c04dfa 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -13,31 +13,35 @@ documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
-- go: ShiftAllRightSignExtended +- go: ShiftAllRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightSignExtendedMasked + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: ShiftAllRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: "true" @@ -53,31 +57,35 @@ documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight + signed: false nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked + signed: false nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
-- go: ShiftRightSignExtended +- go: ShiftRight + signed: true nameAndSizeCheck: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightSignExtendedMasked + // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +- go: ShiftRightMasked + signed: true nameAndSizeCheck: "true" masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: "true" diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 398047f2..637de935 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -7,8 +7,8 @@ - &any go: $t - &vecAsScalar64 + go: "Uint.*" treatLikeAScalarOfSize: 64 - go: Uint64x2 out: - *any - go: ShiftAllLeftMasked @@ -20,21 +20,26 @@ out: - *any - go: ShiftAllRight + signed: false asm: "VPSRL[WDQ]" in: - - *any + - &uint + go: $t + base: uint - *vecAsScalar64 out: - - *any + - *uint - go: ShiftAllRightMasked + signed: false asm: "VPSRL[WDQ]" in: - class: mask - - *any + - *uint - *vecAsScalar64 out: - - *any -- go: ShiftAllRightSignExtended + - *uint +- go: ShiftAllRight + signed: true asm: "VPSRA[WDQ]" in: - &int @@ -43,7 +48,8 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightSignExtendedMasked +- go: ShiftAllRightMasked + signed: true asm: "VPSRA[WDQ]" in: - class: mask @@ -88,51 +94,60 @@ out: - *anyOverwriteElemBits - go: ShiftRight + signed: false asm: "VPSRLV[WD]" in: - - *any - - *any + - *uint + - *uint out: - - *any + - *uint - go: ShiftRightMasked + signed: false asm: "VPSRLV[WD]" in: - class: mask - - *any - - *any + - *uint + - *uint out: - - *any + - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
- go: ShiftRight + signed: false asm: "VPSRLVQ" in: - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - &uintOverwriteElemBits + go: $t + base: uint + overwriteElementBits: 64 + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits + - *uintOverwriteElemBits - go: ShiftRightMasked + signed: false asm: "VPSRLVQ" in: - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits + - *uintOverwriteElemBits + - *uintOverwriteElemBits out: - - *anyOverwriteElemBits -- go: ShiftRightSignExtended + - *uintOverwriteElemBits +- go: ShiftRight + signed: true asm: "VPSRAV[WDQ]" in: - - *any - - *any + - *int + - *int out: - - *any -- go: ShiftRightSignExtendedMasked + - *int +- go: ShiftRightMasked + signed: true asm: "VPSRAV[WDQ]" in: - class: mask - - *any - - *any + - *int + - *int out: - - *any + - *int # Rotate - go: RotateAllLeftMasked From 17f47198bffe0d3559e613f7e5b39ecd1c45d4b3 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 11 Jul 2025 20:03:26 +0000 Subject: [PATCH 135/200] internal/simdgen: updates CPU Feature in doc This CL picks ISA set whenever available for CPU Feature, otherwise picks Extension. This CL generates CL 687675. 
Change-Id: I6eebd730c65dad8e3557b5bbd2fbb2de01bd18f7 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687655 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 24 +++++++------- internal/simdgen/gen_utility.go | 52 ++++++++++++++++++++++++++++--- internal/simdgen/godefs.go | 8 +++++ internal/simdgen/xed.go | 3 +- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d8a4de63..412c7126 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -106,42 +106,42 @@ package simd {{define "op1"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} {{end}} {{define "op2"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} {{define "op2VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} {{end}} {{define "op3VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} {{end}} {{define 
"op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} @@ -150,7 +150,7 @@ func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} @@ -159,7 +159,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} @@ -168,7 +168,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} {{end}} @@ -178,7 +178,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
// -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -187,7 +187,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -197,7 +197,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint //{{end}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: {{.Asm}}, CPU Feature: {{.Extension}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index fc5f296b..25503510 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -502,6 +502,44 @@ func dedup(ops []Operation) (deduped []Operation) { return } +func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) { + for _, op := range ops { + if op.ISASet == "" { + newS := op.Extension + op.CPUFeature = &newS + } else { + newS := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(op.ISASet, "_128"), "_256"), "_512") + newS = strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(newS, "_128N"), "_256N"), "_512N") + op.CPUFeature = &newS + } + if *op.CPUFeature == "AVX" || *op.CPUFeature == "AVX2" || strings.HasPrefix(*op.CPUFeature, "AVX512") || + strings.HasPrefix(*op.CPUFeature, "AVX_") || strings.HasPrefix(*op.CPUFeature, 
"AVX2_") { + // This excludes instructions from CPU Features like AVX10.1, which usually are rebrandings of AVX512. + filled = append(filled, op) + if strings.Contains(*op.CPUFeature, "_") { + *op.CPUFeature = strings.ReplaceAll(*op.CPUFeature, "_", "") + } + } else { + excluded = append(excluded, op) + } + } + // Sanity check, make sure we are not excluding the only definition of an operation + filledSeen := map[string]struct{}{} + excludedSeen := map[string]Operation{} + for _, op := range filled { + filledSeen[op.Go+*op.In[0].Go] = struct{}{} + } + for _, op := range excluded { + excludedSeen[op.Go+*op.In[0].Go] = op + } + for k, op := range excludedSeen { + if _, ok := filledSeen[k]; !ok { + panic(fmt.Sprintf("simdgen is excluding the only def of op: %s", op)) + } + } + return +} + // splitMask splits operations with a single mask vreg input to be masked and unmasked(const: K0). // It also remove the "Masked" keyword from the name. func splitMask(ops []Operation) ([]Operation, error) { @@ -561,13 +599,15 @@ func dedupGodef(ops []Operation) ([]Operation, error) { deduped := []Operation{} for _, dup := range seen { if len(dup) > 1 { - sort.Slice(dup, func(i, j int) bool { + slices.SortFunc(dup, func(i, j Operation) int { // Put non-AVX512 candidates at the beginning - if !isAVX512(dup[i]) && isAVX512(dup[j]) { - return true + if !isAVX512(i) && isAVX512(j) { + return -1 + } + if isAVX512(i) && !isAVX512(j) { + return 1 } - // TODO: make the sorting logic finer-grained. 
- return false + return strings.Compare(*i.CPUFeature, *j.CPUFeature) }) } deduped = append(deduped, dup[0]) @@ -741,6 +781,8 @@ func (o Operation) String() string { str("Asm", o.Asm) str("Commutative", o.Commutative) str("Extension", o.Extension) + str("ISASet", o.ISASet) + optStr("CPUFeature", o.CPUFeature) optStr("ConstImm", o.ConstImm) optStr("Masked", o.Masked) optStr("Zeroing", o.Zeroing) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 52cfd1e8..803e5306 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -25,6 +25,8 @@ type Operation struct { Out []Operand // Results Commutative string // Commutativity Extension string // Extension + ISASet string // ISASet + CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet Zeroing *string // Zeroing is a flag for asm prefix "Z", if non-nil it will always be "false" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate @@ -245,6 +247,11 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. 
deduped := dedup(ops) + var excluded []Operation + deduped, excluded = fillCPUFeature(deduped) + if *Verbose { + log.Printf("excluded len: %d\n", len(excluded)) + } if *Verbose { log.Printf("dedup len: %d\n", len(ops)) @@ -280,6 +287,7 @@ func writeGoDefs(path string, cl unify.Closure) error { if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } + reportXEDInconsistency(deduped) typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 387db08a..5f348cbc 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -58,13 +58,14 @@ func loadXED(xedPath string) []*unify.Value { return } // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out", "extension"} + fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} values := []*unify.Value{ unify.NewValue(unify.NewStringExact("amd64")), unify.NewValue(unify.NewStringExact(inst.Opcode())), unify.NewValue(ins), unify.NewValue(outs), unify.NewValue(unify.NewStringExact(inst.Extension)), + unify.NewValue(unify.NewStringExact(inst.ISASet)), } if strings.Contains(inst.Pattern, "ZEROING=0") { fields = append(fields, "zeroing") From 6a376630333dcf66417cd7c87ab5a0fca5e9bfbe Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 17:23:25 +0000 Subject: [PATCH 136/200] internal/simdgen: add VDPPS This is a missing instruction that was left out. This CL generates CL 687916. 
Change-Id: I0361e7a1ecda67792b315887c996d72af404cd85 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687915 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/go.yaml | 5 +---- internal/simdgen/ops/MLOps/go.yaml | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 27c45900..8fadf2a9 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -519,13 +519,10 @@ out: - *int3 - go: DotProdBroadcast - asm: VDPPD + asm: VDPP[SD] in: - &dpb_src go: $t - base: float - elemBits: 64 - bits: $bits - *dpb_src - class: immediate const: 127 diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index fb6b4fd1..278daa87 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -42,13 +42,10 @@ out: - *int3 - go: DotProdBroadcast - asm: VDPPD + asm: VDPP[SD] in: - &dpb_src go: $t - base: float - elemBits: 64 - bits: $bits - *dpb_src - class: immediate const: 127 From cd05644f3eb9d5e2a7fe014a0cdb95561554ed0d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 19:09:54 +0000 Subject: [PATCH 137/200] internal/simdgen: add variable Permute This CL only adds the "PermuteVar"(namings borrowed from C#) variant of permutes. The immediate variant of permute will be in another CL. This CL generates CL 687939. 
Change-Id: I3072ede18d623f23a007f66114ee0e429dd2aa0d Reviewed-on: https://go-review.googlesource.com/c/arch/+/687919 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 28 ++++++++++ internal/simdgen/gen_simdGenericOps.go | 3 +- internal/simdgen/gen_simdIntrinsics.go | 8 +++ internal/simdgen/gen_simdTypes.go | 65 ++++++++++++++++++---- internal/simdgen/gen_simdrules.go | 24 ++++++-- internal/simdgen/gen_utility.go | 17 +++++- internal/simdgen/go.yaml | 39 +++++++++++-- internal/simdgen/ops/Moves/categories.yaml | 28 ++++++++++ internal/simdgen/ops/Moves/go.yaml | 41 ++++++++++++-- 9 files changed, 220 insertions(+), 33 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 802dc9eb..a6dfaf19 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -556,6 +556,34 @@ extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. + + +- go: Permute + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Permute performs a full permutation of vector x using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. + +- go: PermuteMasked + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // PermuteMasked performs a full permutation of vector y using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. 
+ +- go: Permute2Masked # Permute2Masked is only available on or after AVX512 + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // Permute2Masked performs a full permutation of vector x, y using indices: + // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // where xy is x appending y. + // Only the needed bits to represent xy's index are used in indices' elements. - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 7fd04b7c..f6c7a4a6 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -44,8 +44,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { var opsData opData for _, op := range ops { _, _, _, immType, gOp := op.shape() - genericNames := gOp.Go + *gOp.In[0].Go - gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericNames, len(gOp.In), op.Commutative} + gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericName(gOp), len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { opsData.OpsImm = append(opsData.OpsImm, gOpData) } else { diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 3c40856b..244f2360 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -29,10 +29,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op2_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op3_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} +{{define "op3_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op4_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 412c7126..76de4f02 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -107,42 +107,70 @@ package simd {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}() {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}} {{end}} {{define "op2"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) 
{{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} +{{end}} + +{{define "op2_21Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} {{end}} {{define "op3"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +{{end}} + +{{define "op3_21Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +{{end}} + +{{define "op3_231Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} {{end}} {{define "op2VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} +func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} {{end}} {{define "op3VecAsScalar"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} +func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} {{end}} {{define "op4"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: 
{{.CPUFeature}} -func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} +func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} +{{end}} + +{{define "op4_231Uint"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} {{define "op1Imm8"}} @@ -151,7 +179,7 @@ func (x {{(index .In 0).Go}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{end}} {{define "op2Imm8"}} @@ -160,7 +188,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} {{end}} {{define "op2Imm8_2I"}} @@ -169,7 +197,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} {{end}} @@ -179,7 +207,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} {{define "op3Imm8_2I"}} @@ -188,7 +216,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"} // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} {{end}} @@ -198,7 +226,7 @@ func (x {{(index .In 1).Go}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint // {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func (x {{(index .In 1).Go}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} {{end}} {{define "vectorConversion"}} @@ -283,6 +311,7 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsByShape := make(map[string]opData) opsSkipped := map[string]struct{}{} +outerLoop: for _, o := range ops { _, _, _, immType, gOp := o.shape() @@ -299,6 +328,18 @@ func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { opsSkipped[o.Go] = struct{}{} continue } + if o.OperandOrder != nil { + // We need to check if the customize order change the function signature. + // It is only safe to proceed generating the test wrappers if the function + // signature stays the same. + // Filtering out unqualified cases as a hack now, this test wrapper + // infrastrcuture should be changing soon so it should be fine. + switch *o.OperandOrder { + default: + opsSkipped[o.Go] = struct{}{} + continue outerLoop + } + } var shape string var baseArgDefList []string diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index bddcab43..e684058d 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -102,21 +102,33 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { data.ArgsOut = fmt.Sprintf("[a+%s] %s", *opr.In[0].Const, data.ArgsOut) } + goType := func(op Operation) string { + if op.OperandOrder != nil { + switch *op.OperandOrder { + case "21Uint": + fallthrough + case "231Uint": + // Permute + return *op.In[1].Go + } + } + return *op.In[0].Go + } var tplName string // If class overwrite is happening, that's not really a mask but a vreg. 
if opOutShape == OneVregOut || opOutShape == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { switch opInShape { case OneImmIn: tplName = "pureVreg" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) case PureVregIn: tplName = "pureVreg" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) case OneKmaskImmIn: fallthrough case OneKmaskIn: tplName = "maskIn" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) rearIdx := len(gOp.In) - 1 // Mask is at the end. data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) @@ -125,7 +137,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { } } else if opOutShape == OneGregOut { tplName = "pureVreg" // TODO this will be wrong - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) } else { // OneKmaskOut case data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) @@ -134,12 +146,12 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { fallthrough case PureVregIn: tplName = "maskOut" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) case OneKmaskImmIn: fallthrough case OneKmaskIn: tplName = "maskInMaskOut" - data.GoType = *gOp.In[0].Go + data.GoType = goType(gOp) rearIdx := len(gOp.In) - 1 data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) case PureKmaskIn: diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 25503510..11c5e75a 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -570,6 +570,19 @@ func splitMask(ops []Operation) ([]Operation, error) { return splited, nil } +func genericName(op Operation) string { + if op.OperandOrder != nil { + switch *op.OperandOrder { + case "21Uint": + fallthrough + case "231Uint": + // Permute + return op.Go + *op.In[1].Go + } + } + return op.Go + *op.In[0].Go +} + // dedupGodef is deduping operations in [Op.Go]+[*Op.In[0].Go] level. 
// By deduping, it means picking the least advanced architecture that satisfy the requirement: // AVX512 will be least preferred. @@ -579,8 +592,8 @@ func dedupGodef(ops []Operation) ([]Operation, error) { for _, op := range ops { _, _, _, _, gOp := op.shape() - genericNames := gOp.Go + *gOp.In[0].Go - seen[genericNames] = append(seen[genericNames], op) + gN := genericName(gOp) + seen[gN] = append(seen[gN], op) } if *FlagReportDup { for gName, dup := range seen { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 8fadf2a9..0b894ab2 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -896,13 +896,42 @@ out: - *f64x2 +- go: Permute + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - &anyindices + go: $t + name: indices + overwriteBase: uint + - &any + go: $t + out: + - *any +- go: PermuteMasked + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - class: mask + - *anyindices + - *any + out: + - *any - - - - - +- go: Permute2Masked + asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" + # Because we are overwriting the receiver's type, we + # have to move the receiver to be a parameter so that + # we can have no duplication. + operandOrder: "231Uint" + in: + - *anyindices # result in arg 0 + - class: mask + - *any + - *any + out: + - *any # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index db36efd4..bb9fae8d 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -19,3 +19,31 @@ extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. 
+ + +- go: Permute + commutative: "false" + extension: "AVX.*" + documentation: !string |- + // Permute performs a full permutation of vector x using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. + +- go: PermuteMasked + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // PermuteMasked performs a full permutation of vector y using indices: + // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // Only the needed bits to represent x's index are used in indices' elements. + +- go: Permute2Masked # Permute2Masked is only available on or after AVX512 + commutative: "false" + masked: "true" + extension: "AVX.*" + documentation: !string |- + // Permute2Masked performs a full permutation of vector x, y using indices: + // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // where xy is x appending y. + // Only the needed bits to represent xy's index are used in indices' elements. \ No newline at end of file diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index dd9ae79d..49b67a28 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -179,10 +179,39 @@ out: - *f64x2 +- go: Permute + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - &anyindices + go: $t + name: indices + overwriteBase: uint + - &any + go: $t + out: + - *any - - - - - - +- go: PermuteMasked + asm: "VPERM[BWDQ]|VPERMP[SD]" + operandOrder: "21Uint" + in: + - class: mask + - *anyindices + - *any + out: + - *any + +- go: Permute2Masked + asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" + # Because we are overwriting the receiver's type, we + # have to move the receiver to be a parameter so that + # we can have no duplication. 
+ operandOrder: "231Uint" + in: + - *anyindices # result in arg 0 + - class: mask + - *any + - *any + out: + - *any \ No newline at end of file From d0fd62e6cd0f5368bfd48c862170e8143c19da42 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 19:45:20 +0000 Subject: [PATCH 138/200] internal/simdgen: default mask param's name to mask This CL generates CL 687955. Change-Id: I6606d6857c9fc9d9ed0f1025fdb1c3c45238aa04 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687920 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 11c5e75a..136223a1 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -351,6 +351,9 @@ func (o Operand) OpName(s string) string { if n := o.Name; n != nil { return *n } + if o.Class == "mask" { + return "mask" + } return s } From ca6dc8eff52c568a7bfbd4f18734f048eb8604a3 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 20:30:36 +0000 Subject: [PATCH 139/200] internal/simdgen: add Compress This CL generates CL 687995. Change-Id: I889a065743936a592037032c67b6df161bcb3cde Reviewed-on: https://go-review.googlesource.com/c/arch/+/687975 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 8 ++++++++ internal/simdgen/go.yaml | 8 ++++++++ internal/simdgen/ops/Moves/categories.yaml | 10 +++++++++- internal/simdgen/ops/Moves/go.yaml | 8 ++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index a6dfaf19..947d8456 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -584,6 +584,14 @@ // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. 
+ +- go: Compress + commutative: "false" + # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" + extension: "AVX.*" + documentation: !string |- + // Compress performs a compression on vector x using mask by + // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: Mul commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 0b894ab2..c158204d 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -932,6 +932,14 @@ - *any out: - *any + +- go: Compress + asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" + in: + - class: mask + - *any + out: + - *any # "Normal" multiplication is only available for floats. # This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index bb9fae8d..8dfe372a 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -46,4 +46,12 @@ // Permute2Masked performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. - // Only the needed bits to represent xy's index are used in indices' elements. \ No newline at end of file + // Only the needed bits to represent xy's index are used in indices' elements. + +- go: Compress + commutative: "false" + # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" + extension: "AVX.*" + documentation: !string |- + // Compress performs a compression on vector x using mask by + // selecting elements as indicated by mask, and pack them to lower indexed elements. 
diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 49b67a28..cf5608f2 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -214,4 +214,12 @@ - *any - *any out: + - *any + +- go: Compress + asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" + in: + - class: mask + - *any + out: - *any \ No newline at end of file From 7928003cb8e91526da3f5b6aa96fb6b8afbd001c Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 14 Jul 2025 22:01:22 +0000 Subject: [PATCH 140/200] internal/simdgen: adjust param order for AndNot This CL generates CL 687996 Change-Id: I21fd71c40177b06660a075e4cb157a3f3b92ae74 Reviewed-on: https://go-review.googlesource.com/c/arch/+/687977 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 4 ++-- internal/simdgen/gen_simdIntrinsics.go | 4 ++++ internal/simdgen/gen_simdTypes.go | 16 ++++++++++++++++ internal/simdgen/go.yaml | 2 ++ .../simdgen/ops/BitwiseLogic/categories.yaml | 4 ++-- internal/simdgen/ops/BitwiseLogic/go.yaml | 2 ++ 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 947d8456..8f4ffbe1 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -93,13 +93,13 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise AND NOT operation between two vectors. + // AndNot performs a bitwise x &^ y. - go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a bitwise x &^ y. 
- go: Xor commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 244f2360..0dce757f 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -29,10 +29,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . {{end}} {{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op2_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op3_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 76de4f02..a5aaf1b3 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -117,6 +117,13 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}} func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} {{end}} +{{define "op2_21"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} +{{end}} + {{define "op2_21Uint"}} 
{{if .Documentation}}{{.Documentation}} //{{end}} @@ -131,6 +138,13 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} +{{define "op3_21"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} +{{end}} + {{define "op3_21Uint"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -335,6 +349,8 @@ outerLoop: // Filtering out unqualified cases as a hack now, this test wrapper // infrastrcuture should be changing soon so it should be fine. switch *o.OperandOrder { + case "21": + // No op because it's only set in AndNot, and opr[2] and opr[1] has the same shape default: opsSkipped[o.Go] = struct{}{} continue outerLoop diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index c158204d..bcfb97c1 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -156,6 +156,7 @@ - go: AndNot asm: "VPANDN" + operandOrder: "21" # switch the arg order in: - *any - *any @@ -163,6 +164,7 @@ - *any - go: AndNotMasked asm: "VPANDN[DQ]" + operandOrder: "21" in: - class: mask - *any diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 1ef1d360..afda77b8 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -25,13 +25,13 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise AND NOT operation between two vectors. + // AndNot performs a bitwise x &^ y. - go: AndNotMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a masked bitwise AND NOT operation between two vectors. + // AndNotMasked performs a bitwise x &^ y. 
- go: Xor commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 49e2dbc9..acc7a51e 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -32,6 +32,7 @@ - go: AndNot asm: "VPANDN" + operandOrder: "21" # switch the arg order in: - *any - *any @@ -39,6 +40,7 @@ - *any - go: AndNotMasked asm: "VPANDN[DQ]" + operandOrder: "21" in: - class: mask - *any From ad6e2ac689d7249505ad024dd62a9ad3b69499fc Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 15 Jul 2025 21:51:00 +0000 Subject: [PATCH 141/200] internal/simdgen: cleans up the shape of Permute This CL addressed comments in CL 687919. Change-Id: I77d488f6128658c6f0ad27f2fa9565335f6829a9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688295 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 8 ++++---- internal/simdgen/gen_simdTypes.go | 8 ++++---- internal/simdgen/gen_simdrules.go | 6 ++---- internal/simdgen/gen_utility.go | 6 ++---- internal/simdgen/go.yaml | 6 +++--- internal/simdgen/ops/Moves/go.yaml | 6 +++--- 6 files changed, 18 insertions(+), 22 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 0dce757f..bf2a180f 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -31,19 +31,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_21Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4_231Uint"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} diff --git 
a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index a5aaf1b3..6832f751 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -124,7 +124,7 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} {{end}} -{{define "op2_21Uint"}} +{{define "op2_21Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} @@ -145,14 +145,14 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndTyp func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} -{{define "op3_21Uint"}} +{{define "op3_21Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} -{{define "op3_231Uint"}} +{{define "op3_231Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} @@ -180,7 +180,7 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfS func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} -{{define "op4_231Uint"}} +{{define "op4_231Type1"}} {{if .Documentation}}{{.Documentation}} //{{end}} // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index e684058d..81aba7a0 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -105,10 +105,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { goType := func(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { - case "21Uint": - fallthrough - case "231Uint": - // Permute + case "21Type1", "231Type1": + // Permute uses operand[1] 
for method receiver. return *op.In[1].Go } } diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 136223a1..698caef3 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -576,10 +576,8 @@ func splitMask(ops []Operation) ([]Operation, error) { func genericName(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { - case "21Uint": - fallthrough - case "231Uint": - // Permute + case "21Type1", "231Type1": + // Permute uses operand[1] for method receiver. return op.Go + *op.In[1].Go } } diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index bcfb97c1..3369e669 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -900,7 +900,7 @@ - go: Permute asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - &anyindices go: $t @@ -913,7 +913,7 @@ - go: PermuteMasked asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - class: mask - *anyindices @@ -926,7 +926,7 @@ # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that # we can have no duplication. - operandOrder: "231Uint" + operandOrder: "231Type1" in: - *anyindices # result in arg 0 - class: mask diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index cf5608f2..44a1c3c3 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -181,7 +181,7 @@ - go: Permute asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - &anyindices go: $t @@ -194,7 +194,7 @@ - go: PermuteMasked asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Uint" + operandOrder: "21Type1" in: - class: mask - *anyindices @@ -207,7 +207,7 @@ # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that # we can have no duplication. 
- operandOrder: "231Uint" + operandOrder: "231Type1" in: - *anyindices # result in arg 0 - class: mask From 4344dd88a4a859fc9b3c16a814cb8f18ea06adf7 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 15 Jul 2025 05:15:30 +0000 Subject: [PATCH 142/200] internal/simdgen: reorder PairDotProdAccumulate This CL reorders the param order of PairDotProdAccumulate family. This CL also updates some other ML Ops documentation. This CL generates CL 688095. Change-Id: I1b493852209c5370083ebf7ac75ad2b6d8b501b3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688115 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 22 +++++++++++----------- internal/simdgen/gen_simdIntrinsics.go | 4 ++++ internal/simdgen/gen_simdTypes.go | 14 ++++++++++++++ internal/simdgen/go.yaml | 9 +++++++++ internal/simdgen/ops/MLOps/categories.yaml | 22 +++++++++++----------- internal/simdgen/ops/MLOps/go.yaml | 9 +++++++++ 6 files changed, 58 insertions(+), 22 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 8f4ffbe1..01881ab5 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -456,64 +456,64 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. 
- go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. + // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. + // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. + // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. - go: Max commutative: "true" extension: "AVX.*" diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index bf2a180f..6d7a75d1 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -41,10 +41,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
{{end}} {{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) {{end}} {{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) {{end}} +{{define "op4_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{end}} {{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 6832f751..f43e1eb7 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -138,6 +138,13 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} {{end}} +{{define "op3_31"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} +{{end}} + {{define "op3_21"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -187,6 +194,13 @@ func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndTyp func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, 
{{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} {{end}} +{{define "op4_31"}} +{{if .Documentation}}{{.Documentation}} +//{{end}} +// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} +func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} +{{end}} + {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 3369e669..8ef04b8e 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -532,9 +532,11 @@ - *dpb_src - go: UnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - &qdpa_acc go: $t_acc + base: int elemBits: 32 - &qdpa_src1 go: $t_src1 @@ -548,6 +550,7 @@ - *qdpa_acc - go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -557,6 +560,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - *qdpa_src1 @@ -565,6 +569,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -574,6 +579,7 @@ - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -591,6 +597,7 @@ - *pdpa_acc - go: PairDotProdAccumulateMasked asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask @@ -600,6 +607,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 @@ -608,6 +616,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask diff --git a/internal/simdgen/ops/MLOps/categories.yaml 
b/internal/simdgen/ops/MLOps/categories.yaml index 17c318a9..962ae50d 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -37,61 +37,61 @@ commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. + // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. 
+ // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulateMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of y and z and accumulates the results to x. + // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs `(v1 * v2) + v3`. + // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. + // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked masked: "true" commutative: "false" extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
+ // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 278daa87..76512b1e 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -53,9 +53,11 @@ - *dpb_src - go: UnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - &qdpa_acc go: $t_acc + base: int elemBits: 32 - &qdpa_src1 go: $t_src1 @@ -69,6 +71,7 @@ - *qdpa_acc - go: UnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSD" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -78,6 +81,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - *qdpa_src1 @@ -86,6 +90,7 @@ - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked asm: "VPDPBUSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *qdpa_acc - class: mask @@ -95,6 +100,7 @@ - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -112,6 +118,7 @@ - *pdpa_acc - go: PairDotProdAccumulateMasked asm: "VPDPWSSD" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask @@ -121,6 +128,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 @@ -129,6 +137,7 @@ - *pdpa_acc - go: SaturatedPairDotProdAccumulateMasked asm: "VPDPWSSDS" + operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - class: mask From c81c31d188992d3d6293b31ccd9fc8c97b2837f2 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 15 Jul 2025 19:52:36 +0000 Subject: [PATCH 143/200] internal/simdgen: add logging of all CPU Features This CL makes -v print all CPU Features that will appear in the API. 
This is useful for us to add CPU Feature check. Change-Id: I19cc964b531abcb3a4730b45206ffd9752e49653 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688216 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_utility.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 698caef3..439a5e14 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -506,6 +506,7 @@ func dedup(ops []Operation) (deduped []Operation) { } func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) { + allCPUFeatures := map[string]struct{}{} for _, op := range ops { if op.ISASet == "" { newS := op.Extension @@ -522,6 +523,7 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) if strings.Contains(*op.CPUFeature, "_") { *op.CPUFeature = strings.ReplaceAll(*op.CPUFeature, "_", "") } + allCPUFeatures[*op.CPUFeature] = struct{}{} } else { excluded = append(excluded, op) } @@ -540,6 +542,10 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) panic(fmt.Sprintf("simdgen is excluding the only def of op: %s", op)) } } + if *Verbose { + // It might contain + log.Printf("All CPU Features: %v\n", allCPUFeatures) + } return } From d50ebdc696d024df4e9e0fa2d670442cdeba2d86 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 16 Jul 2025 17:01:22 +0000 Subject: [PATCH 144/200] internal/simdgen: clean up masked op doc This CL makes masked and un-masked op doc identical, and then apply a mechanism to append a write-mask description to the doc uniformly. The previous version contains some inconsistency, "masked" might appear in unmasked op's documentation. This CL generates CL 688396. 
Change-Id: Ia52ed6606a129f2bda15315b3a87d5d1dcef24e9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688395 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 20 +++++++++---------- internal/simdgen/gen_utility.go | 11 ++++++++++ internal/simdgen/godefs.go | 1 + .../simdgen/ops/BitwiseLogic/categories.yaml | 6 +++--- internal/simdgen/ops/Compares/categories.yaml | 2 +- .../simdgen/ops/FPonlyArith/categories.yaml | 4 ++-- internal/simdgen/ops/Mul/categories.yaml | 8 ++++---- 7 files changed, 32 insertions(+), 20 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 01881ab5..5b0e5597 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -77,7 +77,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // AndMasked performs a masked bitwise AND operation between two vectors. + // AndMasked performs a bitwise AND operation between two vectors. - go: Or commutative: "true" extension: "AVX.*" @@ -88,7 +88,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // OrMasked performs a masked bitwise OR operation between two vectors. + // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" @@ -110,7 +110,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // XorMasked performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. # const imm predicate(holds for both float and int|uint): @@ -169,7 +169,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality, masked. + // EqualMasked compares for equality. 
- go: LessMasked constImm: 1 masked: "true" @@ -296,7 +296,7 @@ constImm: 1 masked: "true" documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision, masked. + // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" @@ -317,7 +317,7 @@ constImm: 2 masked: "true" documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision, masked. + // CeilWithPrecisionMasked rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" @@ -618,26 +618,26 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors, masked. + // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result, masked. + // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result. 
- go: ShiftAllLeft nameAndSizeCheck: "true" commutative: "false" diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 439a5e14..0be1df38 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -571,6 +571,7 @@ func splitMask(ops []Operation) ([]Operation, error) { op2.Go = strings.TrimSuffix(op2.Go, "Masked") op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) op2.Documentation = &op2Doc + op2.Masked = nil // It's no longer masked. splited = append(splited, op2) } else { return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) @@ -579,6 +580,16 @@ func splitMask(ops []Operation) ([]Operation, error) { return splited, nil } +func insertMaskDescToDoc(ops []Operation) { + for i, _ := range ops { + if ops[i].Masked != nil && *ops[i].Masked == "true" { + if ops[i].Documentation != nil { + *ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." + } + } + } +} + func genericName(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 803e5306..9a40f83b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -268,6 +268,7 @@ func writeGoDefs(path string, cl unify.Closure) error { return err } } + insertMaskDescToDoc(deduped) if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index afda77b8..d6ea3ed1 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -9,7 +9,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // AndMasked performs a masked bitwise AND operation between two vectors. + // AndMasked performs a bitwise AND operation between two vectors. 
- go: Or commutative: "true" extension: "AVX.*" @@ -20,7 +20,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // OrMasked performs a masked bitwise OR operation between two vectors. + // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot commutative: "false" extension: "AVX.*" @@ -42,6 +42,6 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // XorMasked performs a masked bitwise XOR operation between two vectors. + // XorMasked performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index 3b021e4c..f7383555 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -55,7 +55,7 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality, masked. + // EqualMasked compares for equality. - go: LessMasked constImm: 1 masked: "true" diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 356b06d3..e0d5836d 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -83,7 +83,7 @@ constImm: 1 masked: "true" documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision, masked. + // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: "false" extension: "AVX.*" @@ -104,7 +104,7 @@ constImm: 2 masked: "true" documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision, masked. + // CeilWithPrecisionMasked rounds elements up with specified precision. 
- go: DiffWithCeilWithPrecisionMasked commutative: "false" extension: "AVX.*" diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 34b3ab56..b466eb01 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -25,23 +25,23 @@ commutative: "true" extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors, masked. + // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result, masked. + // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result, masked. + // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked masked: "true" commutative: "true" extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result, masked. + // MulLowMasked multiplies elements and stores the low part of the result. From d0d5d5ba49324d2e8e5df13e68c4939a3b337d23 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 18 Jul 2025 04:24:37 +0000 Subject: [PATCH 145/200] internal/simdgen: support load from bits for mask This CL adds the code generation to construct K masks from bits. This will enable more flexible and performant SIMD programming. This CL generates CL 688875. 
Change-Id: Idb576a5e2343b1dd0762ea3e52cf8f06a4e3af13 Reviewed-on: https://go-review.googlesource.com/c/arch/+/688855 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 7 +++++-- internal/simdgen/gen_simdTypes.go | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 6d7a75d1..355c8d14 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -73,6 +73,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) + addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} @@ -109,8 +110,10 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { } for _, typ := range typesFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { - panic(fmt.Errorf("failed to execute loadStore template: %w", err)) + if typ.Type != "mask" { + if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { + panic(fmt.Errorf("failed to execute loadStore template: %w", err)) + } } } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index f43e1eb7..50553b4e 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -21,6 +21,7 @@ type simdType struct { VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" ReshapedVectorWithAndOr string // For mask use 
only: vector AND and OR are only available in some shape with element width 32. Size int // The size of the type + ElemBits int // Size / Lanes } func compareSimdTypes(x, y simdType) int { @@ -92,7 +93,15 @@ func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} //go:noescape func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) -{{- end}} +{{- else}} + +// {{.Name}}FromBits constructs a {{.Name}} from an a bitmap, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. +// +//go:noescape +func Load{{.Name}}FromBits(y *uint64) {{.Name}} + +{{end}} {{end}} ` @@ -528,14 +537,14 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { if arg.Class == "mask" { vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits, *arg.Bits / lanes}) // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. 
if _, ok := seen[vectorCounterpart]; !ok { seen[vectorCounterpart] = struct{}{} - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits, *arg.Bits / lanes}) } } else { - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits, *arg.Bits / lanes}) } } for _, op := range ops { From d63b4ec3794a220ec4e2d0f90e458c5d2c48c004 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Jul 2025 21:48:20 -0400 Subject: [PATCH 146/200] internal/unify: fix round-tripping strings with regexp metacharacters Change-Id: I92956b13c7532b9a96386947ee19aa61142337c8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689478 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/unify/domain.go | 4 ++-- internal/unify/yaml.go | 9 +++++++- internal/unify/yaml_test.go | 44 +++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index c59bd621..00c80902 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -270,8 +270,8 @@ func NewStringRegex(exprs ...string) (String, error) { continue } - if _, complete := re.LiteralPrefix(); complete { - v = String{kind: stringExact, exact: expr} + if exact, complete := re.LiteralPrefix(); complete { + v = String{kind: stringExact, exact: exact} } else { v.kind = stringRegex v.re = append(v.re, re) diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 4731140b..6782b313 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -430,7 +430,14 @@ func (enc *yamlEncoder) value(v *Value) *yaml.Node { n.Tag = "tag:yaml.org,2002:int" return 
&n } - n.SetString(regexp.QuoteMeta(d.exact)) + // If this doesn't require escaping, leave it as a str node to avoid + // the annoying YAML tags. Otherwise, mark it as an exact string. + // Alternatively, we could always emit a str node with regexp + // quoting. + n.SetString(d.exact) + if d.exact != regexp.QuoteMeta(d.exact) { + n.Tag = "!string" + } return &n case stringRegex: o := make([]string, 0, 1) diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go index af73001d..05a26beb 100644 --- a/internal/unify/yaml_test.go +++ b/internal/unify/yaml_test.go @@ -8,6 +8,9 @@ import ( "bytes" "fmt" "iter" + "log" + "strings" + "testing" "gopkg.in/yaml.v3" ) @@ -20,6 +23,19 @@ func mustParse(expr string) Closure { return c } +func oneValue(t *testing.T, c Closure) *Value { + t.Helper() + var v *Value + var i int + for v = range c.All() { + i++ + } + if i != 1 { + t.Fatalf("expected 1 value, got %d", i) + } + return v +} + func printYaml(val any) { b, err := yaml.Marshal(val) if err != nil { @@ -89,3 +105,31 @@ func allYamlNodes(n *yaml.Node) iter.Seq[*yaml.Node] { } } } + +func TestRoundTripString(t *testing.T) { + // Check that we can round-trip a string with regexp meta-characters in it. 
+ const y = `!string test*` + t.Logf("input:\n%s", y) + + v1 := oneValue(t, mustParse(y)) + var buf1 strings.Builder + enc := yaml.NewEncoder(&buf1) + if err := enc.Encode(v1); err != nil { + log.Fatal(err) + } + enc.Close() + t.Logf("after parse 1:\n%s", buf1.String()) + + v2 := oneValue(t, mustParse(buf1.String())) + var buf2 strings.Builder + enc = yaml.NewEncoder(&buf2) + if err := enc.Encode(v2); err != nil { + log.Fatal(err) + } + enc.Close() + t.Logf("after parse 2:\n%s", buf2.String()) + + if buf1.String() != buf2.String() { + t.Fatal("parse 1 and parse 2 differ") + } +} From e4b518043dabc2fde86eadba7095fd1c1af7a538 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Jul 2025 22:11:41 -0400 Subject: [PATCH 147/200] internal/unify: fix parsing of empty string nodes Currently, if a node is just "", we incorrectly parse that as a regexp instead of an exact string. This happens because we build it into the regexp "\A(?:)\z" and then test if it's "exact" by asking if it has a "literal prefix". But for the empty string and exactly the empty string, it's literal but has no prefix, so this check fails and we treat this as a regexp instead of an exact string. Fix this by special-casing "". I believe this bug is harmless for unification, since either way it describes the same set of strings, but it affects any "exactness" test and affects YAML round-tripping. 
Change-Id: I3223ef9a27c3cb6bfd2f5a0be9a0b7b71059840f Reviewed-on: https://go-review.googlesource.com/c/arch/+/689479 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements --- internal/unify/domain.go | 8 ++++++++ internal/unify/yaml_test.go | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 00c80902..7f573826 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -259,6 +259,14 @@ func NewStringRegex(exprs ...string) (String, error) { } v := String{kind: -1} for _, expr := range exprs { + if expr == "" { + // Skip constructing the regexp. It won't have a "literal prefix" + // and so we wind up thinking this is a regexp instead of an exact + // (empty) string. + v = String{kind: stringExact, exact: ""} + continue + } + re, err := regexp.Compile(`\A(?:` + expr + `)\z`) if err != nil { return String{}, fmt.Errorf("parsing value: %s", err) diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go index 05a26beb..e04d05d4 100644 --- a/internal/unify/yaml_test.go +++ b/internal/unify/yaml_test.go @@ -133,3 +133,15 @@ func TestRoundTripString(t *testing.T) { t.Fatal("parse 1 and parse 2 differ") } } + +func TestEmptyString(t *testing.T) { + // Regression test. Make sure an empty string is parsed as an exact string, + // not a regexp. + const y = `""` + t.Logf("input:\n%s", y) + + v1 := oneValue(t, mustParse(y)) + if !v1.Exact() { + t.Fatal("expected exact string") + } +} From 414be249652c489008b5d41abc8003127c8652d9 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 22 Jul 2025 20:07:05 -0400 Subject: [PATCH 148/200] internal/simdgen: fix YAML round-tripping There were some errors in the YAML format output that prevented it from being read back in by the unifier. Fix these. This lets you, for example, capture the full XED unification with: go run . 
-xedPath $XED go.yaml types.yaml categories.yaml > /tmp/unified.yaml Then work directly with this pre-unified result: go run . -o godefs /tmp/unified.yaml The results are identical to running godefs on the original inputs. Change-Id: I9a8130aaa494819b4ce7e0802d93efec38431451 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689480 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: David Chase --- internal/simdgen/main.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index db77d8c3..b48c600e 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -186,6 +186,7 @@ func main() { switch *flagO { case "yaml": // Produce a result that looks like encoding a slice, but stream it. + fmt.Println("!sum") var val1 [1]*unify.Value for val := range unified.All() { val1[0] = val @@ -203,11 +204,11 @@ func main() { } } - if !*Verbose { + if !*Verbose && *xedPath != "" { if operandRemarks == 0 { - fmt.Printf("XED decoding generated no errors, which is unusual.\n") + fmt.Fprintf(os.Stderr, "XED decoding generated no errors, which is unusual.\n") } else { - fmt.Printf("XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) + fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) } } From 0e925773f47b6a9cff83429f9ffcd5a05b90e78a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 15:58:34 -0400 Subject: [PATCH 149/200] internal/simdgen: fix or clarify some ISA misconceptions This doesn't affect the generated output (in some cases we were doing the right thing for the wrong reasons). 
Change-Id: If2c3fc760eb3363487bc3be858229dfa6988d31b Reviewed-on: https://go-review.googlesource.com/c/arch/+/690015 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/godefs.go | 2 +- internal/simdgen/main.go | 2 +- internal/simdgen/xed.go | 27 +++++++++++++++++++++++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 9a40f83b..6179d98d 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -27,7 +27,7 @@ type Operation struct { Extension string // Extension ISASet string // ISASet CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet - Zeroing *string // Zeroing is a flag for asm prefix "Z", if non-nil it will always be "false" + Zeroing *string // nil => use asm suffix ".Z"; "false" => do not use asm suffix ".Z" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate // If present, it will be copied to [In[0].Const]. diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index b48c600e..2b0e65f7 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -40,7 +40,7 @@ // simdgen can also generate Go definitions of SIMD mappings: // To generate go files to the go root, run: // -// go run . -xedPath $XEDPATH -godefroot $/PATH/TO/go go.yaml categories.yaml types.yaml +// go run . -xedPath $XEDPATH -o godefs -goroot $PATH/TO/go go.yaml categories.yaml types.yaml // // types.yaml is already written, it specifies the shapes of vectors. 
// categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 5f348cbc..4a1b93b2 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -68,6 +68,21 @@ func loadXED(xedPath string) []*unify.Value { unify.NewValue(unify.NewStringExact(inst.ISASet)), } if strings.Contains(inst.Pattern, "ZEROING=0") { + // This is an EVEX instruction, but the ".Z" (zero-merging) + // instruction flag is NOT valid. EVEX.z must be zero. + // + // This can mean a few things: + // + // - The output of an instruction is a mask, so merging modes don't + // make any sense. E.g., VCMPPS. + // + // - There are no masks involved anywhere. (Maybe MASK=0 is also set + // in this case?) E.g., VINSERTPS. + // + // - The operation inherently performs merging. E.g., VCOMPRESSPS + // with a mem operand. + // + // There may be other reasons. fields = append(fields, "zeroing") values = append(values, unify.NewValue(unify.NewStringExact("false"))) } @@ -220,14 +235,18 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { fmt.Printf(" %+v\n", op) } + if strings.HasPrefix(op.Name, "EMX_BROADCAST") { + // This refers to a set of macros defined in all-state.txt that set a + // BCAST operand to various fixed values. But the BCAST operand is + // itself suppressed and "internal", so I think we can just ignore this + // operand. + return nil, nil + } + // TODO: See xed_decoded_inst_operand_action. This might need to be more // complicated. action, ok := actionEncoding[op.Action] if !ok { - if strings.HasPrefix(op.Name, "EMX_BROADCAST") { - // BROADCAST looks like to contain an obsolete operand. 
- return nil, nil - } return nil, fmt.Errorf("unknown action %q", op.Action) } common := operandCommon{action: action} From 60f586f2dc983eab70ee8ce24c0fd82e2e40bcde Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 16:52:29 -0400 Subject: [PATCH 150/200] internal/simdgen: replace hand-written formatters with reflect This is actually less code, and will transparently keep up with any changes to these types. Change-Id: I3a869898be51f06a4649b4d5bc3f877381afbcd2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/690016 Auto-Submit: Austin Clements Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_utility.go | 99 +-------------------------------- internal/simdgen/pprint.go | 73 ++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 97 deletions(-) create mode 100644 internal/simdgen/pprint.go diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 0be1df38..e3bf45a9 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -790,104 +790,9 @@ func reportXEDInconsistency(ops []Operation) error { } func (o Operation) String() string { - var sb strings.Builder - var nils string - - optStr := func(field string, ps *string) { - if ps != nil { - fmt.Fprintf(&sb, " %s: %s\n", field, *ps) - } else { - nils += " " + field - } - } - - // two spaces then field: value - str := func(field string, value string) { - fmt.Fprintf(&sb, " %s: %s\n", field, value) - } - - sb.WriteString("Operation {\n") - str("Go", o.Go) - str("GoArch", o.GoArch) - str("Asm", o.Asm) - str("Commutative", o.Commutative) - str("Extension", o.Extension) - str("ISASet", o.ISASet) - optStr("CPUFeature", o.CPUFeature) - optStr("ConstImm", o.ConstImm) - optStr("Masked", o.Masked) - optStr("Zeroing", o.Zeroing) - optStr("OperandOrder", o.OperandOrder) - - sb.WriteString(" In: [\n") - for _, op := range o.In { - fmt.Fprintf(&sb, " %s,\n", op.String()) - } - sb.WriteString(" ]\n") - - sb.WriteString(" 
Out: [\n") - for _, op := range o.Out { - fmt.Fprintf(&sb, " %s,\n", op.String()) - } - sb.WriteString(" ]\n") - - optStr("Documentation", o.Documentation) - if len(nils) != 0 { - sb.WriteString(" nils = " + nils[1:] + "\n") - } - - sb.WriteString("}\n") - return sb.String() + return pprints(o) } -// String returns a string representation of the Operand. func (op Operand) String() string { - var sb strings.Builder - var nils string - - optStr := func(field string, ps *string) { - if ps != nil { - fmt.Fprintf(&sb, " %s: %s\n", field, *ps) - } else { - nils += " " + field - } - } - - optNum := func(field string, pi *int) { - if pi != nil { - fmt.Fprintf(&sb, " %s: %d\n", field, *pi) - } else { - nils += " " + field - } - } - - // four spaces then field: value - str := func(field string, value string) { - fmt.Fprintf(&sb, " %s: %s\n", field, value) - } - num := func(field string, value int) { - fmt.Fprintf(&sb, " %s: %d\n", field, value) - } - sb.WriteString("Operand {\n") - str("Class", op.Class) - optStr("Go", op.Go) - num("AsmPos", op.AsmPos) - optStr("Base", op.Base) - optNum("ElemBits", op.ElemBits) - optNum("Bits", op.Bits) - optStr("Const", op.Const) - optStr("ImmOffset", op.ImmOffset) - optNum("Lanes", op.Lanes) - optStr("Name", op.Name) - optNum("TreatLikeAScalarOfSize", op.TreatLikeAScalarOfSize) - optStr("OverwriteClass", op.OverwriteClass) - optStr("OverwriteBase", op.OverwriteBase) - optNum("OverwriteElementBits", op.OverwriteElementBits) - - if len(nils) != 0 { - sb.WriteString(" nils = " + nils[1:] + "\n") - } - - sb.WriteString(" }\n") - return sb.String() + return pprints(op) } diff --git a/internal/simdgen/pprint.go b/internal/simdgen/pprint.go new file mode 100644 index 00000000..054b5176 --- /dev/null +++ b/internal/simdgen/pprint.go @@ -0,0 +1,73 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "fmt" + "reflect" + "strconv" +) + +func pprints(v any) string { + var pp pprinter + pp.val(reflect.ValueOf(v), 0) + return string(pp.buf) +} + +type pprinter struct { + buf []byte +} + +func (p *pprinter) indent(by int) { + for range by { + p.buf = append(p.buf, '\t') + } +} + +func (p *pprinter) val(v reflect.Value, indent int) { + switch v.Kind() { + default: + p.buf = fmt.Appendf(p.buf, "unsupported kind %v", v.Kind()) + + case reflect.Bool: + p.buf = strconv.AppendBool(p.buf, v.Bool()) + + case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64: + p.buf = strconv.AppendInt(p.buf, v.Int(), 10) + + case reflect.String: + p.buf = strconv.AppendQuote(p.buf, v.String()) + + case reflect.Pointer: + if v.IsNil() { + p.buf = append(p.buf, "nil"...) + } else { + p.buf = append(p.buf, "&"...) + p.val(v.Elem(), indent) + } + + case reflect.Slice, reflect.Array: + p.buf = append(p.buf, "[\n"...) + for i := range v.Len() { + p.indent(indent + 1) + p.val(v.Index(i), indent+1) + p.buf = append(p.buf, ",\n"...) + } + p.indent(indent) + p.buf = append(p.buf, ']') + + case reflect.Struct: + vt := v.Type() + p.buf = append(append(p.buf, vt.String()...), "{\n"...) + for f := range v.NumField() { + p.indent(indent + 1) + p.buf = append(append(p.buf, vt.Field(f).Name...), ": "...) + p.val(v.Field(f), indent+1) + p.buf = append(p.buf, ",\n"...) + } + p.indent(indent) + p.buf = append(p.buf, '}') + } +} From a2126014d85ecda5c3a0f2ae64c0495ecf06156b Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 17:00:21 -0400 Subject: [PATCH 151/200] internal/simdgen: use bools for bools There are several bool flags that are strings right now. Make them bools. 
The changes to internal/simdgen/ops were automated with: sed -i "" -e 's/"true"/true/g;s/"false"/false/g' $(find -name '*.yaml' ops) go generate Change-Id: I6b4c4baf98736c4c05705953543263628a2fd829 Reviewed-on: https://go-review.googlesource.com/c/arch/+/690017 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Auto-Submit: Austin Clements --- internal/simdgen/categories.yaml | 412 +++++++++--------- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 8 +- internal/simdgen/gen_utility.go | 4 +- internal/simdgen/godefs.go | 8 +- internal/simdgen/ops/AddSub/categories.yaml | 32 +- .../simdgen/ops/BitwiseLogic/categories.yaml | 24 +- internal/simdgen/ops/Compares/categories.yaml | 42 +- .../simdgen/ops/FPonlyArith/categories.yaml | 70 +-- .../simdgen/ops/GaloisField/categories.yaml | 12 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 18 +- internal/simdgen/ops/MLOps/categories.yaml | 50 +-- internal/simdgen/ops/MinMax/categories.yaml | 12 +- internal/simdgen/ops/Moves/categories.yaml | 20 +- internal/simdgen/ops/Mul/categories.yaml | 24 +- .../simdgen/ops/ShiftRotate/categories.yaml | 108 ++--- internal/unify/domain.go | 11 +- internal/unify/yaml.go | 15 +- 18 files changed, 442 insertions(+), 430 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 5b0e5597..f839f69b 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,113 +1,113 @@ !sum - go: Add - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Add adds corresponding elements of two vectors. - go: SaturatedAdd - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. 
- go: AddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AddMasked adds corresponding elements of two vectors. - go: SaturatedAddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sub subtracts corresponding elements of two vectors. - go: SaturatedSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: SubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SubMasked subtracts corresponding elements of two vectors. - go: SaturatedSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. 
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: And - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. - go: AndMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AndMasked performs a bitwise AND operation between two vectors. - go: Or - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. - go: OrMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise x &^ y. - go: AndNotMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // AndNotMasked performs a bitwise x &^ y. - go: Xor - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. - go: XorMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // XorMasked performs a bitwise XOR operation between two vectors. 
@@ -122,239 +122,239 @@ # 6: Greater - go: Equal constImm: 0 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Equal compares for equality. - go: Less constImm: 1 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Less compares for less than. - go: LessEqual constImm: 2 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - go: IsNan # For float only. constImm: 3 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual constImm: 13 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater constImm: 14 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - go: EqualMasked constImm: 0 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // EqualMasked compares for equality. - go: LessMasked constImm: 1 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessMasked compares for less than. - go: LessEqualMasked constImm: 2 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessEqualMasked compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). 
- go: NotEqualMasked constImm: 4 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // NotEqualMasked compares for inequality. - go: GreaterEqualMasked constImm: 13 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqualMasked compares for greater than or equal. - go: GreaterMasked constImm: 14 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterMasked compares for greater than. - go: Div - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. - go: DivMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // DivMasked divides elements of two vectors. - go: Sqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. - go: SqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // SqrtMasked computes the square root of each element. - go: ApproximateReciprocal - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. - go: ApproximateReciprocalMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalMasked computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
- go: ApproximateReciprocalOfSqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - go: RoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // RoundWithPrecisionMasked rounds elements with specified precision. - go: DiffWithRoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - go: Floor - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. 
- go: Ceil - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // CeilWithPrecisionMasked rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - go: TruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // TruncWithPrecisionMasked truncates elements with specified precision. - go: DiffWithTruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. - go: GaloisFieldAffineTransformMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): @@ -362,8 +362,8 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
- go: GaloisFieldAffineTransformInverseMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), @@ -372,40 +372,40 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: GaloisFieldMulMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. - go: Average - commutative: "true" + commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. - go: AverageMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- // AverageMasked computes the rounded average of corresponding elements. - go: Absolute - commutative: "false" + commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. - go: AbsoluteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" documentation: !string |- // AbsoluteMasked computes the absolute value of each element. 
- go: Sign # Applies sign of second operand to first: sign(val, sign_src) - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sign returns the product of the first operand with -1, 0, or 1, @@ -413,34 +413,34 @@ # Sign does not have masked version - go: PopCountMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- // PopCountMasked counts the number of set bits in each element. - go: PairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: PairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, @@ -448,118 +448,118 @@ # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. 
- go: DotProdBroadcast - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. - go: UnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. - go: Max - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Max computes the maximum of corresponding elements. - go: MaxMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MaxMasked computes the maximum of corresponding elements. - go: Min - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. - go: MinMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MinMasked computes the minimum of corresponding elements. 
- go: SetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. - go: GetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - go: Set128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Permute performs a full permutation of vector x using indices: @@ -567,8 +567,8 @@ // Only the needed bits to represent x's index are used in indices' elements. - go: PermuteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // PermuteMasked performs a full permutation of vector y using indices: @@ -576,8 +576,8 @@ // Only the needed bits to represent x's index are used in indices' elements. - go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // Permute2Masked performs a full permutation of vector x, y using indices: @@ -586,203 +586,203 @@ // Only the needed bits to represent xy's index are used in indices' elements. 
- go: Compress - commutative: "false" + commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- // Compress performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: Mul - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulHigh multiplies elements and stores the high part of the result. - go: MulLow - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the low part of the result. - go: MulMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulLowMasked multiplies elements and stores the low part of the result. 
- go: ShiftAllLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
- go: ShiftLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. - go: RotateAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index f6c7a4a6..4eb47b44 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -35,7 +35,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { sortKey string OpName string OpInLen int - Comm string + Comm bool } type opData struct { Ops []genericOpsData diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index cca7d945..39bf2ec1 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -38,9 +38,9 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { Asm string OpInLen int RegInfo string - Comm string + Comm 
bool Type string - ResultInArg0 string + ResultInArg0 bool } type machineOpsData struct { OpsData []opData @@ -103,9 +103,9 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { } else { panic(fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut)) } - resultInArg0 := "false" + resultInArg0 := false if shapeOut == OneVregOutAtIn { - resultInArg0 = "true" + resultInArg0 = true } if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index e3bf45a9..ea4d56ac 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -555,7 +555,7 @@ func splitMask(ops []Operation) ([]Operation, error) { splited := []Operation{} for _, op := range ops { splited = append(splited, op) - if op.Masked == nil || *op.Masked != "true" { + if op.Masked == nil || !*op.Masked { continue } shapeIn, _, _, _, _ := op.shape() @@ -582,7 +582,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func insertMaskDescToDoc(ops []Operation) { for i, _ := range ops { - if ops[i].Masked != nil && *ops[i].Masked == "true" { + if ops[i].Masked != nil && *ops[i].Masked { if ops[i].Documentation != nil { *ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." 
} diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 6179d98d..32dc29cf 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -23,20 +23,20 @@ type Operation struct { In []Operand // Arguments Out []Operand // Results - Commutative string // Commutativity + Commutative bool // Commutativity Extension string // Extension ISASet string // ISASet CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet - Zeroing *string // nil => use asm suffix ".Z"; "false" => do not use asm suffix ".Z" + Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate // If present, it will be copied to [In[0].Const]. ConstImm *string // Masked indicates that this is a masked operation, this field has to be set for masked operations // otherwise simdgen won't recognize it in [splitMask]. - Masked *string + Masked *bool // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. - NameAndSizeCheck *string + NameAndSizeCheck *bool } func (o *Operation) VectorWidth() int { diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 95775bb8..9bae42e9 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -1,68 +1,68 @@ !sum - go: Add - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Add adds corresponding elements of two vectors. - go: SaturatedAdd - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAdd adds corresponding elements of two vectors with saturation. 
- go: AddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AddMasked adds corresponding elements of two vectors. - go: SaturatedAddMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // SaturatedAddMasked adds corresponding elements of two vectors with saturation. - go: Sub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sub subtracts corresponding elements of two vectors. - go: SaturatedSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSub subtracts corresponding elements of two vectors with saturation. - go: SubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SubMasked subtracts corresponding elements of two vectors. - go: SaturatedSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseAdd horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairwiseSub horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. 
// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index d6ea3ed1..c6a00cc2 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -1,45 +1,45 @@ !sum - go: And - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // And performs a bitwise AND operation between two vectors. - go: AndMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // AndMasked performs a bitwise AND operation between two vectors. - go: Or - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Or performs a bitwise OR operation between two vectors. - go: OrMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // OrMasked performs a bitwise OR operation between two vectors. - go: AndNot - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AndNot performs a bitwise x &^ y. - go: AndNotMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // AndNotMasked performs a bitwise x &^ y. - go: Xor - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Xor performs a bitwise XOR operation between two vectors. 
- go: XorMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // XorMasked performs a bitwise XOR operation between two vectors. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index f7383555..d1080513 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -8,93 +8,93 @@ # 6: Greater - go: Equal constImm: 0 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Equal compares for equality. - go: Less constImm: 1 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Less compares for less than. - go: LessEqual constImm: 2 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // LessEqual compares for less than or equal. - go: IsNan # For float only. constImm: 3 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // IsNan checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // NotEqual compares for inequality. - go: GreaterEqual constImm: 13 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqual compares for greater than or equal. - go: Greater constImm: 14 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Greater compares for greater than. - go: EqualMasked constImm: 0 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // EqualMasked compares for equality. - go: LessMasked constImm: 1 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessMasked compares for less than. 
- go: LessEqualMasked constImm: 2 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // LessEqualMasked compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). - go: NotEqualMasked constImm: 4 - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // NotEqualMasked compares for inequality. - go: GreaterEqualMasked constImm: 13 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterEqualMasked compares for greater than or equal. - go: GreaterMasked constImm: 14 - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GreaterMasked compares for greater than. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index e0d5836d..1347b533 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -1,141 +1,141 @@ !sum - go: Div - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Div divides elements of two vectors. - go: DivMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // DivMasked divides elements of two vectors. - go: Sqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sqrt computes the square root of each element. - go: SqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // SqrtMasked computes the square root of each element. 
- go: ApproximateReciprocal - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocal computes an approximate reciprocal of each element. - go: ApproximateReciprocalMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalMasked computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. - go: ApproximateReciprocalOfSqrtMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // MulByPowOf2Masked multiplies elements by a power of 2. - go: Round - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // Round rounds elements to the nearest integer. - go: RoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // RoundWithPrecisionMasked rounds elements with specified precision. - go: DiffWithRoundWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 0 - masked: "true" + masked: true documentation: !string |- // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. 
- go: Floor - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // Floor rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // FloorWithPrecisionMasked rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 1 - masked: "true" + masked: true documentation: !string |- // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - go: Ceil - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // Ceil rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // CeilWithPrecisionMasked rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 2 - masked: "true" + masked: true documentation: !string |- // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - go: Trunc - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // Trunc truncates elements towards zero. - go: TruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // TruncWithPrecisionMasked truncates elements with specified precision. 
- go: DiffWithTruncWithPrecisionMasked - commutative: "false" + commutative: false extension: "AVX.*" constImm: 3 - masked: "true" + masked: true documentation: !string |- // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - go: AddSub - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // AddSub subtracts even elements and adds odd elements of two vectors. \ No newline at end of file diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 3caa13cf..4184c5e4 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,7 +1,7 @@ !sum - go: GaloisFieldAffineTransformMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): @@ -9,8 +9,8 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. - go: GaloisFieldAffineTransformInverseMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), @@ -19,8 +19,8 @@ // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
- go: GaloisFieldMulMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 96015d28..fc277f81 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -1,32 +1,32 @@ !sum - go: Average - commutative: "true" + commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // Average computes the rounded average of corresponding elements. - go: AverageMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- // AverageMasked computes the rounded average of corresponding elements. - go: Absolute - commutative: "false" + commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // Absolute computes the absolute value of each element. - go: AbsoluteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" documentation: !string |- // AbsoluteMasked computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Sign returns the product of the first operand with -1, 0, or 1, @@ -34,8 +34,8 @@ # Sign does not have masked version - go: PopCountMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- // PopCountMasked counts the number of set bits in each element. 
\ No newline at end of file diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 962ae50d..d26b846d 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -1,27 +1,27 @@ !sum - go: PairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProd multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: PairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdMasked multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, @@ -29,69 +29,69 @@ # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // DotProdBroadcast multiplies all elements and broadcasts the sum. 
- go: UnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulateMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddMasked performs (x * y) + z. - go: FusedMultiplyAddSubMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked - masked: "true" - commutative: "false" + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index 33578ee4..929bfadd 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -1,23 +1,23 @@ !sum - go: Max - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Max computes the maximum of corresponding elements. - go: MaxMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MaxMasked computes the maximum of corresponding elements. - go: Min - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Min computes the minimum of corresponding elements. - go: MinMasked - commutative: "true" - masked: "true" + commutative: true + masked: true extension: "AVX.*" documentation: !string |- // MinMasked computes the minimum of corresponding elements. 
diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 8dfe372a..d6c4d5da 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -1,28 +1,28 @@ !sum - go: SetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // SetElem sets a single constant-indexed element's value. - go: GetElem - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // GetElem retrieves a single constant-indexed element's value. - go: Set128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute - commutative: "false" + commutative: false extension: "AVX.*" documentation: !string |- // Permute performs a full permutation of vector x using indices: @@ -30,8 +30,8 @@ // Only the needed bits to represent x's index are used in indices' elements. - go: PermuteMasked - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // PermuteMasked performs a full permutation of vector y using indices: @@ -39,8 +39,8 @@ // Only the needed bits to represent x's index are used in indices' elements. 
- go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: "false" - masked: "true" + commutative: false + masked: true extension: "AVX.*" documentation: !string |- // Permute2Masked performs a full permutation of vector x, y using indices: @@ -49,7 +49,7 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress - commutative: "false" + commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index b466eb01..1884d660 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -1,47 +1,47 @@ !sum - go: Mul - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // Mul multiplies corresponding elements of two vectors. - go: MulEvenWiden - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWiden multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulHigh multiplies elements and stores the high part of the result. - go: MulLow - commutative: "true" + commutative: true extension: "AVX.*" documentation: !string |- // MulLow multiplies elements and stores the low part of the result. - go: MulMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulMasked multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulEvenWidenMasked multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. 
- go: MulHighMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulHighMasked multiplies elements and stores the high part of the result. - go: MulLowMasked - masked: "true" - commutative: "true" + masked: true + commutative: true extension: "AVX.*" documentation: !string |- // MulLowMasked multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 09c04dfa..b9e2a634 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -1,149 +1,149 @@ !sum - go: ShiftAllLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
- go: ShiftRight signed: true - nameAndSizeCheck: "true" - commutative: "false" + nameAndSizeCheck: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: RotateAllLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
- go: ShiftAllLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. - go: ShiftAllRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
- go: ShiftRightAndFillUpperFromMasked - nameAndSizeCheck: "true" - masked: "true" - commutative: "false" + nameAndSizeCheck: true + masked: true + commutative: false extension: "AVX.*" documentation: !string |- // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 7f573826..7386ea2c 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -310,10 +310,19 @@ func (d String) decode(rv reflect.Value) error { if err == nil { i, err := strconv.Atoi(d.exact) if err != nil { - return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) + return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) } rv2.SetInt(int64(i)) return nil } + rv2, err = preDecode(rv, reflect.Bool, "Bool") + if err == nil { + b, err := strconv.ParseBool(d.exact) + if err != nil { + return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) + } + rv2.SetBool(b) + return nil + } return err } diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 6782b313..08b060d1 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -424,18 +424,21 @@ func (enc *yamlEncoder) value(v *Value) *yaml.Node { case String: switch d.kind { case stringExact: + n.SetString(d.exact) + switch { // Make this into a "nice" !!int node if I can. - if yamlIntRe.MatchString(d.exact) { - n.SetString(d.exact) + case yamlIntRe.MatchString(d.exact): n.Tag = "tag:yaml.org,2002:int" - return &n - } + + // Or a "nice" !!bool node. + case d.exact == "false" || d.exact == "true": + n.Tag = "tag:yaml.org,2002:bool" + // If this doesn't require escaping, leave it as a str node to avoid // the annoying YAML tags. Otherwise, mark it as an exact string. // Alternatively, we could always emit a str node with regexp // quoting. 
- n.SetString(d.exact) - if d.exact != regexp.QuoteMeta(d.exact) { + case d.exact != regexp.QuoteMeta(d.exact): n.Tag = "!string" } return &n From f9e09651d0a171d6f5e87cae6ba63c50c82c6f2a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 23 Jul 2025 17:43:19 -0400 Subject: [PATCH 152/200] internal/simdgen: refactor decodeOperands and loadXED These functions were getting too big and doing too many things at once. Split them up and slightly rejigger the relationship between them. decodeOperands now stops at parsing the XED to []operand, and all translation to unifier values is done in the new instToUVal functions and its helpers. No effect on generated godefs. Change-Id: Idf97ebd521b8963d5703caf695fcfffc422633cd Reviewed-on: https://go-review.googlesource.com/c/arch/+/691336 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/xed.go | 107 +++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 4a1b93b2..26d0adb2 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -49,7 +49,7 @@ func loadXED(xedPath string) []*unify.Value { fmt.Printf("%s:\n%+v\n", inst.Pos, inst) } - ins, outs, err := decodeOperands(db, strings.Fields(inst.Operands)) + ops, err := decodeOperands(db, strings.Fields(inst.Operands)) if err != nil { operandRemarks++ if *Verbose { @@ -57,39 +57,12 @@ func loadXED(xedPath string) []*unify.Value { } return } - // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} - values := []*unify.Value{ - unify.NewValue(unify.NewStringExact("amd64")), - unify.NewValue(unify.NewStringExact(inst.Opcode())), - unify.NewValue(ins), - unify.NewValue(outs), - unify.NewValue(unify.NewStringExact(inst.Extension)), - unify.NewValue(unify.NewStringExact(inst.ISASet)), - } - if strings.Contains(inst.Pattern, "ZEROING=0") { - // This is an EVEX instruction, but the 
".Z" (zero-merging) - // instruction flag is NOT valid. EVEX.z must be zero. - // - // This can mean a few things: - // - // - The output of an instruction is a mask, so merging modes don't - // make any sense. E.g., VCMPPS. - // - // - There are no masks involved anywhere. (Maybe MASK=0 is also set - // in this case?) E.g., VINSERTPS. - // - // - The operation inherently performs merging. E.g., VCOMPRESSPS - // with a mem operand. - // - // There may be other reasons. - fields = append(fields, "zeroing") - values = append(values, unify.NewValue(unify.NewStringExact("false"))) - } - pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} - defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos)) + + uval := instToUVal(inst, ops) + defs = append(defs, uval) + if *flagDebugXED { - y, _ := yaml.Marshal(defs[len(defs)-1]) + y, _ := yaml.Marshal(uval) fmt.Printf("==>\n%s\n", y) } }) @@ -305,17 +278,12 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand) } -func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple, err error) { - fail := func(err error) (unify.Tuple, unify.Tuple, error) { - return unify.Tuple{}, unify.Tuple{}, err - } - - // Decode all of the operands. - var ops []operand +func decodeOperands(db *xeddata.Database, operands []string) (ops []operand, err error) { + // Decode the XED operand descriptions. for _, o := range operands { op, err := decodeOperand(db, o) if err != nil { - return unify.Tuple{}, unify.Tuple{}, err + return nil, err } if op != nil { ops = append(ops, op) @@ -324,7 +292,14 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu // XED doesn't encode the size of mask operands. If there are mask operands, // try to infer their sizes from other operands. 
- // + if err := inferMaskSizes(ops); err != nil { + return nil, fmt.Errorf("%w in operands %+v", err, operands) + } + + return ops, nil +} + +func inferMaskSizes(ops []operand) error { // This is a heuristic and it falls apart in some cases: // // - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate @@ -394,7 +369,7 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu } return nil } - return fmt.Errorf("cannot infer mask size: no register operands: %+v", operands) + return fmt.Errorf("cannot infer mask size: no register operands") } shape, ok := singular(sizes) if !ok { @@ -414,12 +389,15 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu return nil } if err := inferMask(true, false); err != nil { - return fail(err) + return err } if err := inferMask(false, true); err != nil { - return fail(err) + return err } + return nil +} +func operandsToUVals(ops []operand) (in, out unify.Tuple) { var inVals, outVals []*unify.Value for asmPos, op := range ops { fields, values := op.toValue() @@ -444,7 +422,44 @@ func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tu } } - return unify.NewTuple(inVals...), unify.NewTuple(outVals...), nil + return unify.NewTuple(inVals...), unify.NewTuple(outVals...) +} + +func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { + // Map operands to unifier values. + ins, outs := operandsToUVals(ops) + + // TODO: "feature" + fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} + values := []*unify.Value{ + unify.NewValue(unify.NewStringExact("amd64")), + unify.NewValue(unify.NewStringExact(inst.Opcode())), + unify.NewValue(ins), + unify.NewValue(outs), + unify.NewValue(unify.NewStringExact(inst.Extension)), + unify.NewValue(unify.NewStringExact(inst.ISASet)), + } + if strings.Contains(inst.Pattern, "ZEROING=0") { + // This is an EVEX instruction, but the ".Z" (zero-merging) + // instruction flag is NOT valid. 
EVEX.z must be zero. + // + // This can mean a few things: + // + // - The output of an instruction is a mask, so merging modes don't + // make any sense. E.g., VCMPPS. + // + // - There are no masks involved anywhere. (Maybe MASK=0 is also set + // in this case?) E.g., VINSERTPS. + // + // - The operation inherently performs merging. E.g., VCOMPRESSPS + // with a mem operand. + // + // There may be other reasons. + fields = append(fields, "zeroing") + values = append(values, unify.NewValue(unify.NewStringExact("false"))) + } + pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} + return unify.NewValuePos(unify.NewDef(fields, values), pos) } func singular[T comparable](xs []T) (T, bool) { From ed8e6937ca944f2bcf5dd134a6d1bb30c423e3f2 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 24 Jul 2025 14:20:09 -0400 Subject: [PATCH 153/200] internal/{unify,simdgen}: replace NewDef with DefBuilder NewDef requires two separate slices of field names and field values, which is really awkward to use, both because you have to pair up the i'th positions in two slices when reading code, and because it makes it really awkward to conditionally add fields. Fix this by replacing NewDef with a DefBuilder type that lets you add field/value pairs one by one to build a Def. No effect on generated godefs. Change-Id: I75dfb6ac798585e717965ab9d5d0f1bc6a157aef Reviewed-on: https://go-review.googlesource.com/c/arch/+/691337 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/xed.go | 94 ++++++++++++++++++---------------------- internal/unify/domain.go | 30 +++++++------ internal/unify/yaml.go | 8 ++-- 3 files changed, 63 insertions(+), 69 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 26d0adb2..dddf5395 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -119,7 +119,8 @@ type operandMask struct { operandCommon vecShape // Bits in the mask is w/bits. 
- allMasks bool + + allMasks bool // If set, size cannot be inferred because all operands are masks. } type operandImm struct { @@ -129,7 +130,7 @@ type operandImm struct { type operand interface { common() operandCommon - toValue() (fields []string, vals []*unify.Value) + addToDef(b *unify.DefBuilder) } func strVal(s any) *unify.Value { @@ -140,53 +141,52 @@ func (o operandCommon) common() operandCommon { return o } -func (o operandMem) toValue() (fields []string, vals []*unify.Value) { +func (o operandMem) addToDef(b *unify.DefBuilder) { // TODO: w, base - return []string{"class"}, []*unify.Value{strVal("memory")} + b.Add("class", strVal("memory")) } -func (o operandVReg) toValue() (fields []string, vals []*unify.Value) { +func (o operandVReg) addToDef(b *unify.DefBuilder) { baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) if err != nil { panic("parsing baseRe: " + err.Error()) } - fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ - strVal("vreg"), - strVal(o.bits), - unify.NewValue(baseDomain)} + b.Add("class", strVal("vreg")) + b.Add("bits", strVal(o.bits)) + b.Add("base", unify.NewValue(baseDomain)) + // If elemBits == bits, then the vector can be ANY shape. This happens with, + // for example, logical ops. if o.elemBits != o.bits { - fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + b.Add("elemBits", strVal(o.elemBits)) } - // otherwise it means the vector could be any shape. 
- return } -func (o operandGReg) toValue() (fields []string, vals []*unify.Value) { +func (o operandGReg) addToDef(b *unify.DefBuilder) { baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) if err != nil { panic("parsing baseRe: " + err.Error()) } - fields, vals = []string{"class", "bits", "base"}, []*unify.Value{ - strVal("greg"), - strVal(o.bits), - unify.NewValue(baseDomain)} + b.Add("class", strVal("greg")) + b.Add("bits", strVal(o.bits)) + b.Add("base", unify.NewValue(baseDomain)) if o.elemBits != o.bits { - fields, vals = append(fields, "elemBits"), append(vals, strVal(o.elemBits)) + b.Add("elemBits", strVal(o.elemBits)) } - // otherwise it means the vector could be any shape. - return -} - -func (o operandMask) toValue() (fields []string, vals []*unify.Value) { - return []string{"class", "elemBits", "bits"}, []*unify.Value{strVal("mask"), strVal(o.elemBits), strVal(o.bits)} } -func (o operandMask) zeroMaskValue() (fields []string, vals []*unify.Value) { - return []string{"class"}, []*unify.Value{strVal("mask")} +func (o operandMask) addToDef(b *unify.DefBuilder) { + b.Add("class", strVal("mask")) + if o.allMasks { + // If all operands are masks, omit sizes and let unification determine mask sizes. 
+ return + } + b.Add("elemBits", strVal(o.elemBits)) + b.Add("bits", strVal(o.bits)) } -func (o operandImm) toValue() (fields []string, vals []*unify.Value) { - return []string{"class", "bits"}, []*unify.Value{strVal("immediate"), strVal(o.bits)} +func (o operandImm) addToDef(b *unify.DefBuilder) { + b.Add("class", strVal("immediate")) + b.Add("bits", strVal(o.bits)) } var actionEncoding = map[string]operandAction{ @@ -400,24 +400,18 @@ func inferMaskSizes(ops []operand) error { func operandsToUVals(ops []operand) (in, out unify.Tuple) { var inVals, outVals []*unify.Value for asmPos, op := range ops { - fields, values := op.toValue() - if opm, ok := op.(operandMask); ok { - if opm.allMasks { - // If all operands are masks, leave the mask inferrence to the users. - fields, values = opm.zeroMaskValue() - } - } + var db unify.DefBuilder + op.addToDef(&db) - fields = append(fields, "asmPos") - values = append(values, unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) + db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) action := op.common().action if action.r { - inVal := unify.NewValue(unify.NewDef(fields, values)) + inVal := unify.NewValue(db.Build()) inVals = append(inVals, inVal) } if action.w { - outVal := unify.NewValue(unify.NewDef(fields, values)) + outVal := unify.NewValue(db.Build()) outVals = append(outVals, outVal) } } @@ -430,15 +424,14 @@ func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { ins, outs := operandsToUVals(ops) // TODO: "feature" - fields := []string{"goarch", "asm", "in", "out", "extension", "isaset"} - values := []*unify.Value{ - unify.NewValue(unify.NewStringExact("amd64")), - unify.NewValue(unify.NewStringExact(inst.Opcode())), - unify.NewValue(ins), - unify.NewValue(outs), - unify.NewValue(unify.NewStringExact(inst.Extension)), - unify.NewValue(unify.NewStringExact(inst.ISASet)), - } + var db unify.DefBuilder + db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) + db.Add("asm", 
unify.NewValue(unify.NewStringExact(inst.Opcode()))) + db.Add("in", unify.NewValue(ins)) + db.Add("out", unify.NewValue(outs)) + db.Add("extension", unify.NewValue(unify.NewStringExact(inst.Extension))) + db.Add("isaset", unify.NewValue(unify.NewStringExact(inst.ISASet))) + if strings.Contains(inst.Pattern, "ZEROING=0") { // This is an EVEX instruction, but the ".Z" (zero-merging) // instruction flag is NOT valid. EVEX.z must be zero. @@ -455,11 +448,10 @@ func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { // with a mem operand. // // There may be other reasons. - fields = append(fields, "zeroing") - values = append(values, unify.NewValue(unify.NewStringExact("false"))) + db.Add("zeroing", unify.NewValue(unify.NewStringExact("false"))) } pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} - return unify.NewValuePos(unify.NewDef(fields, values), pos) + return unify.NewValuePos(db.Build(), pos) } func singular[T comparable](xs []T) (T, bool) { diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 7386ea2c..1cd5af14 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -94,21 +94,25 @@ type Def struct { fields map[string]*Value } -// NewDef creates a new [Def]. -// -// The fields and values slices must have the same length. -func NewDef(fields []string, values []*Value) Def { - if len(fields) != len(values) { - panic("fields and values must have the same length") +// A DefBuilder builds a [Def] one field at a time. The zero value is an empty +// [Def]. 
+type DefBuilder struct { + fields map[string]*Value +} + +func (b *DefBuilder) Add(name string, v *Value) { + if b.fields == nil { + b.fields = make(map[string]*Value) } - m := make(map[string]*Value, len(fields)) - for i := range fields { - if _, ok := m[fields[i]]; ok { - panic(fmt.Sprintf("duplicate field %q", fields[i])) - } - m[fields[i]] = values[i] + if _, ok := b.fields[name]; ok { + panic(fmt.Sprintf("duplicate field %q", name)) } - return Def{m} + b.fields[name] = v +} + +// Build constructs a [Def] from the fields added to this builder. +func (b *DefBuilder) Build() Def { + return Def{maps.Clone(b.fields)} } // Exact returns true if all field Values are exact. diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 08b060d1..1b1c8139 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -206,8 +206,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { return mk2(NewStringRegex(vals...)) case is(yaml.MappingNode, "tag:yaml.org,2002:map"): - var fields []string - var vals []*Value + var db DefBuilder for i := 0; i < len(node.Content); i += 2 { key := node.Content[i] if key.Kind != yaml.ScalarNode { @@ -217,10 +216,9 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { if err != nil { return nil, err } - fields = append(fields, key.Value) - vals = append(vals, val) + db.Add(key.Value, val) } - return mk(NewDef(fields, vals)) + return mk(db.Build()) case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"): elts := node.Content From 213dc9a3a311abffdac206e7fde39fed46dc97ea Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 25 Jul 2025 17:01:19 -0400 Subject: [PATCH 154/200] internal/unify: support custom type decoding This adds a Decoder interface that types can implement to support custom decoding from a unify.Value. No effect on generated godefs. 
Change-Id: I7f22d7194670b2c25fba414a165c176931c935cc Reviewed-on: https://go-review.googlesource.com/c/arch/+/691338 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/domain.go | 44 ++++++++++++++++-------------------- internal/unify/value.go | 49 ++++++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 1cd5af14..36239054 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -126,10 +126,10 @@ func (d Def) Exact() bool { } func (d Def) decode(rv reflect.Value) error { - rv, err := preDecode(rv, reflect.Struct, "Def") - if err != nil { - return err + if rv.Kind() != reflect.Struct { + return fmt.Errorf("cannot decode Def into %s", rv.Type()) } + var lowered map[string]string // Lower case -> canonical for d.fields. rt := rv.Type() for fi := range rv.NumField() { @@ -161,7 +161,7 @@ func (d Def) decode(rv reflect.Value) error { } } } - if err := v.Domain.decode(rv.Field(fi)); err != nil { + if err := decodeReflect(v, rv.Field(fi)); err != nil { return newDecodeError(fType.Name, err) } } @@ -224,9 +224,8 @@ func (d Tuple) decode(rv reflect.Value) error { return &inexactError{"repeated tuple", rv.Type().String()} } // TODO: We could also do arrays. 
- rv, err := preDecode(rv, reflect.Slice, "Tuple") - if err != nil { - return err + if rv.Kind() != reflect.Slice { + return fmt.Errorf("cannot decode Tuple into %s", rv.Type()) } if rv.IsNil() || rv.Cap() < len(d.vs) { rv.Set(reflect.MakeSlice(rv.Type(), len(d.vs), len(d.vs))) @@ -234,7 +233,7 @@ func (d Tuple) decode(rv reflect.Value) error { rv.SetLen(len(d.vs)) } for i, v := range d.vs { - if err := v.Domain.decode(rv.Index(i)); err != nil { + if err := decodeReflect(v, rv.Index(i)); err != nil { return newDecodeError(fmt.Sprintf("%d", i), err) } } @@ -305,28 +304,23 @@ func (d String) decode(rv reflect.Value) error { if d.kind != stringExact { return &inexactError{"regex", rv.Type().String()} } - rv2, err := preDecode(rv, reflect.String, "String") - if err == nil { - rv2.SetString(d.exact) - return nil - } - rv2, err = preDecode(rv, reflect.Int, "String") - if err == nil { + switch rv.Kind() { + default: + return fmt.Errorf("cannot decode String into %s", rv.Type()) + case reflect.String: + rv.SetString(d.exact) + case reflect.Int: i, err := strconv.Atoi(d.exact) if err != nil { - return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) + return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) } - rv2.SetInt(int64(i)) - return nil - } - rv2, err = preDecode(rv, reflect.Bool, "Bool") - if err == nil { + rv.SetInt(int64(i)) + case reflect.Bool: b, err := strconv.ParseBool(d.exact) if err != nil { - return fmt.Errorf("cannot decode string into %s: %s", rv.Type(), err) + return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) } - rv2.SetBool(b) - return nil + rv.SetBool(b) } - return err + return nil } diff --git a/internal/unify/value.go b/internal/unify/value.go index 6bf121af..87387bbf 100644 --- a/internal/unify/value.go +++ b/internal/unify/value.go @@ -83,27 +83,58 @@ func (v *Value) Exact() bool { // are decoded into strings or ints. Any field can itself be a pointer to one of // these types. 
Top can be decoded into a pointer-typed field and will set the // field to nil. Anything else will allocate a value if necessary. +// +// Any type may implement [Decoder], in which case its DecodeUnified method will +// be called instead of using the default decoding scheme. func (v *Value) Decode(into any) error { rv := reflect.ValueOf(into) if rv.Kind() != reflect.Pointer { return fmt.Errorf("cannot decode into non-pointer %T", into) } - return v.Domain.decode(rv) + return decodeReflect(v, rv.Elem()) } -func preDecode(rv reflect.Value, kind reflect.Kind, name string) (reflect.Value, error) { - if rv.Kind() == kind { - return rv, nil - } - if rv.Kind() == reflect.Pointer && rv.Type().Elem().Kind() == kind { +func decodeReflect(v *Value, rv reflect.Value) error { + var ptr reflect.Value + if rv.Kind() == reflect.Pointer { if rv.IsNil() { - rv.Set(reflect.New(rv.Type().Elem())) + // Transparently allocate through pointers, *except* for Top, which + // wants to set the pointer to nil. + // + // TODO: Drop this condition if I switch to an explicit Optional[T] + // or move the Top logic into Def. + if _, ok := v.Domain.(Top); !ok { + // Allocate the value to fill in, but don't actually store it in + // the pointer until we successfully decode. + ptr = rv + rv = reflect.New(rv.Type().Elem()).Elem() + } + } else { + rv = rv.Elem() } - return rv.Elem(), nil } - return reflect.Value{}, fmt.Errorf("cannot decode %s into %s", name, rv.Type()) + + var err error + if reflect.PointerTo(rv.Type()).Implements(decoderType) { + // Use the custom decoder. + err = rv.Addr().Interface().(Decoder).DecodeUnified(v) + } else { + err = v.Domain.decode(rv) + } + if err == nil && ptr.IsValid() { + ptr.Set(rv.Addr()) + } + return err +} + +// Decoder can be implemented by types as a custom implementation of [Decode] +// for that type. 
+type Decoder interface { + DecodeUnified(v *Value) error } +var decoderType = reflect.TypeOf((*Decoder)(nil)).Elem() + // Provenance iterates over all of the source Values that have contributed to // this Value. func (v *Value) Provenance() iter.Seq[*Value] { From 63d3a519a5d6f506b1593ca7a2aa980362a0bc44 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 25 Jul 2025 17:13:29 -0400 Subject: [PATCH 155/200] internal/simdgen: split Operation into rawOperation This prepares us to parse the raw unification results into rawOperation and then override what makes sense with "more parsed" forms in Operation, while simultaneously keeping everything working. No effect on generated godefs. Change-Id: Ic0dd6643488b3dbb3125fb17b31725576da84a2c Reviewed-on: https://go-review.googlesource.com/c/arch/+/691339 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/godefs.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 32dc29cf..32467de7 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -15,6 +15,12 @@ import ( ) type Operation struct { + rawOperation +} + +// rawOperation is the unifier representation of an [Operation]. It is +// translated into a more parsed form after unifier decoding. 
+type rawOperation struct { Go string // Go method name GoArch string // GOARCH for this definition @@ -39,6 +45,13 @@ type Operation struct { NameAndSizeCheck *bool } +func (o *Operation) DecodeUnified(v *unify.Value) error { + if err := v.Decode(&o.rawOperation); err != nil { + return err + } + return nil +} + func (o *Operation) VectorWidth() int { out := o.Out[0] if out.Class == "vreg" { From ce2e40c7d7dfb66cde2d1e77ce2ed27b1714d1b1 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 25 Jul 2025 17:31:06 -0400 Subject: [PATCH 156/200] internal/simdgen: use "NAME" for method names in doc strings This will let us merge operations that differ only in their method names. In particular, this will give us a robust way to insert computed method names. The YAML changes were done using the following Gemini CLI prompt: In all of the files named "categories.yaml", for each operation in the YAML list, find the Go method name from the "go" field, and replace anywhere that operation name appears as a word in the "documentation" field with the literal string "NAME". Please do this using Go with the yaml.v3 library. The yaml.v3 library is already imported in this module. No effect on generated godefs. 
Change-Id: Ifdac95c5d62475937fc33a8013d0b0c5c5dca312 Reviewed-on: https://go-review.googlesource.com/c/arch/+/691340 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 261 ++++++++---------- internal/simdgen/gen_utility.go | 8 +- internal/simdgen/godefs.go | 20 ++ internal/simdgen/ops/AddSub/categories.yaml | 24 +- .../simdgen/ops/BitwiseLogic/categories.yaml | 17 +- internal/simdgen/ops/Compares/categories.yaml | 29 +- .../simdgen/ops/FPonlyArith/categories.yaml | 49 ++-- .../simdgen/ops/GaloisField/categories.yaml | 8 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 15 +- internal/simdgen/ops/MLOps/categories.yaml | 33 ++- internal/simdgen/ops/MinMax/categories.yaml | 8 +- internal/simdgen/ops/Moves/categories.yaml | 21 +- internal/simdgen/ops/Mul/categories.yaml | 16 +- .../simdgen/ops/ShiftRotate/categories.yaml | 43 ++- 14 files changed, 268 insertions(+), 284 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index f839f69b..5a7e711d 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -3,114 +3,115 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Add adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAdd commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAdd adds corresponding elements of two vectors with saturation. + // NAME adds corresponding elements of two vectors with saturation. - go: AddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AddMasked adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAddMasked adds corresponding elements of two vectors with saturation. 
+ // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false extension: "AVX.*" documentation: !string |- - // Sub subtracts corresponding elements of two vectors. + // NAME subtracts corresponding elements of two vectors. - go: SaturatedSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSub subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: SubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SubMasked subtracts corresponding elements of two vectors. + // NAME subtracts corresponding elements of two vectors. - go: SaturatedSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseAdd horizontally adds adjacent pairs of elements. + // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseSub horizontally subtracts adjacent pairs of elements. + // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // NAME horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: And commutative: true extension: "AVX.*" documentation: !string |- - // And performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: AndMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AndMasked performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- - // Or performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: OrMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // OrMasked performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: AndNotMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- - // Xor performs a bitwise XOR operation between two vectors. + // NAME performs a bitwise XOR operation between two vectors. - go: XorMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // XorMasked performs a bitwise XOR operation between two vectors. 
+ // NAME performs a bitwise XOR operation between two vectors. + # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. # const imm predicate(holds for both float and int|uint): @@ -125,239 +126,233 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Equal compares for equality. + // NAME compares for equality. - go: Less constImm: 1 commutative: false extension: "AVX.*" documentation: !string |- - // Less compares for less than. + // NAME compares for less than. - go: LessEqual constImm: 2 commutative: false extension: "AVX.*" documentation: !string |- - // LessEqual compares for less than or equal. + // NAME compares for less than or equal. - go: IsNan # For float only. constImm: 3 commutative: true extension: "AVX.*" documentation: !string |- - // IsNan checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 commutative: true extension: "AVX.*" documentation: !string |- - // NotEqual compares for inequality. + // NAME compares for inequality. - go: GreaterEqual constImm: 13 commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqual compares for greater than or equal. + // NAME compares for greater than or equal. - go: Greater constImm: 14 commutative: false extension: "AVX.*" documentation: !string |- - // Greater compares for greater than. - + // NAME compares for greater than. - go: EqualMasked constImm: 0 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality. + // NAME compares for equality. - go: LessMasked constImm: 1 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessMasked compares for less than. + // NAME compares for less than. - go: LessEqualMasked constImm: 2 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessEqualMasked compares for less than or equal. 
+ // NAME compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqualMasked constImm: 4 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // NotEqualMasked compares for inequality. + // NAME compares for inequality. - go: GreaterEqualMasked constImm: 13 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqualMasked compares for greater than or equal. + // NAME compares for greater than or equal. - go: GreaterMasked constImm: 14 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterMasked compares for greater than. + // NAME compares for greater than. - go: Div commutative: false extension: "AVX.*" documentation: !string |- - // Div divides elements of two vectors. + // NAME divides elements of two vectors. - go: DivMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // DivMasked divides elements of two vectors. + // NAME divides elements of two vectors. - go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- - // Sqrt computes the square root of each element. + // NAME computes the square root of each element. - go: SqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // SqrtMasked computes the square root of each element. + // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocal computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. 
- go: ApproximateReciprocalMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalMasked computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: ApproximateReciprocalOfSqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: false masked: true extension: "AVX.*" documentation: !string |- - // MulByPowOf2Masked multiplies elements by a power of 2. - + // NAME multiplies elements by a power of 2. - go: Round commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- - // Round rounds elements to the nearest integer. + // NAME rounds elements to the nearest integer. - go: RoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // RoundWithPrecisionMasked rounds elements with specified precision. + // NAME rounds elements with specified precision. - go: DiffWithRoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - + // NAME computes the difference after rounding with specified precision. 
- go: Floor commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- - // Floor rounds elements down to the nearest integer. + // NAME rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision. + // NAME rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - + // NAME computes the difference after flooring with specified precision. - go: Ceil commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- - // Ceil rounds elements up to the nearest integer. + // NAME rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision. + // NAME rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - + // NAME computes the difference after ceiling with specified precision. - go: Trunc commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- - // Trunc truncates elements towards zero. + // NAME truncates elements towards zero. - go: TruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // TruncWithPrecisionMasked truncates elements with specified precision. + // NAME truncates elements with specified precision. 
- go: DiffWithTruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - + // NAME computes the difference after truncating with specified precision. - go: AddSub commutative: false extension: "AVX.*" documentation: !string |- - // AddSub subtracts even elements and adds odd elements of two vectors. + // NAME subtracts even elements and adds odd elements of two vectors. - go: GaloisFieldAffineTransformMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): + // NAME computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -366,7 +361,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // NAME computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -376,288 +371,279 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with + // NAME computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. 
- go: Average commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- - // Average computes the rounded average of corresponding elements. + // NAME computes the rounded average of corresponding elements. - go: AverageMasked commutative: true masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // AverageMasked computes the rounded average of corresponding elements. - + // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- - // Absolute computes the absolute value of each element. + // NAME computes the absolute value of each element. - go: AbsoluteMasked commutative: false masked: true extension: "AVX512.*" documentation: !string |- - // AbsoluteMasked computes the absolute value of each element. - + // NAME computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false extension: "AVX.*" documentation: !string |- - // Sign returns the product of the first operand with -1, 0, or 1, + // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version - - go: PopCountMasked commutative: false masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // PopCountMasked counts the number of set bits in each element. + // NAME counts the number of set bits in each element. - go: PairDotProd commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProd multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. 
- go: PairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdMasked multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - go: SaturatedUnsignedSignedPairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true extension: "AVX.*" documentation: !string |- - // DotProdBroadcast multiplies all elements and broadcasts the sum. + // NAME multiplies all elements and broadcasts the sum. - go: UnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
- go: UnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. + // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. + // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. 
- go: SaturatedPairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs (x * y) + z. + // NAME performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. + // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: FusedMultiplySubAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. + // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. - go: Max commutative: true extension: "AVX.*" documentation: !string |- - // Max computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: MaxMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MaxMasked computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- - // Min computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. - go: MinMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MinMasked computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. 
- go: SetElem commutative: false extension: "AVX.*" documentation: !string |- - // SetElem sets a single constant-indexed element's value. + // NAME sets a single constant-indexed element's value. - go: GetElem commutative: false extension: "AVX.*" documentation: !string |- - // GetElem retrieves a single constant-indexed element's value. + // NAME retrieves a single constant-indexed element's value. - go: Set128 commutative: false extension: "AVX.*" documentation: !string |- - // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. + // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 commutative: false extension: "AVX.*" documentation: !string |- - // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - - + // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute commutative: false extension: "AVX.*" documentation: !string |- - // Permute performs a full permutation of vector x using indices: + // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. - - go: PermuteMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // PermuteMasked performs a full permutation of vector y using indices: + // NAME performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
- - go: Permute2Masked # Permute2Masked is only available on or after AVX512 commutative: false masked: true extension: "AVX.*" documentation: !string |- - // Permute2Masked performs a full permutation of vector x, y using indices: + // NAME performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. - - go: Compress commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- - // Compress performs a compression on vector x using mask by + // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: Mul commutative: true extension: "AVX.*" documentation: !string |- - // Mul multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: true extension: "AVX.*" documentation: !string |- - // MulHigh multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLow commutative: true extension: "AVX.*" documentation: !string |- - // MulLow multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. - go: MulMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. 
- go: MulEvenWidenMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLowMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. - go: ShiftAllLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRightMasked signed: false nameAndSizeCheck: true @@ -665,14 +651,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true nameAndSizeCheck: true @@ -680,28 +666,27 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false nameAndSizeCheck: true @@ -709,14 +694,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true nameAndSizeCheck: true @@ -724,44 +709,42 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. + // NAME rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. + // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. + // NAME rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. - + // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -769,7 +752,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: true @@ -777,7 +760,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -785,5 +768,5 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index ea4d56ac..be0a945d 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -569,8 +569,8 @@ func splitMask(ops []Operation) ([]Operation, error) { } maskedOpName := op2.Go op2.Go = strings.TrimSuffix(op2.Go, "Masked") - op2Doc := strings.ReplaceAll(*op2.Documentation, maskedOpName, op2.Go) - op2.Documentation = &op2Doc + op2Doc := strings.ReplaceAll(op2.Documentation, maskedOpName, op2.Go) + op2.Documentation = op2Doc op2.Masked = nil // It's no longer masked. splited = append(splited, op2) } else { @@ -583,9 +583,7 @@ func splitMask(ops []Operation) ([]Operation, error) { func insertMaskDescToDoc(ops []Operation) { for i, _ := range ops { if ops[i].Masked != nil && *ops[i].Masked { - if ops[i].Documentation != nil { - *ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." - } + ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." } } } diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 32467de7..b45c249f 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -7,6 +7,7 @@ package main import ( "fmt" "log" + "regexp" "slices" "strconv" "strings" @@ -16,6 +17,16 @@ import ( type Operation struct { rawOperation + + // Documentation is the doc string for this API. + // + // It is computed from the raw documentation: + // + // - "NAME" is replaced by the Go method name. + // + // - For masked operation, the method name is updated and a sentence about + // masking is added. + Documentation string } // rawOperation is the unifier representation of an [Operation]. It is @@ -49,6 +60,15 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { if err := v.Decode(&o.rawOperation); err != nil { return err } + + // Compute doc string. 
+ if o.rawOperation.Documentation != nil { + o.Documentation = *o.rawOperation.Documentation + } else { + o.Documentation = "// UNDOCUMENTED" + } + o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) + return nil } diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 9bae42e9..667508b5 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -3,67 +3,67 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Add adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAdd commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAdd adds corresponding elements of two vectors with saturation. + // NAME adds corresponding elements of two vectors with saturation. - go: AddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AddMasked adds corresponding elements of two vectors. + // NAME adds corresponding elements of two vectors. - go: SaturatedAddMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // SaturatedAddMasked adds corresponding elements of two vectors with saturation. + // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false extension: "AVX.*" documentation: !string |- - // Sub subtracts corresponding elements of two vectors. + // NAME subtracts corresponding elements of two vectors. - go: SaturatedSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSub subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: SubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SubMasked subtracts corresponding elements of two vectors. 
+ // NAME subtracts corresponding elements of two vectors. - go: SaturatedSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedSubMasked subtracts corresponding elements of two vectors with saturation. + // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseAdd horizontally adds adjacent pairs of elements. + // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: PairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // PairwiseSub horizontally subtracts adjacent pairs of elements. + // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: SaturatedPairwiseAdd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. + // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SaturatedPairwiseSub commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairwiseSub horizontally subtracts adjacent pairs of elements with saturation. + // NAME horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index c6a00cc2..3d2eda7c 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -3,45 +3,46 @@ commutative: true extension: "AVX.*" documentation: !string |- - // And performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: AndMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // AndMasked performs a bitwise AND operation between two vectors. + // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- - // Or performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: OrMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // OrMasked performs a bitwise OR operation between two vectors. + // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- - // AndNot performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: AndNotMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // AndNotMasked performs a bitwise x &^ y. + // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- - // Xor performs a bitwise XOR operation between two vectors. + // NAME performs a bitwise XOR operation between two vectors. - go: XorMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // XorMasked performs a bitwise XOR operation between two vectors. + // NAME performs a bitwise XOR operation between two vectors. + # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. 
diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index d1080513..e17e45db 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -11,90 +11,89 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Equal compares for equality. + // NAME compares for equality. - go: Less constImm: 1 commutative: false extension: "AVX.*" documentation: !string |- - // Less compares for less than. + // NAME compares for less than. - go: LessEqual constImm: 2 commutative: false extension: "AVX.*" documentation: !string |- - // LessEqual compares for less than or equal. + // NAME compares for less than or equal. - go: IsNan # For float only. constImm: 3 commutative: true extension: "AVX.*" documentation: !string |- - // IsNan checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 commutative: true extension: "AVX.*" documentation: !string |- - // NotEqual compares for inequality. + // NAME compares for inequality. - go: GreaterEqual constImm: 13 commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqual compares for greater than or equal. + // NAME compares for greater than or equal. - go: Greater constImm: 14 commutative: false extension: "AVX.*" documentation: !string |- - // Greater compares for greater than. - + // NAME compares for greater than. - go: EqualMasked constImm: 0 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // EqualMasked compares for equality. + // NAME compares for equality. - go: LessMasked constImm: 1 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessMasked compares for less than. + // NAME compares for less than. 
- go: LessEqualMasked constImm: 2 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // LessEqualMasked compares for less than or equal. + // NAME compares for less than or equal. - go: IsNanMasked # For float only. constImm: 3 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // IsNanMasked checks if elements are NaN. Use as x.IsNan(x). + // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqualMasked constImm: 4 masked: true commutative: true extension: "AVX.*" documentation: !string |- - // NotEqualMasked compares for inequality. + // NAME compares for inequality. - go: GreaterEqualMasked constImm: 13 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterEqualMasked compares for greater than or equal. + // NAME compares for greater than or equal. - go: GreaterMasked constImm: 14 masked: true commutative: false extension: "AVX.*" documentation: !string |- - // GreaterMasked compares for greater than. + // NAME compares for greater than. diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 1347b533..53292048 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -3,139 +3,134 @@ commutative: false extension: "AVX.*" documentation: !string |- - // Div divides elements of two vectors. + // NAME divides elements of two vectors. - go: DivMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // DivMasked divides elements of two vectors. + // NAME divides elements of two vectors. - go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- - // Sqrt computes the square root of each element. + // NAME computes the square root of each element. - go: SqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // SqrtMasked computes the square root of each element. 
+ // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocal computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalMasked computes an approximate reciprocal of each element. + // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: ApproximateReciprocalOfSqrtMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // ApproximateReciprocalOfSqrtMasked computes an approximate reciprocal of the square root of each element. + // NAME computes an approximate reciprocal of the square root of each element. - go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. commutative: false masked: true extension: "AVX.*" documentation: !string |- - // MulByPowOf2Masked multiplies elements by a power of 2. - + // NAME multiplies elements by a power of 2. - go: Round commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- - // Round rounds elements to the nearest integer. + // NAME rounds elements to the nearest integer. - go: RoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // RoundWithPrecisionMasked rounds elements with specified precision. + // NAME rounds elements with specified precision. 
- go: DiffWithRoundWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 0 masked: true documentation: !string |- - // DiffWithRoundWithPrecisionMasked computes the difference after rounding with specified precision. - + // NAME computes the difference after rounding with specified precision. - go: Floor commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- - // Floor rounds elements down to the nearest integer. + // NAME rounds elements down to the nearest integer. - go: FloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // FloorWithPrecisionMasked rounds elements down with specified precision. + // NAME rounds elements down with specified precision. - go: DiffWithFloorWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 1 masked: true documentation: !string |- - // DiffWithFloorWithPrecisionMasked computes the difference after flooring with specified precision. - + // NAME computes the difference after flooring with specified precision. - go: Ceil commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- - // Ceil rounds elements up to the nearest integer. + // NAME rounds elements up to the nearest integer. - go: CeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // CeilWithPrecisionMasked rounds elements up with specified precision. + // NAME rounds elements up with specified precision. - go: DiffWithCeilWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 2 masked: true documentation: !string |- - // DiffWithCeilWithPrecisionMasked computes the difference after ceiling with specified precision. - + // NAME computes the difference after ceiling with specified precision. - go: Trunc commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- - // Trunc truncates elements towards zero. + // NAME truncates elements towards zero. 
- go: TruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // TruncWithPrecisionMasked truncates elements with specified precision. + // NAME truncates elements with specified precision. - go: DiffWithTruncWithPrecisionMasked commutative: false extension: "AVX.*" constImm: 3 masked: true documentation: !string |- - // DiffWithTruncWithPrecisionMasked computes the difference after truncating with specified precision. - + // NAME computes the difference after truncating with specified precision. - go: AddSub commutative: false extension: "AVX.*" documentation: !string |- - // AddSub subtracts even elements and adds odd elements of two vectors. \ No newline at end of file + // NAME subtracts even elements and adds odd elements of two vectors. diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 4184c5e4..62d8709e 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -4,7 +4,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8): + // NAME computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. @@ -13,7 +13,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8), + // NAME computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y @@ -23,5 +23,5 @@ commutative: false extension: "AVX.*" documentation: !string |- - // GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with - // reduction polynomial x^8 + x^4 + x^3 + x + 1. \ No newline at end of file + // NAME computes element-wise GF(2^8) multiplication with + // reduction polynomial x^8 + x^4 + x^3 + x + 1. diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index fc277f81..76ab14ba 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -3,39 +3,36 @@ commutative: true extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- - // Average computes the rounded average of corresponding elements. + // NAME computes the rounded average of corresponding elements. - go: AverageMasked commutative: true masked: true extension: "AVX512.*" # Masked operations are typically AVX512 documentation: !string |- - // AverageMasked computes the rounded average of corresponding elements. - + // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- - // Absolute computes the absolute value of each element. + // NAME computes the absolute value of each element. - go: AbsoluteMasked commutative: false masked: true extension: "AVX512.*" documentation: !string |- - // AbsoluteMasked computes the absolute value of each element. - + // NAME computes the absolute value of each element. 
- go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false extension: "AVX.*" documentation: !string |- - // Sign returns the product of the first operand with -1, 0, or 1, + // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version - - go: PopCountMasked commutative: false masked: true extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) documentation: !string |- - // PopCountMasked counts the number of set bits in each element. \ No newline at end of file + // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index d26b846d..65f7462e 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -3,95 +3,94 @@ commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProd multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. - go: PairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdMasked multiplies the elements and add the pairs together, + // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProd multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. 
- go: SaturatedUnsignedSignedPairDotProdMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedPairDotProdMasked multiplies the elements and add the pairs together with saturation, + // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. - # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true extension: "AVX.*" documentation: !string |- - // DotProdBroadcast multiplies all elements and broadcasts the sum. + // NAME multiplies all elements and broadcasts the sum. - go: UnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: UnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // UnsignedSignedQuadDotProdAccumulateMasked performs dot products on groups of 4 elements of x and y and then adds z. + // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of x and y and then adds z. + // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedUnsignedSignedQuadDotProdAccumulateMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z. 
+ // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: PairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // PairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulateMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // SaturatedPairDotProdAccumulateMasked performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of x and y and then adds z. - go: FusedMultiplyAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddMasked performs (x * y) + z. + // NAME performs (x * y) + z. - go: FusedMultiplyAddSubMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplyAddSubMasked performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. + // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
- go: FusedMultiplySubAddMasked masked: true commutative: false extension: "AVX.*" documentation: !string |- - // FusedMultiplySubAddMasked performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. + // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index 929bfadd..ce87994f 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -3,21 +3,21 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Max computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: MaxMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MaxMasked computes the maximum of corresponding elements. + // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- - // Min computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. - go: MinMasked commutative: true masked: true extension: "AVX.*" documentation: !string |- - // MinMasked computes the minimum of corresponding elements. + // NAME computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index d6c4d5da..dd30ca8a 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -3,55 +3,50 @@ commutative: false extension: "AVX.*" documentation: !string |- - // SetElem sets a single constant-indexed element's value. + // NAME sets a single constant-indexed element's value. - go: GetElem commutative: false extension: "AVX.*" documentation: !string |- - // GetElem retrieves a single constant-indexed element's value. 
+ // NAME retrieves a single constant-indexed element's value. - go: Set128 commutative: false extension: "AVX.*" documentation: !string |- - // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. + // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. - go: Get128 commutative: false extension: "AVX.*" documentation: !string |- - // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - - + // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. - go: Permute commutative: false extension: "AVX.*" documentation: !string |- - // Permute performs a full permutation of vector x using indices: + // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. - - go: PermuteMasked commutative: false masked: true extension: "AVX.*" documentation: !string |- - // PermuteMasked performs a full permutation of vector y using indices: + // NAME performs a full permutation of vector y using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. - - go: Permute2Masked # Permute2Masked is only available on or after AVX512 commutative: false masked: true extension: "AVX.*" documentation: !string |- - // Permute2Masked performs a full permutation of vector x, y using indices: + // NAME performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} // where xy is x appending y. // Only the needed bits to represent xy's index are used in indices' elements. 
- - go: Compress commutative: false # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- - // Compress performs a compression on vector x using mask by + // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 1884d660..8dc51f45 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -3,45 +3,45 @@ commutative: true extension: "AVX.*" documentation: !string |- - // Mul multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWiden multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: true extension: "AVX.*" documentation: !string |- - // MulHigh multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLow commutative: true extension: "AVX.*" documentation: !string |- - // MulLow multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. - go: MulMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulMasked multiplies corresponding elements of two vectors. + // NAME multiplies corresponding elements of two vectors. - go: MulEvenWidenMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulEvenWidenMasked multiplies even-indexed elements, widening the result. + // NAME multiplies even-indexed elements, widening the result. 
// Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHighMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulHighMasked multiplies elements and stores the high part of the result. + // NAME multiplies elements and stores the high part of the result. - go: MulLowMasked masked: true commutative: true extension: "AVX.*" documentation: !string |- - // MulLowMasked multiplies elements and stores the low part of the result. + // NAME multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index b9e2a634..71e78251 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -4,21 +4,21 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRightMasked signed: false nameAndSizeCheck: true @@ -26,14 +26,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftAllRightMasked signed: true nameAndSizeCheck: true @@ -41,28 +41,27 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight signed: false nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRightMasked signed: false nameAndSizeCheck: true @@ -70,14 +69,14 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: ShiftRightMasked signed: true nameAndSizeCheck: true @@ -85,44 +84,42 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - + // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
- go: RotateAllLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate. + // NAME rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeftMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements. + // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate. + // NAME rotates each element to the right by the number of bits specified by the immediate. - go: RotateRightMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements. - + // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftAndFillUpperFromMasked nameAndSizeCheck: true masked: true commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -130,7 +127,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftAndFillUpperFromMasked nameAndSizeCheck: true @@ -138,7 +135,7 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the + // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightAndFillUpperFromMasked nameAndSizeCheck: true @@ -146,5 +143,5 @@ commutative: false extension: "AVX.*" documentation: !string |- - // ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the + // NAME shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. From 6ef798663e3f9ab0392f5e7e1575085409694a2d Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 29 Jul 2025 10:54:54 -0400 Subject: [PATCH 157/200] internal/simdgen: introduce instruction variants and use for masked ops In AVX-512, nearly all operations take a mask, but it's optional. Currently, the XED loader produces only the masked form of these instructions. Since this mask is one of the input operands, the Go definitions YAML needs a separate entry to match the masked form. 
To generate both the masked and unmasked forms, we unify only the masked form and then when generating the Go API we recognize this and duplicate the operation in unmasked form. Unfortunately, since pre-AVX-512 operations never have this mask input, we wind up duplicating many unifier definitions to match both the pre-AVX-512 and AVX-512 forms, even though the unmasked operation produced during API generation looks like a pre-AVX-512 definition. To fix all this, we flip things around. Instead of generating the masked and unmasked variants at API generation, we generate them much earlier, during XED loading. The XED data already contains a clear marker for which masks are optional (it's wrong in a few cases, which we work around). For instructions with an optional mask, the XED loader now generates both the masked and unmasked forms. Then, to make both easy to match, we put the mask operand into a new top-level tuple called "inVariant". This way, a single unifier def can match the pre-AVX-512 instruction, and the masked and unmasked AVX-512 instructions. When we load the results of unification for generating the API, we do some light canonicalization of the operation. We append any inVariant operands to the input operands list; and if there's a mask in inVariant, we append "Masked" to the Go method name. With all of this done, we can delete all of the "*Masked" forms of operations from the YAML. In a few cases, we have to merge some information from the masked form into the unmasked form. For operations that were introduced in AVX-512, we currently *only* have the Masked form; so for these we keep the definition but strip out the masking. This ultimately has very little effect on the generated API. In a few cases it fills in holes that let us pick a better instruction. It fixes some doc strings that got duplicated incorrectly between masked and unmasked forms. 
This change makes it much easier to write other tools besides the Go API generator because it moves nearly all masking logic out of writeGoDefs. It also eliminates some fragile Operation duplication logic from writeGoDefs. I plan to move a few more things out of the Go API generator, but this is definitely the big one. Change-Id: I17ee70cff15a80e8025eec96a7286266233546d9 Reviewed-on: https://go-review.googlesource.com/c/arch/+/691341 Auto-Submit: Austin Clements Reviewed-by: David Chase Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 340 +------------ internal/simdgen/gen_utility.go | 39 -- internal/simdgen/go.yaml | 465 +++--------------- internal/simdgen/godefs.go | 48 +- internal/simdgen/main.go | 1 - internal/simdgen/ops/AddSub/categories.yaml | 24 - internal/simdgen/ops/AddSub/go.yaml | 48 -- .../simdgen/ops/BitwiseLogic/categories.yaml | 24 - internal/simdgen/ops/BitwiseLogic/go.yaml | 77 ++- internal/simdgen/ops/Compares/categories.yaml | 49 -- internal/simdgen/ops/Compares/go.yaml | 16 +- .../simdgen/ops/FPonlyArith/categories.yaml | 51 +- internal/simdgen/ops/FPonlyArith/go.yaml | 38 +- .../simdgen/ops/GaloisField/categories.yaml | 9 +- internal/simdgen/ops/GaloisField/go.yaml | 11 +- .../simdgen/ops/IntOnlyArith/categories.yaml | 17 +- internal/simdgen/ops/IntOnlyArith/go.yaml | 20 +- internal/simdgen/ops/MLOps/categories.yaml | 47 +- internal/simdgen/ops/MLOps/go.yaml | 71 +-- internal/simdgen/ops/MinMax/categories.yaml | 12 - internal/simdgen/ops/MinMax/go.yaml | 33 -- internal/simdgen/ops/Moves/categories.yaml | 12 +- internal/simdgen/ops/Moves/go.yaml | 16 +- internal/simdgen/ops/Mul/categories.yaml | 25 - internal/simdgen/ops/Mul/go.yaml | 50 -- .../simdgen/ops/ShiftRotate/categories.yaml | 70 +-- internal/simdgen/ops/ShiftRotate/go.yaml | 95 +--- internal/simdgen/types.yaml | 3 + internal/simdgen/xed.go | 117 ++++- 29 files changed, 329 insertions(+), 1499 deletions(-) diff --git 
a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 5a7e711d..c13fd431 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -9,18 +9,6 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors with saturation. -- go: AddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: SaturatedAddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false extension: "AVX.*" @@ -31,18 +19,6 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: SubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" @@ -72,45 +48,21 @@ extension: "AVX.*" documentation: !string |- // NAME performs a bitwise AND operation between two vectors. -- go: AndMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise OR operation between two vectors. -- go: OrMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- // NAME performs a bitwise x &^ y. 
-- go: AndNotMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise XOR operation between two vectors. -- go: XorMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. @@ -163,102 +115,28 @@ extension: "AVX.*" documentation: !string |- // NAME compares for greater than. -- go: EqualMasked - constImm: 0 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for equality. -- go: LessMasked - constImm: 1 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than. -- go: LessEqualMasked - constImm: 2 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNanMasked # For float only. - constImm: 3 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqualMasked - constImm: 4 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqualMasked - constImm: 13 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than or equal. -- go: GreaterMasked - constImm: 14 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than. - go: Div commutative: false extension: "AVX.*" documentation: !string |- // NAME divides elements of two vectors. 
-- go: DivMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME divides elements of two vectors. - go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. -- go: SqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. -- go: ApproximateReciprocalMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: ApproximateReciprocalOfSqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. +- go: MulByPowOf2 commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. @@ -268,18 +146,16 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecisionMasked +- go: RoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecisionMasked +- go: DiffWithRoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME computes the difference after rounding with specified precision. 
- go: Floor @@ -288,18 +164,16 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. -- go: FloorWithPrecisionMasked +- go: FloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecisionMasked +- go: DiffWithFloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME computes the difference after flooring with specified precision. - go: Ceil @@ -308,18 +182,16 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecisionMasked +- go: CeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME rounds elements up with specified precision. -- go: DiffWithCeilWithPrecisionMasked +- go: DiffWithCeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME computes the difference after ceiling with specified precision. - go: Trunc @@ -328,18 +200,16 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecisionMasked +- go: TruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME truncates elements with specified precision. -- go: DiffWithTruncWithPrecisionMasked +- go: DiffWithTruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME computes the difference after truncating with specified precision. - go: AddSub @@ -347,8 +217,7 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts even elements and adds odd elements of two vectors. 
-- go: GaloisFieldAffineTransformMasked - masked: true +- go: GaloisFieldAffineTransform commutative: false extension: "AVX.*" documentation: !string |- @@ -356,8 +225,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverseMasked - masked: true +- go: GaloisFieldAffineTransformInverse commutative: false extension: "AVX.*" documentation: !string |- @@ -366,8 +234,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: GaloisFieldMulMasked - masked: true +- go: GaloisFieldMul commutative: false extension: "AVX.*" documentation: !string |- @@ -378,24 +245,12 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. -- go: AverageMasked - commutative: true - masked: true - extension: "AVX512.*" # Masked operations are typically AVX512 - documentation: !string |- - // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. -- go: AbsoluteMasked - commutative: false - masked: true - extension: "AVX512.*" - documentation: !string |- - // NAME computes the absolute value of each element. - go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false @@ -404,10 +259,9 @@ // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. 
# Sign does not have masked version -- go: PopCountMasked +- go: PopCount commutative: false - masked: true - extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) + extension: "AVX512.*" documentation: !string |- // NAME counts the number of set bits in each element. - go: PairDotProd @@ -416,13 +270,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: PairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false @@ -430,13 +277,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: SaturatedUnsignedSignedPairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true @@ -448,59 +288,32 @@ extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. -- go: UnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
- go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: PairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: FusedMultiplyAddMasked - masked: true +- go: FusedMultiplyAdd commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) + z. -- go: FusedMultiplyAddSubMasked - masked: true +- go: FusedMultiplyAddSub commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: FusedMultiplySubAddMasked - masked: true +- go: FusedMultiplySubAdd commutative: false extension: "AVX.*" documentation: !string |- @@ -510,23 +323,11 @@ extension: "AVX.*" documentation: !string |- // NAME computes the maximum of corresponding elements. 
-- go: MaxMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- // NAME computes the minimum of corresponding elements. -- go: MinMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the minimum of corresponding elements. - go: SetElem commutative: false extension: "AVX.*" @@ -554,17 +355,8 @@ // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. -- go: PermuteMasked +- go: Permute2 # Permute2 is only available on or after AVX512 commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector y using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. -- go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x, y using indices: @@ -573,7 +365,6 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress commutative: false - # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- // NAME performs a compression on vector x using mask by @@ -599,44 +390,12 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the low part of the result. -- go: MulMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. 
-- go: MulEvenWidenMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHighMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. -- go: MulLowMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. - go: ShiftAllLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: ShiftAllLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true @@ -644,14 +403,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true @@ -659,27 +410,12 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. 
Emptied upper bits are filled with the sign bit. - go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: ShiftLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false nameAndSizeCheck: true @@ -687,14 +423,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true @@ -702,69 +430,53 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: RotateAllLeftMasked +- go: RotateAllLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the left by the number of bits specified by the immediate. 
-- go: RotateLeftMasked +- go: RotateLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRightMasked +- go: RotateAllRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRightMasked +- go: RotateRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index be0a945d..f1cfcfe9 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -549,45 +549,6 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) return } -// splitMask splits operations with a single mask vreg input to be masked and unmasked(const: K0). -// It also remove the "Masked" keyword from the name. -func splitMask(ops []Operation) ([]Operation, error) { - splited := []Operation{} - for _, op := range ops { - splited = append(splited, op) - if op.Masked == nil || !*op.Masked { - continue - } - shapeIn, _, _, _, _ := op.shape() - - if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { - op2 := op - // The ops should be sorted when calling this function, the mask is in the end, drop the mask - op2.In = slices.Clone(op.In)[:len(op.In)-1] - if !strings.HasSuffix(op2.Go, "Masked") { - return nil, fmt.Errorf("simdgen only recognizes masked operations with name ending with 'Masked': %s", op) - } - maskedOpName := op2.Go - op2.Go = strings.TrimSuffix(op2.Go, "Masked") - op2Doc := strings.ReplaceAll(op2.Documentation, maskedOpName, op2.Go) - op2.Documentation = op2Doc - op2.Masked = nil // It's no longer masked. - splited = append(splited, op2) - } else { - return nil, fmt.Errorf("simdgen only recognizes masked operations with exactly one mask input: %s", op) - } - } - return splited, nil -} - -func insertMaskDescToDoc(ops []Operation) { - for i, _ := range ops { - if ops[i].Masked != nil && *ops[i].Masked { - ops[i].Documentation += "\n//\n// This operation is applied selectively under a write mask." 
- } - } -} - func genericName(op Operation) string { if op.OperandOrder != nil { switch *op.OperandOrder { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 8ef04b8e..c58d692e 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -8,14 +8,6 @@ - *any out: - *any -- go: AddMasked - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Add - go: SaturatedAdd asm: "VPADDS[BWDQ]" @@ -35,22 +27,6 @@ - *uint out: - *uint -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # Sub - go: Sub @@ -60,14 +36,6 @@ - *any out: &1any - *any -- go: SubMasked - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -83,22 +51,6 @@ - *uint out: - *uint -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint - go: PairwiseAdd asm: "VPHADD[DW]" in: *2any @@ -128,77 +80,64 @@ # decided that they want FP bit-wise logic operations, but this irregularity # has to be dealed with in separate rules with some overwrites. -# Int/Uint operations. -# Non-masked for 128/256-bit vectors +# For many bit-wise operations, we have the following non-orthogonal +# choices: +# +# - Non-masked AVX operations have no element width (because it +# doesn't matter), but only cover 128 and 256 bit vectors. +# +# - Masked AVX-512 operations have an element width (because it needs +# to know how to interpret the mask), and cover 128, 256, and 512 bit +# vectors. These only cover 32- and 64-bit element widths. 
+# +# - Non-masked AVX-512 operations still have an element width (because +# they're just the masked operations with an implicit K0 mask) but it +# doesn't matter! This is the only option for non-masked 512 bit +# operations, and we can pick any of the element widths. +# +# We unify with ALL of these operations and the compiler generator +# picks when there are multiple options. + +# TODO: We don't currently generate unmasked bit-wise operations on 512 bit +# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise +# operations for 32- and 64-bit elements; while the element width doesn't matter +# for unmasked operations, right now we don't realize that we can just use the +# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we +# should recognize bit-wise operations when generating unmasked versions and +# omit the element width. + # For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. This will map to instructions before AVX512. +# same Go type using a variable. + - go: And - asm: "VPAND" + asm: "VPAND[DQ]?" in: - &any go: $t - *any out: - *any -# Masked -# Looks like VPAND$xi works only for 2 shapes for integer: -# Dword and Qword. -# TODO: should we wildcard other smaller elemBits to VPANDQ or -# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. -- go: AndMasked - asm: "VPAND[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: AndNot - asm: "VPANDN" + asm: "VPANDN[DQ]?" operandOrder: "21" # switch the arg order in: - *any - *any out: - *any -- go: AndNotMasked - asm: "VPANDN[DQ]" - operandOrder: "21" - in: - - class: mask - - *any - - *any - out: - - *any - go: Or - asm: "VPOR" + asm: "VPOR[DQ]?" 
in: - *any - *any out: - *any -- go: OrMasked - asm: "VPOR[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: Xor - asm: "VPXOR" - in: - - *any - - *any - out: - - *any -- go: XorMasked - asm: "VPXOR[DQ]" + asm: "VPXOR[DQ]?" in: - - class: mask - *any - *any out: @@ -240,38 +179,35 @@ overwriteElementBits: 64 overwriteClass: mask overwriteBase: int -- go: EqualMasked +# AVX-512 compares produce masks. +- go: Equal asm: "V?PCMPEQ[BWDQ]" in: - - class: mask - *any - *any out: - class: mask -- go: GreaterMasked +- go: Greater asm: "V?PCMPGT[BWDQ]" in: - - class: mask - *int - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - - class: mask - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - - class: mask - &uint go: $t base: uint @@ -295,10 +231,9 @@ - go: $t overwriteBase: int overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" in: - - class: mask - *float - *float - class: immediate @@ -314,38 +249,22 @@ - *fp out: &1fp - *fp -- go: DivMasked - asm: "V?DIVP[SD]" - in: &1mask2fp - - class: mask - - *fp - - *fp - out: *1fp - go: Sqrt asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: SqrtMasked - asm: "V?SQRTP[SD]" - in: &1mask1fp - - class: mask - - *fp - out: *1fp -- go: ApproximateReciprocalMasked - asm: "VRCP14P[SD]" - in: *1mask1fp +# TODO: Provide separate methods for 12-bit precision and 14-bit precision? 
+- go: ApproximateReciprocal + asm: "VRCP(14)?P[SD]" + in: *1fp out: *1fp - go: ApproximateReciprocalOfSqrt - asm: "V?RSQRTPS" + asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: ApproximateReciprocalOfSqrtMasked - asm: "VRSQRT14P[SD]" - in: *1mask1fp - out: *1fp -- go: MulByPowOf2Masked +- go: MulByPowOf2 asm: "VSCALEFP[SD]" - in: *1mask2fp + in: *2fp out: *1fp - go: "Round|Ceil|Floor|Trunc" @@ -356,20 +275,18 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VRNDSCALEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VREDUCEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder @@ -384,12 +301,10 @@ - *fp out: - *fp -- go: GaloisFieldAffineTransformMasked +- go: GaloisFieldAffineTransform asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - - class: mask - name: m - &uint8 go: $t base: uint @@ -403,17 +318,16 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInverseMasked +- go: GaloisFieldAffineTransformInverse asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: GaloisFieldMulMasked +- go: GaloisFieldMul asm: VGF2P8MULB in: - - class: mask - *uint8 - *uint8 out: @@ -429,14 +343,6 @@ - *uint_t out: - *uint_t -- go: AverageMasked - asm: "VPAVG[BW]" - in: - - class: mask - - *uint_t - - *uint_t - out: - - *uint_t # Absolute Value (signed byte, word, dword, qword) # Instructions: VPABSB, VPABSW, VPABSD, VPABSQ @@ -448,13 +354,6 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: AbsoluteMasked - asm: "VPABS[BWDQ]" - in: - - class: mask - - *int_t - out: - - *int_t # Sign Operation 
(signed byte, word, dword) # Applies sign of second operand to the first. @@ -470,10 +369,9 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCountMasked +- go: PopCount asm: "VPOPCNT[BWDQ]" in: - - class: mask - &any go: $t out: @@ -489,37 +387,19 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: PairDotProdMasked - asm: VPMADDWD - in: - - class: mask - - *int - - *int - out: - - *int2 - go: SaturatedUnsignedSignedPairDotProd asm: VPMADDUBSW in: - &uint go: $t base: uint + overwriteElementBits: 8 - &int3 go: $t3 base: int - out: - - *int2 -- go: SaturatedUnsignedSignedPairDotProdMasked - asm: VPMADDUBSW - in: - - class: mask - - go: $t1 - base: uint - overwriteElementBits: 8 - - go: $t2 - base: int overwriteElementBits: 8 out: - - *int3 + - *int2 - go: DotProdBroadcast asm: VDPP[SD] in: @@ -548,16 +428,6 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: UnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" operandOrder: "31" # switch operand 3 and 1 @@ -567,16 +437,6 @@ - *qdpa_src2 out: - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" operandOrder: "31" # switch operand 3 and 1 @@ -595,16 +455,6 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: PairDotProdAccumulateMasked - asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" operandOrder: "31" # switch operand 3 and 1 @@ 
-614,41 +464,28 @@ - *pdpa_src2 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulateMasked - asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: FusedMultiplyAddMasked +- go: FusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op go: $t base: float - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplyAddSubMasked +- go: FusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplySubAddMasked +- go: FusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: @@ -671,20 +508,6 @@ - *uint out: &1uint - *uint -- go: MaxMasked - asm: "V?PMAXS[BWDQ]" - in: &1mask2int - - class: mask - - *int - - *int - out: *1int -- go: MaxMasked - asm: "V?PMAXU[BWDQ]" - in: &1mask2uint - - class: mask - - *uint - - *uint - out: *1uint - go: Min asm: "V?PMINS[BWDQ]" @@ -694,14 +517,6 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MinMasked - asm: "V?PMINS[BWDQ]" - in: *1mask2int - out: *1int -- go: MinMasked - asm: "V?PMINU[BWDQ]" - in: *1mask2uint - out: *1uint - go: Max asm: "V?MAXP[SD]" @@ -712,21 +527,10 @@ - *float out: &1float - *float -- go: MaxMasked - asm: "V?MAXP[SD]" - in: &1mask2float - - class: mask - - *float - - *float - out: *1float - go: Min asm: "V?MINP[SD]" in: *2float out: *1float -- go: MinMasked - asm: "V?MINP[SD]" - in: *1mask2float - out: *1float - go: SetElem asm: "VPINSR[BWDQ]" in: @@ -920,17 +724,7 @@ out: - *any -- go: PermuteMasked - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - class: mask - - *anyindices - - *any - out: - - *any - -- go: Permute2Masked +- go: Permute2 asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that @@ -938,7 +732,6 @@ operandOrder: "231Type1" in: - *anyindices # 
result in arg 0 - - class: mask - *any - *any out: @@ -947,6 +740,7 @@ - go: Compress asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" in: + # The mask in Compress is a control mask rather than a write mask, so it's not optional. - class: mask - *any out: @@ -962,14 +756,6 @@ - *fp out: - *fp -- go: MulMasked - asm: "VMULP[SD]" - in: - - class: mask - - *fp - - *fp - out: - - *fp # Integer multiplications. @@ -997,26 +783,9 @@ - &uint2 go: $t2 base: uint -- go: MulEvenWidenMasked - asm: "VPMULDQ" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulEvenWidenMasked - asm: "VPMULUDQ" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulHigh # Word only. -# Non-masked - go: MulHigh asm: "VPMULHW" in: @@ -1031,26 +800,9 @@ - *uint out: - *uint2 -- go: MulHighMasked - asm: "VPMULHW" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulHighMasked - asm: "VPMULHUW" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulLow # Signed int only. -# Non-masked - go: MulLow asm: "VPMULL[WDQ]" in: @@ -1058,14 +810,6 @@ - *int out: - *int2 -- go: MulLowMasked - asm: "VPMULL[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int2 # Integers # ShiftAll* - go: ShiftAllLeft @@ -1078,14 +822,6 @@ treatLikeAScalarOfSize: 64 out: - *any -- go: ShiftAllLeftMasked - asm: "VPSLL[WDQ]" - in: - - class: mask - - *any - - *vecAsScalar64 - out: - - *any - go: ShiftAllRight signed: false asm: "VPSRL[WDQ]" @@ -1096,15 +832,6 @@ - *vecAsScalar64 out: - *uint -- go: ShiftAllRightMasked - signed: false - asm: "VPSRL[WDQ]" - in: - - class: mask - - *uint - - *vecAsScalar64 - out: - - *uint - go: ShiftAllRight signed: true asm: "VPSRA[WDQ]" @@ -1115,15 +842,6 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightMasked - signed: true - asm: "VPSRA[WDQ]" - in: - - class: mask - - *int - - *vecAsScalar64 - out: - - *int # Shift* (variable) - go: ShiftLeft @@ -1133,14 +851,6 @@ - *any out: - *any -- go: ShiftLeftMasked - asm: "VPSLLV[WD]" - in: - - class: 
mask - - *any - - *any - out: - - *any # XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite # it to 64. - go: ShiftLeft @@ -1152,14 +862,6 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: ShiftLeftMasked - asm: "VPSLLVQ" - in: - - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits - go: ShiftRight signed: false asm: "VPSRLV[WD]" @@ -1168,15 +870,6 @@ - *uint out: - *uint -- go: ShiftRightMasked - signed: false - asm: "VPSRLV[WD]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. - go: ShiftRight signed: false @@ -1189,15 +882,6 @@ - *uintOverwriteElemBits out: - *uintOverwriteElemBits -- go: ShiftRightMasked - signed: false - asm: "VPSRLVQ" - in: - - class: mask - - *uintOverwriteElemBits - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits - go: ShiftRight signed: true asm: "VPSRAV[WDQ]" @@ -1206,21 +890,11 @@ - *int out: - *int -- go: ShiftRightMasked - signed: true - asm: "VPSRAV[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int # Rotate -- go: RotateAllLeftMasked +- go: RotateAllLeft asm: "VPROL[DQ]" in: - - class: mask - *any - &pureImm class: immediate @@ -1228,64 +902,57 @@ name: shift out: - *any -- go: RotateAllRightMasked +- go: RotateAllRight asm: "VPROR[DQ]" in: - - class: mask - *any - *pureImm out: - *any -- go: RotateLeftMasked +- go: RotateLeft asm: "VPROLV[DQ]" in: - - class: mask - *any - *any out: - *any -- go: RotateRightMasked +- go: RotateRight asm: "VPRORV[DQ]" in: - - class: mask - *any - *any out: - *any # Bizzare shifts. 
-- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom asm: "VPSHLD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom asm: "VPSHRD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom asm: "VPSHLDV[WDQ]" in: - *any - - class: mask - *any - *any out: - *any -- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom asm: "VPSHRDV[WDQ]" in: - *any - - class: mask - *any - *any out: diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index b45c249f..741214bb 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -18,27 +18,38 @@ import ( type Operation struct { rawOperation + // Go is the Go method name of this operation. + // + // It is derived from the raw Go method name by adding optional suffixes. + // Currently, "Masked" is the only suffix. + Go string + // Documentation is the doc string for this API. // // It is computed from the raw documentation: // // - "NAME" is replaced by the Go method name. // - // - For masked operation, the method name is updated and a sentence about - // masking is added. + // - For masked operation, a sentence about masking is added. Documentation string + + // In is the sequence of parameters to the Go method. + // + // For masked operations, this will have the mask operand appended. + In []Operand } // rawOperation is the unifier representation of an [Operation]. It is // translated into a more parsed form after unifier decoding. 
type rawOperation struct { - Go string // Go method name + Go string // Base Go method name GoArch string // GOARCH for this definition Asm string // Assembly mnemonic OperandOrder *string // optional Operand order for better Go declarations - In []Operand // Arguments + In []Operand // Parameters + InVariant []Operand // Optional parameters Out []Operand // Results Commutative bool // Commutativity Extension string // Extension @@ -49,9 +60,6 @@ type rawOperation struct { // ConstMask is a hack to reduce the size of defs the user writes for const-immediate // If present, it will be copied to [In[0].Const]. ConstImm *string - // Masked indicates that this is a masked operation, this field has to be set for masked operations - // otherwise simdgen won't recognize it in [splitMask]. - Masked *bool // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. NameAndSizeCheck *bool } @@ -61,6 +69,21 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { return err } + isMasked := false + if len(o.InVariant) == 0 { + // No variant + } else if len(o.InVariant) == 1 && o.InVariant[0].Class == "mask" { + isMasked = true + } else { + return fmt.Errorf("unknown inVariant") + } + + // Compute full Go method name. + o.Go = o.rawOperation.Go + if isMasked { + o.Go += "Masked" + } + // Compute doc string. if o.rawOperation.Documentation != nil { o.Documentation = *o.rawOperation.Documentation @@ -68,6 +91,11 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { o.Documentation = "// UNDOCUMENTED" } o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) + if isMasked { + o.Documentation += "\n//\n// This operation is applied selectively under a write mask." + } + + o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) 
return nil } @@ -296,12 +324,6 @@ func writeGoDefs(path string, cl unify.Closure) error { if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } - if !*FlagNoSplitMask { - if deduped, err = splitMask(deduped); err != nil { - return err - } - } - insertMaskDescToDoc(deduped) if *Verbose { log.Printf("dedup len: %d\n", len(deduped)) } diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 2b0e65f7..6ac22a68 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -107,7 +107,6 @@ var ( flagO = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree") flagGoDefRoot = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files") FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") - FlagNoSplitMask = flag.Bool("nosplitmask", false, "disable splitting the masks to const and non const") FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") FlagArch = flag.String("arch", "amd64", "the target architecture") diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 667508b5..2ffd1e23 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -9,18 +9,6 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors with saturation. -- go: AddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: SaturatedAddMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. 
- go: Sub commutative: false extension: "AVX.*" @@ -31,18 +19,6 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: SubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSubMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. - go: PairwiseAdd commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 793bc489..c952c150 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -8,14 +8,6 @@ - *any out: - *any -- go: AddMasked - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Add - go: SaturatedAdd asm: "VPADDS[BWDQ]" @@ -35,22 +27,6 @@ - *uint out: - *uint -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedAddMasked - asm: "VPADDS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # Sub - go: Sub @@ -60,14 +36,6 @@ - *any out: &1any - *any -- go: SubMasked - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: - - class: mask - - *any - - *any - out: - - *any # Saturated Sub - go: SaturatedSub asm: "VPSUBS[BWDQ]" @@ -83,22 +51,6 @@ - *uint out: - *uint -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *int - - *int - out: - - *int -- go: SaturatedSubMasked - asm: "VPSUBS[BWDQ]" - in: - - class: mask - - *uint - - *uint - out: - - *uint - go: PairwiseAdd asm: "VPHADD[DW]" in: *2any diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 3d2eda7c..320cfd18 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ 
b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -4,45 +4,21 @@ extension: "AVX.*" documentation: !string |- // NAME performs a bitwise AND operation between two vectors. -- go: AndMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise OR operation between two vectors. -- go: OrMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false extension: "AVX.*" documentation: !string |- // NAME performs a bitwise x &^ y. -- go: AndNotMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise x &^ y. - go: Xor commutative: true extension: "AVX.*" documentation: !string |- // NAME performs a bitwise XOR operation between two vectors. -- go: XorMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. # We also have PTEST and VPTERNLOG, those should be hidden from the users # and only appear in rewrite rules. diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index acc7a51e..0d0f1c8c 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -4,78 +4,65 @@ # decided that they want FP bit-wise logic operations, but this irregularity # has to be dealed with in separate rules with some overwrites. -# Int/Uint operations. -# Non-masked for 128/256-bit vectors +# For many bit-wise operations, we have the following non-orthogonal +# choices: +# +# - Non-masked AVX operations have no element width (because it +# doesn't matter), but only cover 128 and 256 bit vectors. 
+# +# - Masked AVX-512 operations have an element width (because it needs +# to know how to interpret the mask), and cover 128, 256, and 512 bit +# vectors. These only cover 32- and 64-bit element widths. +# +# - Non-masked AVX-512 operations still have an element width (because +# they're just the masked operations with an implicit K0 mask) but it +# doesn't matter! This is the only option for non-masked 512 bit +# operations, and we can pick any of the element widths. +# +# We unify with ALL of these operations and the compiler generator +# picks when there are multiple options. + +# TODO: We don't currently generate unmasked bit-wise operations on 512 bit +# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise +# operations for 32- and 64-bit elements; while the element width doesn't matter +# for unmasked operations, right now we don't realize that we can just use the +# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we +# should recognize bit-wise operations when generating unmasked versions and +# omit the element width. + # For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. This will map to instructions before AVX512. +# same Go type using a variable. + - go: And - asm: "VPAND" + asm: "VPAND[DQ]?" in: - &any go: $t - *any out: - *any -# Masked -# Looks like VPAND$xi works only for 2 shapes for integer: -# Dword and Qword. -# TODO: should we wildcard other smaller elemBits to VPANDQ or -# VPANDD? Looks like elemBits doesn't really matter afterall in bitwise operations. -- go: AndMasked - asm: "VPAND[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: AndNot - asm: "VPANDN" + asm: "VPANDN[DQ]?" operandOrder: "21" # switch the arg order in: - *any - *any out: - *any -- go: AndNotMasked - asm: "VPANDN[DQ]" - operandOrder: "21" - in: - - class: mask - - *any - - *any - out: - - *any - go: Or - asm: "VPOR" + asm: "VPOR[DQ]?" 
in: - *any - *any out: - *any -- go: OrMasked - asm: "VPOR[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any - go: Xor - asm: "VPXOR" + asm: "VPXOR[DQ]?" in: - *any - *any out: - *any -- go: XorMasked - asm: "VPXOR[DQ]" - in: - - class: mask - - *any - - *any - out: - - *any \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index e17e45db..e3d990ed 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -48,52 +48,3 @@ extension: "AVX.*" documentation: !string |- // NAME compares for greater than. -- go: EqualMasked - constImm: 0 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for equality. -- go: LessMasked - constImm: 1 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than. -- go: LessEqualMasked - constImm: 2 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNanMasked # For float only. - constImm: 3 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqualMasked - constImm: 4 - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqualMasked - constImm: 13 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than or equal. -- go: GreaterMasked - constImm: 14 - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than. 
diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index c1ea2061..a8e2368f 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -36,38 +36,35 @@ overwriteElementBits: 64 overwriteClass: mask overwriteBase: int -- go: EqualMasked +# AVX-512 compares produce masks. +- go: Equal asm: "V?PCMPEQ[BWDQ]" in: - - class: mask - *any - *any out: - class: mask -- go: GreaterMasked +- go: Greater asm: "V?PCMPGT[BWDQ]" in: - - class: mask - *int - *int out: - class: mask # The const imm predicated compares after AVX512, please see categories.yaml # for const imm specification. -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: - - class: mask - *int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - - class: mask - &uint go: $t base: uint @@ -91,10 +88,9 @@ - go: $t overwriteBase: int overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)Masked +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) asm: "VCMPP[SD]" in: - - class: mask - *float - *float - class: immediate diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 53292048..0fb727d5 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -4,48 +4,23 @@ extension: "AVX.*" documentation: !string |- // NAME divides elements of two vectors. -- go: DivMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME divides elements of two vectors. 
- go: Sqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. -- go: SqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the square root of each element. - go: ApproximateReciprocal commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. -- go: ApproximateReciprocalMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of each element. - go: ApproximateReciprocalOfSqrt commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: ApproximateReciprocalOfSqrtMasked - commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2Masked # This operation is all after AVX512, the unmasked version will be generated. +- go: MulByPowOf2 commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. @@ -55,18 +30,16 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecisionMasked +- go: RoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecisionMasked +- go: DiffWithRoundWithPrecision commutative: false extension: "AVX.*" constImm: 0 - masked: true documentation: !string |- // NAME computes the difference after rounding with specified precision. - go: Floor @@ -75,18 +48,16 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. 
-- go: FloorWithPrecisionMasked +- go: FloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecisionMasked +- go: DiffWithFloorWithPrecision commutative: false extension: "AVX.*" constImm: 1 - masked: true documentation: !string |- // NAME computes the difference after flooring with specified precision. - go: Ceil @@ -95,18 +66,16 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecisionMasked +- go: CeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME rounds elements up with specified precision. -- go: DiffWithCeilWithPrecisionMasked +- go: DiffWithCeilWithPrecision commutative: false extension: "AVX.*" constImm: 2 - masked: true documentation: !string |- // NAME computes the difference after ceiling with specified precision. - go: Trunc @@ -115,18 +84,16 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecisionMasked +- go: TruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME truncates elements with specified precision. -- go: DiffWithTruncWithPrecisionMasked +- go: DiffWithTruncWithPrecision commutative: false extension: "AVX.*" constImm: 3 - masked: true documentation: !string |- // NAME computes the difference after truncating with specified precision. 
- go: AddSub diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index d35610df..71d1cb5f 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -8,38 +8,22 @@ - *fp out: &1fp - *fp -- go: DivMasked - asm: "V?DIVP[SD]" - in: &1mask2fp - - class: mask - - *fp - - *fp - out: *1fp - go: Sqrt asm: "V?SQRTP[SD]" in: *1fp out: *1fp -- go: SqrtMasked - asm: "V?SQRTP[SD]" - in: &1mask1fp - - class: mask - - *fp - out: *1fp -- go: ApproximateReciprocalMasked - asm: "VRCP14P[SD]" - in: *1mask1fp +# TODO: Provide separate methods for 12-bit precision and 14-bit precision? +- go: ApproximateReciprocal + asm: "VRCP(14)?P[SD]" + in: *1fp out: *1fp - go: ApproximateReciprocalOfSqrt - asm: "V?RSQRTPS" + asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: ApproximateReciprocalOfSqrtMasked - asm: "VRSQRT14P[SD]" - in: *1mask1fp - out: *1fp -- go: MulByPowOf2Masked +- go: MulByPowOf2 asm: "VSCALEFP[SD]" - in: *1mask2fp + in: *2fp out: *1fp - go: "Round|Ceil|Floor|Trunc" @@ -50,20 +34,18 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VRNDSCALEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecisionMasked" +- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" asm: "VREDUCEP[SD]" in: - - class: mask - *fp - class: immediate const: 0 # place holder @@ -77,4 +59,4 @@ - *fp - *fp out: - - *fp \ No newline at end of file + - *fp diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index 62d8709e..d57b5265 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,6 +1,5 @@ !sum -- go: GaloisFieldAffineTransformMasked - masked: true +- go: GaloisFieldAffineTransform commutative: false extension: "AVX.*" documentation: !string |- @@ -8,8 +7,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverseMasked - masked: true +- go: GaloisFieldAffineTransformInverse commutative: false extension: "AVX.*" documentation: !string |- @@ -18,8 +16,7 @@ // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y // corresponding to a group of 8 elements in x. 
-- go: GaloisFieldMulMasked - masked: true +- go: GaloisFieldMul commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml index 68875d17..e86211cb 100644 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ b/internal/simdgen/ops/GaloisField/go.yaml @@ -1,10 +1,8 @@ !sum -- go: GaloisFieldAffineTransformMasked +- go: GaloisFieldAffineTransform asm: VGF2P8AFFINEQB operandOrder: 2I # 2nd operand, then immediate in: &AffineArgs - - class: mask - name: m - &uint8 go: $t base: uint @@ -18,18 +16,17 @@ out: - *uint8 -- go: GaloisFieldAffineTransformInverseMasked +- go: GaloisFieldAffineTransformInverse asm: VGF2P8AFFINEINVQB operandOrder: 2I # 2nd operand, then immediate in: *AffineArgs out: - *uint8 -- go: GaloisFieldMulMasked +- go: GaloisFieldMul asm: VGF2P8MULB in: - - class: mask - *uint8 - *uint8 out: - - *uint8 \ No newline at end of file + - *uint8 diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 76ab14ba..477b1896 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -4,24 +4,12 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. -- go: AverageMasked - commutative: true - masked: true - extension: "AVX512.*" # Masked operations are typically AVX512 - documentation: !string |- - // NAME computes the rounded average of corresponding elements. - go: Absolute commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. -- go: AbsoluteMasked - commutative: false - masked: true - extension: "AVX512.*" - documentation: !string |- - // NAME computes the absolute value of each element. 
- go: Sign # Applies sign of second operand to first: sign(val, sign_src) commutative: false @@ -30,9 +18,8 @@ // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: PopCountMasked +- go: PopCount commutative: false - masked: true - extension: "AVX512.*" # VPOPCNT instructions are AVX512 (BITALG or VPOPCNTDQ) + extension: "AVX512.*" documentation: !string |- // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml index 3ccce6f0..4c73be26 100644 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -10,14 +10,6 @@ - *uint_t out: - *uint_t -- go: AverageMasked - asm: "VPAVG[BW]" - in: - - class: mask - - *uint_t - - *uint_t - out: - - *uint_t # Absolute Value (signed byte, word, dword, qword) # Instructions: VPABSB, VPABSW, VPABSD, VPABSQ @@ -29,13 +21,6 @@ base: int out: - *int_t # Output is magnitude, fits in the same signed type -- go: AbsoluteMasked - asm: "VPABS[BWDQ]" - in: - - class: mask - - *int_t - out: - - *int_t # Sign Operation (signed byte, word, dword) # Applies sign of second operand to the first. 
@@ -51,11 +36,10 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCountMasked +- go: PopCount asm: "VPOPCNT[BWDQ]" in: - - class: mask - &any go: $t out: - - *any \ No newline at end of file + - *any diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 65f7462e..c90942de 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -5,13 +5,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. -- go: PairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. - go: SaturatedUnsignedSignedPairDotProd commutative: false @@ -19,13 +12,6 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -- go: SaturatedUnsignedSignedPairDotProdMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. - go: DotProdBroadcast commutative: true @@ -37,59 +23,32 @@ extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
-- go: UnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: SaturatedUnsignedSignedQuadDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: PairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: PairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. - go: SaturatedPairDotProdAccumulate commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulateMasked - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: FusedMultiplyAddMasked - masked: true +- go: FusedMultiplyAdd commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) + z. -- go: FusedMultiplyAddSubMasked - masked: true +- go: FusedMultiplyAddSub commutative: false extension: "AVX.*" documentation: !string |- // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
-- go: FusedMultiplySubAddMasked - masked: true +- go: FusedMultiplySubAdd commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 76512b1e..2fb3e52f 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -10,37 +10,19 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: PairDotProdMasked - asm: VPMADDWD - in: - - class: mask - - *int - - *int - out: - - *int2 - go: SaturatedUnsignedSignedPairDotProd asm: VPMADDUBSW in: - &uint go: $t base: uint + overwriteElementBits: 8 - &int3 go: $t3 base: int - out: - - *int2 -- go: SaturatedUnsignedSignedPairDotProdMasked - asm: VPMADDUBSW - in: - - class: mask - - go: $t1 - base: uint - overwriteElementBits: 8 - - go: $t2 - base: int overwriteElementBits: 8 out: - - *int3 + - *int2 - go: DotProdBroadcast asm: VDPP[SD] in: @@ -69,16 +51,6 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: UnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: SaturatedUnsignedSignedQuadDotProdAccumulate asm: "VPDPBUSDS" operandOrder: "31" # switch operand 3 and 1 @@ -88,16 +60,6 @@ - *qdpa_src2 out: - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulateMasked - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - class: mask - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: PairDotProdAccumulate asm: "VPDPWSSD" operandOrder: "31" # switch operand 3 and 1 @@ -116,16 +78,6 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: PairDotProdAccumulateMasked - asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc - go: SaturatedPairDotProdAccumulate asm: "VPDPWSSDS" operandOrder: "31" # switch operand 3 and 1 @@ -135,41 
+87,28 @@ - *pdpa_src2 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulateMasked - asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *pdpa_acc - - class: mask - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: FusedMultiplyAddMasked +- go: FusedMultiplyAdd asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op go: $t base: float - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplyAddSubMasked +- go: FusedMultiplyAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: - *fma_op -- go: FusedMultiplySubAddMasked +- go: FusedMultiplySubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op - - class: mask - *fma_op - *fma_op out: diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index ce87994f..9ac0d3d4 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -4,20 +4,8 @@ extension: "AVX.*" documentation: !string |- // NAME computes the maximum of corresponding elements. -- go: MaxMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the maximum of corresponding elements. - go: Min commutative: true extension: "AVX.*" documentation: !string |- // NAME computes the minimum of corresponding elements. -- go: MinMasked - commutative: true - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the minimum of corresponding elements. 
diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml index db4286f3..55f1e18b 100644 --- a/internal/simdgen/ops/MinMax/go.yaml +++ b/internal/simdgen/ops/MinMax/go.yaml @@ -17,20 +17,6 @@ - *uint out: &1uint - *uint -- go: MaxMasked - asm: "V?PMAXS[BWDQ]" - in: &1mask2int - - class: mask - - *int - - *int - out: *1int -- go: MaxMasked - asm: "V?PMAXU[BWDQ]" - in: &1mask2uint - - class: mask - - *uint - - *uint - out: *1uint - go: Min asm: "V?PMINS[BWDQ]" @@ -40,14 +26,6 @@ asm: "V?PMINU[BWDQ]" in: *2uint out: *1uint -- go: MinMasked - asm: "V?PMINS[BWDQ]" - in: *1mask2int - out: *1int -- go: MinMasked - asm: "V?PMINU[BWDQ]" - in: *1mask2uint - out: *1uint - go: Max asm: "V?MAXP[SD]" @@ -58,18 +36,7 @@ - *float out: &1float - *float -- go: MaxMasked - asm: "V?MAXP[SD]" - in: &1mask2float - - class: mask - - *float - - *float - out: *1float - go: Min asm: "V?MINP[SD]" in: *2float out: *1float -- go: MinMasked - asm: "V?MINP[SD]" - in: *1mask2float - out: *1float \ No newline at end of file diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index dd30ca8a..a6dd7bab 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -26,17 +26,8 @@ // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. -- go: PermuteMasked +- go: Permute2 # Permute2 is only available on or after AVX512 commutative: false - masked: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector y using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. 
-- go: Permute2Masked # Permute2Masked is only available on or after AVX512 - commutative: false - masked: true extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x, y using indices: @@ -45,7 +36,6 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress commutative: false - # The mask in Compress is a control mask rather than a write mask, so we don't mark it "masked" extension: "AVX.*" documentation: !string |- // NAME performs a compression on vector x using mask by diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 44a1c3c3..c1dd6e4d 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -192,17 +192,7 @@ out: - *any -- go: PermuteMasked - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - class: mask - - *anyindices - - *any - out: - - *any - -- go: Permute2Masked +- go: Permute2 asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that @@ -210,7 +200,6 @@ operandOrder: "231Type1" in: - *anyindices # result in arg 0 - - class: mask - *any - *any out: @@ -219,7 +208,8 @@ - go: Compress asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" in: + # The mask in Compress is a control mask rather than a write mask, so it's not optional. - class: mask - *any out: - - *any \ No newline at end of file + - *any diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 8dc51f45..9a9b8328 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -20,28 +20,3 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the low part of the result. -- go: MulMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. 
-- go: MulEvenWidenMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHighMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. -- go: MulLowMasked - masked: true - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index 9ae3a850..c9ddeb4a 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -10,14 +10,6 @@ - *fp out: - *fp -- go: MulMasked - asm: "VMULP[SD]" - in: - - class: mask - - *fp - - *fp - out: - - *fp # Integer multiplications. @@ -45,26 +37,9 @@ - &uint2 go: $t2 base: uint -- go: MulEvenWidenMasked - asm: "VPMULDQ" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulEvenWidenMasked - asm: "VPMULUDQ" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulHigh # Word only. -# Non-masked - go: MulHigh asm: "VPMULHW" in: @@ -79,26 +54,9 @@ - *uint out: - *uint2 -- go: MulHighMasked - asm: "VPMULHW" - in: - - class: mask - - *int - - *int - out: - - *int2 -- go: MulHighMasked - asm: "VPMULHUW" - in: - - class: mask - - *uint - - *uint - out: - - *uint2 # MulLow # Signed int only. 
-# Non-masked - go: MulLow asm: "VPMULL[WDQ]" in: @@ -106,11 +64,3 @@ - *int out: - *int2 -- go: MulLowMasked - asm: "VPMULL[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int2 \ No newline at end of file diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 71e78251..f9a92652 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -5,13 +5,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: ShiftAllLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight signed: false nameAndSizeCheck: true @@ -19,14 +12,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight signed: true nameAndSizeCheck: true @@ -34,27 +19,12 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: ShiftAllRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
- go: ShiftLeft nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: ShiftLeftMasked - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. - go: ShiftRight signed: false nameAndSizeCheck: true @@ -62,14 +32,6 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRightMasked - signed: false - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true @@ -77,69 +39,53 @@ extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: ShiftRightMasked - signed: true - nameAndSizeCheck: true - masked: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: RotateAllLeftMasked +- go: RotateAllLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the left by the number of bits specified by the immediate. 
-- go: RotateLeftMasked +- go: RotateLeft nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRightMasked +- go: RotateAllRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRightMasked +- go: RotateRight nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom nameAndSizeCheck: true - masked: true commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 637de935..ff4c3156 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -11,14 +11,6 @@ treatLikeAScalarOfSize: 64 out: - *any -- go: ShiftAllLeftMasked - asm: "VPSLL[WDQ]" - in: - - class: mask - - *any - - *vecAsScalar64 - out: - - *any - go: ShiftAllRight signed: false asm: "VPSRL[WDQ]" @@ -29,15 +21,6 @@ - *vecAsScalar64 out: - *uint -- go: ShiftAllRightMasked - signed: false - asm: "VPSRL[WDQ]" - in: - - class: mask - - *uint - - *vecAsScalar64 - out: - - *uint - go: ShiftAllRight signed: true asm: "VPSRA[WDQ]" @@ -48,15 +31,6 @@ - *vecAsScalar64 out: - *int -- go: ShiftAllRightMasked - signed: true - asm: "VPSRA[WDQ]" - in: - - class: mask - - *int - - *vecAsScalar64 - out: - - *int # Shift* (variable) - go: ShiftLeft @@ -66,14 +40,6 @@ - *any out: - *any -- go: ShiftLeftMasked - asm: "VPSLLV[WD]" - in: - - class: mask - - *any - - *any - out: - - *any # XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite # it to 64. - go: ShiftLeft @@ -85,14 +51,6 @@ - *anyOverwriteElemBits out: - *anyOverwriteElemBits -- go: ShiftLeftMasked - asm: "VPSLLVQ" - in: - - class: mask - - *anyOverwriteElemBits - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits - go: ShiftRight signed: false asm: "VPSRLV[WD]" @@ -101,15 +59,6 @@ - *uint out: - *uint -- go: ShiftRightMasked - signed: false - asm: "VPSRLV[WD]" - in: - - class: mask - - *uint - - *uint - out: - - *uint # XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
- go: ShiftRight signed: false @@ -122,15 +71,6 @@ - *uintOverwriteElemBits out: - *uintOverwriteElemBits -- go: ShiftRightMasked - signed: false - asm: "VPSRLVQ" - in: - - class: mask - - *uintOverwriteElemBits - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits - go: ShiftRight signed: true asm: "VPSRAV[WDQ]" @@ -139,21 +79,11 @@ - *int out: - *int -- go: ShiftRightMasked - signed: true - asm: "VPSRAV[WDQ]" - in: - - class: mask - - *int - - *int - out: - - *int # Rotate -- go: RotateAllLeftMasked +- go: RotateAllLeft asm: "VPROL[DQ]" in: - - class: mask - *any - &pureImm class: immediate @@ -161,65 +91,58 @@ name: shift out: - *any -- go: RotateAllRightMasked +- go: RotateAllRight asm: "VPROR[DQ]" in: - - class: mask - *any - *pureImm out: - *any -- go: RotateLeftMasked +- go: RotateLeft asm: "VPROLV[DQ]" in: - - class: mask - *any - *any out: - *any -- go: RotateRightMasked +- go: RotateRight asm: "VPRORV[DQ]" in: - - class: mask - *any - *any out: - *any # Bizzare shifts. -- go: ShiftAllLeftAndFillUpperFromMasked +- go: ShiftAllLeftAndFillUpperFrom asm: "VPSHLD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFromMasked +- go: ShiftAllRightAndFillUpperFrom asm: "VPSHRD[WDQ]" in: - - class: mask - *any - *any - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFromMasked +- go: ShiftLeftAndFillUpperFrom asm: "VPSHLDV[WDQ]" in: - *any - - class: mask - *any - *any out: - *any -- go: ShiftRightAndFillUpperFromMasked +- go: ShiftRightAndFillUpperFrom asm: "VPSHRDV[WDQ]" in: - *any - - class: mask - *any - *any out: - - *any \ No newline at end of file + - *any diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml index 17f5be55..f7a01cb3 100644 --- a/internal/simdgen/types.yaml +++ b/internal/simdgen/types.yaml @@ -37,6 +37,7 @@ in: !repeat - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, 
lanes: 16} - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} + - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} @@ -83,5 +84,7 @@ in: !repeat - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4} - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. +inVariant: !repeat +- *types out: !repeat - *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index dddf5395..6a3feb36 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -22,6 +22,18 @@ const ( GREG_CLASS = 2 // classify as a general register ) +// instVariant is a bitmap indicating a variant of an instruction that has +// optional parameters. +type instVariant uint8 + +const ( + instVariantNone instVariant = 0 + + // instVariantMasked indicates that this is the masked variant of an + // optionally-masked instruction. + instVariantMasked instVariant = 1 << iota +) + var operandRemarks int // TODO: Doc. Returns Values with Def domains. @@ -58,12 +70,16 @@ func loadXED(xedPath string) []*unify.Value { return } - uval := instToUVal(inst, ops) - defs = append(defs, uval) + applyQuirks(inst, ops) + + defsPos := len(defs) + defs = append(defs, instToUVal(inst, ops)...) 
if *flagDebugXED { - y, _ := yaml.Marshal(uval) - fmt.Printf("==>\n%s\n", y) + for i := defsPos; i < len(defs); i++ { + y, _ := yaml.Marshal(defs[i]) + fmt.Printf("==>\n%s\n", y) + } } }) if err != nil { @@ -72,6 +88,35 @@ func loadXED(xedPath string) []*unify.Value { return defs } +var ( + maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]`) + maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`) +) + +func applyQuirks(inst *xeddata.Inst, ops []operand) { + opc := inst.Opcode() + switch { + case maskRequiredRe.MatchString(opc): + // The mask on these instructions is marked optional, but the + // instruction is pointless without the mask. + for i, op := range ops { + if op, ok := op.(operandMask); ok { + op.optional = false + ops[i] = op + } + } + + case maskOptionalRe.MatchString(opc): + // Conversely, these masks should be marked optional and aren't. + for i, op := range ops { + if op, ok := op.(operandMask); ok && op.action.r { + op.optional = true + ops[i] = op + } + } + } +} + type operandCommon struct { action operandAction } @@ -121,6 +166,9 @@ type operandMask struct { // Bits in the mask is w/bits. allMasks bool // If set, size cannot be inferred because all operands are masks. + + // Mask can be omitted, in which case it defaults to K0/"no mask" + optional bool } type operandImm struct { @@ -233,8 +281,12 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) { } else if strings.HasPrefix(lhs, "REG") { if op.Width == "mskw" { // The mask operand doesn't specify a width. We have to infer it. + // + // XED uses the marker ZEROSTR to indicate that a mask operand is + // optional and, if omitted, implies K0, aka "no mask". 
return operandMask{ operandCommon: common, + optional: op.Attributes["TXT=ZEROSTR"], }, nil } else { class, regBits := decodeReg(op) @@ -397,38 +449,63 @@ func inferMaskSizes(ops []operand) error { return nil } -func operandsToUVals(ops []operand) (in, out unify.Tuple) { - var inVals, outVals []*unify.Value - for asmPos, op := range ops { +// addOperandstoDef adds "in", "inVariant", and "out" to an instruction Def. +// +// Optional mask input operands are added to the inVariant field if +// variant&instVariantMasked, and omitted otherwise. +func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVariant) { + var inVals, inVar, outVals []*unify.Value + asmPos := 0 + for _, op := range ops { var db unify.DefBuilder op.addToDef(&db) - db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) action := op.common().action + asmCount := 1 // # of assembly operands; 0 or 1 if action.r { inVal := unify.NewValue(db.Build()) - inVals = append(inVals, inVal) + // If this is an optional mask, put it in the input variant tuple. + if mask, ok := op.(operandMask); ok && mask.optional { + if variant&instVariantMasked != 0 { + inVar = append(inVar, inVal) + } else { + // This operand doesn't appear in the assembly at all. + asmCount = 0 + } + } else { + // Just a regular input operand. + inVals = append(inVals, inVal) + } } if action.w { outVal := unify.NewValue(db.Build()) outVals = append(outVals, outVal) } + + asmPos += asmCount } - return unify.NewTuple(inVals...), unify.NewTuple(outVals...) + instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...))) + instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...))) + instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...))) } -func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { - // Map operands to unifier values. 
- ins, outs := operandsToUVals(ops) +func instToUVal(inst *xeddata.Inst, ops []operand) []*unify.Value { + var vals []*unify.Value + vals = append(vals, instToUVal1(inst, ops, instVariantNone)) + if hasOptionalMask(ops) { + vals = append(vals, instToUVal1(inst, ops, instVariantMasked)) + } + return vals +} +func instToUVal1(inst *xeddata.Inst, ops []operand, variant instVariant) *unify.Value { // TODO: "feature" var db unify.DefBuilder db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode()))) - db.Add("in", unify.NewValue(ins)) - db.Add("out", unify.NewValue(outs)) + addOperandsToDef(ops, &db, variant) db.Add("extension", unify.NewValue(unify.NewStringExact(inst.Extension))) db.Add("isaset", unify.NewValue(unify.NewStringExact(inst.ISASet))) @@ -454,6 +531,16 @@ func instToUVal(inst *xeddata.Inst, ops []operand) *unify.Value { return unify.NewValuePos(db.Build(), pos) } +// hasOptionalMask returns whether there is an optional mask operand in ops. +func hasOptionalMask(ops []operand) bool { + for _, op := range ops { + if op, ok := op.(operandMask); ok && op.optional { + return true + } + } + return false +} + func singular[T comparable](xs []T) (T, bool) { if len(xs) == 0 { return *new(T), false From f4fa54fbebd1d3dec45e7c293b1c63c751fec9f5 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 23 Jul 2025 08:40:44 +0000 Subject: [PATCH 158/200] internal/simdgen: support load from bits for mask This CL adds the code generation to store K masks to bits. This will enable more flexible and performant SIMD programming. This CL generates CL 689795. 
Change-Id: I3fe99fb3dc5073f267c9a3831fde04bb14834d90 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689775 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 1 + internal/simdgen/gen_simdTypes.go | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 355c8d14..7140eda2 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -74,6 +74,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 50553b4e..35b4a720 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -95,12 +95,20 @@ func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) {{- else}} -// {{.Name}}FromBits constructs a {{.Name}} from an a bitmap, where 1 means set for the indexed element, 0 means unset. +// Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. // +// CPU Features: AVX512 //go:noescape func Load{{.Name}}FromBits(y *uint64) {{.Name}} +// StoreToBits stores a {{.Name}} as a bitmap, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. 
+// +// CPU Features: AVX512 +//go:noescape +func (x {{.Name}}) StoreToBits(y *uint64) + {{end}} {{end}} ` From 2b75d2ffd5e9104cbaab1dc2f5092d710406e626 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 16 Jul 2025 14:19:04 -0400 Subject: [PATCH 159/200] internal/simdgen: remove automatically generated tests This pairs with Go dev.simd CL 686057 that adds test infrastucture in that repo. Change-Id: I1968933e0ce0a32598c303b310e0efe1e49e12ee Reviewed-on: https://go-review.googlesource.com/c/arch/+/689275 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 228 ------------------------------ internal/simdgen/godefs.go | 1 - 2 files changed, 229 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 35b4a720..98d2b5a3 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -292,234 +292,6 @@ func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} {{end}} ` -const simdTestsWrapperTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
- -//go:build goexperiment.simd - -package simd_test - -import ( - "simd" - "testing" -) -{{end}} -{{define "op"}} -func test{{.OpShape}}(t *testing.T, {{.BaseArgDefList}}, want []{{.ResBaseType}}, which string) { - t.Helper() - var gotv simd.{{.ResVecType}} - got := make([]{{.ResBaseType}}, len(want)){{range $i, $a := .ArgVecTypes}} - vec{{$i}} := simd.Load{{$a}}Slice(v{{$i}}){{end}} - switch which { -{{range .Ops}}case "{{.}}": - gotv = vec0.{{.}}({{$.VecArgList}}){{$.OptionalMaskToInt}} -{{end}} - default: - t.Errorf("Unknown method: {{.Arg0VecType}}.%s", which) - } - gotv.StoreSlice(got) - for i := range len(want) { - if got[i] != want[i] { - t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) - } - } -} -{{end}} -{{define "untestedOpHeader"}} -/* The operations below cannot be tested via wrappers, please test them directly */ -{{end}} -{{define "untestedOp"}} -// {{.}}{{end}} -` - -// writeSIMDTestsWrapper generates the test wrappers and writes it to simd_amd64_testwrappers.go -// within the specified directory. 
-func writeSIMDTestsWrapper(ops []Operation) *bytes.Buffer { - t := templateOf(simdTestsWrapperTmpl, "simdTestWrappers") - buffer := new(bytes.Buffer) - - if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) - } - - // The comment shows an example of Uint8x64.Add - type opData struct { - OpShape string // "Uint8x64Uint8x64Uint8x64" - BaseArgDefList string // "v0 uint8[], v1 uint8[]" - VecArgList string // "vec1" - ResBaseType string // "uint8" - ResVecType string // "Uint8x64" - Arg0VecType string // "Uint8x64" - ArgVecTypes []string // ["Uint8x64", "Uint8x64"] - OptionalMaskToInt string // ".AsInt8x64()" or "" - Ops []string // ["Add", "Sub"] - } - - opsByShape := make(map[string]opData) - opsSkipped := map[string]struct{}{} -outerLoop: - for _, o := range ops { - _, _, _, immType, gOp := o.shape() - - if immType == VarImm || immType == ConstVarImm { - // Operations with variable immediates should be called directly - // instead of through wrappers. - opsSkipped[o.Go] = struct{}{} - continue - } - if vasIdx, err := checkVecAsScalar(o); err != nil { - panic(err) - } else if vasIdx != -1 { - // TODO: these could be tested via wrappers, implement this. - opsSkipped[o.Go] = struct{}{} - continue - } - if o.OperandOrder != nil { - // We need to check if the customize order change the function signature. - // It is only safe to proceed generating the test wrappers if the function - // signature stays the same. - // Filtering out unqualified cases as a hack now, this test wrapper - // infrastrcuture should be changing soon so it should be fine. 
- switch *o.OperandOrder { - case "21": - // No op because it's only set in AndNot, and opr[2] and opr[1] has the same shape - default: - opsSkipped[o.Go] = struct{}{} - continue outerLoop - } - } - - var shape string - var baseArgDefList []string - var vecArgList []string - var argVecTypes []string - var vec string - var vecOp Operand - allSameVec := true - masked := strings.HasSuffix(gOp.Go, "Masked") - skippedMaskCnt := 0 - vecCnt := 0 - for i, in := range gOp.In { - baseArgDefList = append(baseArgDefList, fmt.Sprintf("v%d []%s%d", i, *in.Base, *in.ElemBits)) - if i != 0 { - maskConversion := "" - if in.Class == "mask" { - maskConversion = fmt.Sprintf(".As%s()", *in.Go) - } - vecArgList = append(vecArgList, fmt.Sprintf("vec%d%s", i, maskConversion)) - } - // gOp will only have either mask or vreg operand, so the following check - // is sufficient to detect whether it's a pure vreg or masked pure vreg operation - // with all the same vectors. - if in.Class == "mask" { - if masked && skippedMaskCnt == 0 { - skippedMaskCnt++ - } else { - allSameVec = false - } - } else { - if len(vec) > 0 { - if vec != *in.Go { - allSameVec = false - } - } - vecCnt++ - vec = *in.Go - vecOp = in - } - shape += *in.Go - argVecTypes = append(argVecTypes, strings.ReplaceAll(*in.Go, "Mask", "Int")) - } - isCompare := false - isWiden := false - outOp := gOp.Out[0] - if *outOp.Go != vec { - if allSameVec && outOp.Class == "mask" && *outOp.Bits == *vecOp.Bits && *outOp.Lanes == *vecOp.Lanes { - isCompare = true - } - if allSameVec && outOp.Class == "vreg" && *outOp.Bits == *vecOp.Bits && *outOp.Base == *vecOp.Base && *outOp.Lanes == *vecOp.Lanes/2 { - isWiden = true - } - if !isCompare && !isWiden { - allSameVec = false - } - } - shape += *gOp.Out[0].Go - if allSameVec { - numToName := map[int]string{1: "Unary", 2: "Binary", 3: "Ternary"} - if _, ok := numToName[vecCnt]; !ok { - panic(fmt.Errorf("unknown shape: %s", shape)) - } - shape = vec + numToName[vecCnt] - if masked { - shape += 
"Masked" - } - if isCompare { - if vecCnt == 2 { - // Remove "Binary" - shape = strings.ReplaceAll(shape, "Binary", "") - } - shape += "Compare" - } - if isWiden { - shape += "Widen" - } - } - optionalMaskToInt := "" - if gOp.Out[0].Class == "mask" { - optionalMaskToInt = fmt.Sprintf(".As%s()", strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int")) - } - if _, ok := opsByShape[shape]; !ok { - opsByShape[shape] = opData{ - OpShape: shape, - BaseArgDefList: strings.Join(baseArgDefList, ", "), - VecArgList: strings.Join(vecArgList, ", "), - ResBaseType: fmt.Sprintf("%s%d", *gOp.Out[0].Base, *gOp.Out[0].ElemBits), - ResVecType: strings.ReplaceAll(*gOp.Out[0].Go, "Mask", "Int"), - Arg0VecType: *gOp.In[0].Go, - ArgVecTypes: argVecTypes, - OptionalMaskToInt: optionalMaskToInt, - } - } - data := opsByShape[shape] - data.Ops = append(data.Ops, gOp.Go) - opsByShape[shape] = data - } - - compareOpData := func(x, y opData) int { - return compareNatural(x.OpShape, y.OpShape) - } - data := make([]opData, 0) - for _, d := range opsByShape { - slices.SortFunc(d.Ops, compareNatural) - data = append(data, d) - } - slices.SortFunc(data, compareOpData) - - for _, d := range data { - if err := t.ExecuteTemplate(buffer, "op", d); err != nil { - panic(fmt.Errorf("failed to execute op template for op shape %s: %w", d.OpShape, err)) - } - } - - if len(opsSkipped) != 0 { - if err := t.ExecuteTemplate(buffer, "untestedOpHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute untestedOpHeader")) - } - opsK := []string{} - for k := range opsSkipped { - opsK = append(opsK, k) - } - slices.SortFunc(opsK, strings.Compare) - for _, k := range opsK { - if err := t.ExecuteTemplate(buffer, "untestedOp", k); err != nil { - panic(fmt.Errorf("failed to execute untestedOp")) - } - } - } - - return buffer -} - // parseSIMDTypes groups go simd types by their vector sizes, and // returns a map whose key is the vector size, value is the simd type. 
func parseSIMDTypes(ops []Operation) simdTypeMap { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 741214bb..1bdfec1b 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -348,7 +348,6 @@ func writeGoDefs(path string, cl unify.Closure) error { formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") - formatWriteAndClose(writeSIMDTestsWrapper(deduped), path, "src/"+simdPackage+"/simd_wrapped_test.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") From 1167cd0b22b37def53455af89123822867ea2bd0 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 21 Jul 2025 13:32:05 -0400 Subject: [PATCH 160/200] internal/simdgen: add generated declarations for AVX2 masked load/store generates Go dev.simd CL 689335 (which also includes one basic test) Change-Id: Icd948396a3ca265b307747437efbc0e6f4548c76 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689276 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdIntrinsics.go | 12 +++++ internal/simdgen/gen_simdTypes.go | 64 +++++++++++++++++++++----- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 7140eda2..d114b4cd 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -69,6 +69,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) {{end}} +{{define "maskedLoadStore"}} addF(simdPackage, "LoadMasked{{.Name}}", simdMaskedLoad(ssa.OpLoadMasked{{.ElemBits}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.StoreMasked", simdMaskedStore(ssa.OpStoreMasked{{.ElemBits}}), sys.AMD64) +{{end}} + {{define "mask"}} addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) @@ -118,6 +122,14 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { } } + for _, typ := range typesFromTypeMap(typeMap) { + if typ.MaskedLoadStoreFilter() { + if err := t.ExecuteTemplate(buffer, "maskedLoadStore", typ); err != nil { + panic(fmt.Errorf("failed to execute maskedLoadStore template: %w", err)) + } + } + } + for _, mask := range masksFromTypeMap(typeMap) { if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { panic(fmt.Errorf("failed to execute mask template: %w", err)) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 98d2b5a3..6739b9fc 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -20,8 +20,18 @@ type simdType struct { Type string // Either "mask" or "vreg" VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32. 
- Size int // The size of the type - ElemBits int // Size / Lanes + Size int // The size of the vector type +} + +func (x simdType) ElemBits() int { + return x.Size / x.Lanes +} + +// MaskedLoadStoreFilter encodes which simd type type currently +// get masked loads/stores generated, it is used in two places, +// this forces coordination. +func (x simdType) MaskedLoadStoreFilter() bool { + return x.Size < 512 && x.ElemBits() >= 32 && x.Type != "mask" } func compareSimdTypes(x, y simdType) int { @@ -36,7 +46,7 @@ func compareSimdTypes(x, y simdType) int { return c } // base type size, 8 < 16 < 32 < 64 - if c := x.Size/x.Lanes - y.Size/y.Lanes; c != 0 { + if c := x.ElemBits() - y.ElemBits(); c != 0 { return c } // vector size last @@ -78,8 +88,10 @@ type {{.Name}} struct { {{.Fields}} } -{{- if ne .Type "mask"}} +{{end}} +` +const simdLoadStoreTemplate = ` // Len returns the number of elements in a {{.Name}} func (x {{.Name}}) Len() int { return {{.Lanes}} } @@ -92,9 +104,9 @@ func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} // //go:noescape func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) +` -{{- else}} - +const simdMaskFromBitsTemplate = ` // Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. 
// @@ -108,9 +120,20 @@ func Load{{.Name}}FromBits(y *uint64) {{.Name}} // CPU Features: AVX512 //go:noescape func (x {{.Name}}) StoreToBits(y *uint64) +` -{{end}} -{{end}} +const simdMaskedLoadStoreTemplate = ` +// LoadMasked{{.Name}} loads a {{.Name}} from an array, +// at those elements enabled by mask +// +//go:noescape +func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} + +// StoreMasked stores a {{.Name}} to an array, +// at those elements enabled by mask +// +//go:noescape +func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) ` const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. @@ -317,14 +340,14 @@ func parseSIMDTypes(ops []Operation) simdTypeMap { if arg.Class == "mask" { vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits, *arg.Bits / lanes}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. 
if _, ok := seen[vectorCounterpart]; !ok { seen[vectorCounterpart] = struct{}{} - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits, *arg.Bits / lanes}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) } } else { - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits, *arg.Bits / lanes}) + ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) } } for _, op := range ops { @@ -383,6 +406,10 @@ func typesFromTypeMap(typeMap simdTypeMap) []simdType { // writeSIMDTypes generates the simd vector types into a bytes.Buffer func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdTypesTemplates, "types_amd64") + loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64") + maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64") + maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64") + buffer := new(bytes.Buffer) if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { @@ -411,6 +438,21 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) } + if typeDef.Type != "mask" { + if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err)) + } + // restrict to AVX2 masked loads/stores first. 
+ if typeDef.MaskedLoadStoreFilter() { + if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err)) + } + } + } else { + if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err)) + } + } } } From 357d0b5ab3e25a75a99794118ec4084ba90a6485 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 22 Jul 2025 14:48:33 -0400 Subject: [PATCH 161/200] internal/simdgen: modify sorting for generic/ssa ops, rules there was some incomplete ordering that sometimes caused gratuitous changes. Change-Id: I919136c0ab954a3c3151e2745b7626ba83352c52 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689655 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdGenericOps.go | 7 +++---- internal/simdgen/gen_simdMachineOps.go | 9 ++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 4eb47b44..c345793a 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -32,7 +32,6 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { buffer := new(bytes.Buffer) type genericOpsData struct { - sortKey string OpName string OpInLen int Comm bool @@ -44,7 +43,7 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { var opsData opData for _, op := range ops { _, _, _, immType, gOp := op.shape() - gOpData := genericOpsData{*gOp.In[0].Go + gOp.Go, genericName(gOp), len(gOp.In), op.Commutative} + gOpData := genericOpsData{genericName(gOp), len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { opsData.OpsImm = append(opsData.OpsImm, gOpData) } else { @@ -52,10 +51,10 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } } 
sort.Slice(opsData.Ops, func(i, j int) bool { - return opsData.Ops[i].sortKey < opsData.Ops[j].sortKey + return compareNatural(opsData.Ops[i].OpName, opsData.Ops[j].OpName) < 0 }) sort.Slice(opsData.OpsImm, func(i, j int) bool { - return opsData.OpsImm[i].sortKey < opsData.OpsImm[j].sortKey + return compareNatural(opsData.OpsImm[i].OpName, opsData.OpsImm[j].OpName) < 0 }) err := t.Execute(buffer, opsData) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 39bf2ec1..22893a22 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -33,7 +33,6 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { buffer := new(bytes.Buffer) type opData struct { - sortKey string OpName string Asm string OpInLen int @@ -108,16 +107,16 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { resultInArg0 = true } if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { - opsDataImm = append(opsDataImm, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) + opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } else { - opsData = append(opsData, opData{*gOp.In[0].Go + gOp.Go, asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) + opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) } } sort.Slice(opsData, func(i, j int) bool { - return opsData[i].sortKey < opsData[j].sortKey + return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 }) sort.Slice(opsDataImm, func(i, j int) bool { - return opsDataImm[i].sortKey < opsDataImm[j].sortKey + return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 }) err := t.Execute(buffer, machineOpsData{opsData, opsDataImm}) if err != nil { From d3ce7fc27510fe87f7631081224000bbd26f342b Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 22 Jul 2025 
14:47:55 -0400 Subject: [PATCH 162/200] internal/simdgen: add some conversion ops Generates dev.simd CL 689716 Change-Id: I6444cdaf94a560d50828fc6291e790f651f42f8e Reviewed-on: https://go-review.googlesource.com/c/arch/+/689735 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 11 ++++++++++ internal/simdgen/go.yaml | 20 ++++++++++++++++++ internal/simdgen/ops/Converts/categories.yaml | 12 +++++++++++ internal/simdgen/ops/Converts/go.yaml | 21 +++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 internal/simdgen/ops/Converts/categories.yaml create mode 100644 internal/simdgen/ops/Converts/go.yaml diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index c13fd431..0f883bfa 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -115,6 +115,17 @@ extension: "AVX.*" documentation: !string |- // NAME compares for greater than. +- go: ConvertToInt32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToInt32 converts element values to int32. + +- go: ConvertToUint32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToUint32Masked converts element values to uint32. 
- go: Div commutative: false extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index c58d692e..df8f341c 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -240,6 +240,26 @@ const: 0 out: - class: mask +- go: ConvertToInt32 + asm: "VCVTTPS2DQ" + in: + - &fp + go: $t + base: float + out: + - &i32 + go: $u + base: int + elemBits: 32 +- go: ConvertToUint32 + asm: "VCVTPS2UDQ" + in: + - *fp + out: + - &u32 + go: $u + base: uint + elemBits: 32 - go: Div asm: "V?DIVP[SD]" in: &2fp diff --git a/internal/simdgen/ops/Converts/categories.yaml b/internal/simdgen/ops/Converts/categories.yaml new file mode 100644 index 00000000..16316ed3 --- /dev/null +++ b/internal/simdgen/ops/Converts/categories.yaml @@ -0,0 +1,12 @@ +!sum +- go: ConvertToInt32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToInt32 converts element values to int32. + +- go: ConvertToUint32 + commutative: false + extension: "AVX.*" + documentation: !string |- + // ConvertToUint32Masked converts element values to uint32. diff --git a/internal/simdgen/ops/Converts/go.yaml b/internal/simdgen/ops/Converts/go.yaml new file mode 100644 index 00000000..4e251728 --- /dev/null +++ b/internal/simdgen/ops/Converts/go.yaml @@ -0,0 +1,21 @@ +!sum +- go: ConvertToInt32 + asm: "VCVTTPS2DQ" + in: + - &fp + go: $t + base: float + out: + - &i32 + go: $u + base: int + elemBits: 32 +- go: ConvertToUint32 + asm: "VCVTPS2UDQ" + in: + - *fp + out: + - &u32 + go: $u + base: uint + elemBits: 32 From 0f343f3f4c0a732316241d19d6335927e0b34b19 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 23 Jul 2025 14:31:49 -0400 Subject: [PATCH 163/200] internal/simdgen: add declarations+intrinsics for mask-from-value This generates Go dev.simd CL 689936. 
Change-Id: Ib63abe15f3c6c4ca01583f4cc72636ceb67eb528 Reviewed-on: https://go-review.googlesource.com/c/arch/+/689955 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdIntrinsics.go | 1 + internal/simdgen/gen_simdTypes.go | 30 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index d114b4cd..d3b35218 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -79,6 +79,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}FromBits", simdCvtMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 6739b9fc..d1e4d495 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -27,6 +27,25 @@ func (x simdType) ElemBits() int { return x.Size / x.Lanes } +// LanesContainer returns the smallest int/uint bit size that is +// large enough to hold one bit for each lane. E.g., Mask32x4 +// is 4 lanes, and a uint8 is the smallest uint that has 4 bits. +func (x simdType) LanesContainer() int { + if x.Lanes > 64 { + panic("too many lanes") + } + if x.Lanes > 32 { + return 64 + } + if x.Lanes > 16 { + return 32 + } + if x.Lanes > 8 { + return 16 + } + return 8 +} + // MaskedLoadStoreFilter encodes which simd type type currently // get masked loads/stores generated, it is used in two places, // this forces coordination. 
@@ -122,6 +141,13 @@ func Load{{.Name}}FromBits(y *uint64) {{.Name}} func (x {{.Name}}) StoreToBits(y *uint64) ` +const simdMaskFromValTemplate = ` +// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. +// +func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} +` + const simdMaskedLoadStoreTemplate = ` // LoadMasked{{.Name}} loads a {{.Name}} from an array, // at those elements enabled by mask @@ -409,6 +435,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64") maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64") maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64") + maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64") buffer := new(bytes.Buffer) @@ -452,6 +479,9 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil { panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err)) } + if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil { + panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err)) + } } } } From 968e15c5e5f7bc777d67019c792a8a25ef55eab7 Mon Sep 17 00:00:00 2001 From: David Chase Date: Thu, 24 Jul 2025 14:55:59 -0400 Subject: [PATCH 164/200] internal/simdgen: enable k-masked load/store on AVX512 includes a fix to the comments for the move-value-to-mask functions Generates Go dev.simd CL 690336 Change-Id: I2c98f0525a0e95d4eaa2ee221774a48607ac083a Reviewed-on: https://go-review.googlesource.com/c/arch/+/690315 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdTypes.go | 37 ++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff 
--git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d1e4d495..d3791d78 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -50,7 +50,37 @@ func (x simdType) LanesContainer() int { // get masked loads/stores generated, it is used in two places, // this forces coordination. func (x simdType) MaskedLoadStoreFilter() bool { - return x.Size < 512 && x.ElemBits() >= 32 && x.Type != "mask" + return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask" +} + +func (x simdType) IntelSizeSuffix() string { + switch x.ElemBits() { + case 8: + return "B" + case 16: + return "W" + case 32: + return "D" + case 64: + return "Q" + } + panic("oops") +} + +func (x simdType) MaskedLoadDoc() string { + if x.Size == 512 || x.ElemBits() < 32 { + return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits()) + } else { + return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) + } +} + +func (x simdType) MaskedStoreDoc() string { + if x.Size == 512 || x.ElemBits() < 32 { + return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits()) + } else { + return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) + } } func compareSimdTypes(x, y simdType) int { @@ -145,6 +175,7 @@ const simdMaskFromValTemplate = ` // {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. 
// +// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512 func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} ` @@ -152,12 +183,16 @@ const simdMaskedLoadStoreTemplate = ` // LoadMasked{{.Name}} loads a {{.Name}} from an array, // at those elements enabled by mask // +{{.MaskedLoadDoc}} +// //go:noescape func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} // StoreMasked stores a {{.Name}} to an array, // at those elements enabled by mask // +{{.MaskedStoreDoc}} +// //go:noescape func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) ` From 0354b497e1da15e8daee321f92b0186061d34c90 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 31 Jul 2025 23:44:40 +0000 Subject: [PATCH 165/200] internal/simdgen: change Shift*AndFillUpperFrom to Shift*Concat This CL generates CL 692215. Change-Id: Idccaeeef2f0d3ca6e8113df5c95d72f9e11830b6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/692216 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 8 ++++---- internal/simdgen/go.yaml | 8 ++++---- internal/simdgen/ops/ShiftRotate/categories.yaml | 8 ++++---- internal/simdgen/ops/ShiftRotate/go.yaml | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 0f883bfa..996955bf 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -465,28 +465,28 @@ extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
-- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index df8f341c..1e836912 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -945,7 +945,7 @@ - *any # Bizzare shifts. 
-- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat asm: "VPSHLD[WDQ]" in: - *any @@ -953,7 +953,7 @@ - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat asm: "VPSHRD[WDQ]" in: - *any @@ -961,7 +961,7 @@ - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat asm: "VPSHLDV[WDQ]" in: - *any @@ -969,7 +969,7 @@ - *any out: - *any -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat asm: "VPSHRDV[WDQ]" in: - *any diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index f9a92652..5528b4d8 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -63,28 +63,28 @@ extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
-- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat nameAndSizeCheck: true commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index ff4c3156..4ade55d7 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -114,7 +114,7 @@ - *any # Bizzare shifts. -- go: ShiftAllLeftAndFillUpperFrom +- go: ShiftAllLeftConcat asm: "VPSHLD[WDQ]" in: - *any @@ -122,7 +122,7 @@ - *pureImm out: - *any -- go: ShiftAllRightAndFillUpperFrom +- go: ShiftAllRightConcat asm: "VPSHRD[WDQ]" in: - *any @@ -130,7 +130,7 @@ - *pureImm out: - *any -- go: ShiftLeftAndFillUpperFrom +- go: ShiftLeftConcat asm: "VPSHLDV[WDQ]" in: - *any @@ -138,7 +138,7 @@ - *any out: - *any -- go: ShiftRightAndFillUpperFrom +- go: ShiftRightConcat asm: "VPSHRDV[WDQ]" in: - *any From 4967ce7c35d0e540671a81128edf4ae59370026b Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 31 Jul 2025 23:53:12 +0000 Subject: [PATCH 166/200] internal/simdgen: change PairDotProdAccumulate to AddDotProd This CL generates CL 692156. 
Change-Id: Ic38a9e0e3febb63465afca065e1b9fb98c0e81ca Reviewed-on: https://go-review.googlesource.com/c/arch/+/692219 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/categories.yaml | 8 ++++---- internal/simdgen/go.yaml | 6 ++---- internal/simdgen/ops/MLOps/categories.yaml | 8 ++++---- internal/simdgen/ops/MLOps/go.yaml | 6 ++---- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 996955bf..24fa6165 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -304,16 +304,16 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: PairDotProdAccumulate +- go: AddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulate + // NAME performs dot products on pairs of elements of y and z and then adds x. +- go: SaturatedAddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of y and z and then adds x. 
- go: FusedMultiplyAdd commutative: false extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 1e836912..eb435407 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -457,9 +457,8 @@ - *qdpa_src2 out: - *qdpa_acc -- go: PairDotProdAccumulate +- go: AddDotProd asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -475,9 +474,8 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulate +- go: SaturatedAddDotProd asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index c90942de..b3508d25 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -28,16 +28,16 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: PairDotProdAccumulate +- go: AddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. -- go: SaturatedPairDotProdAccumulate + // NAME performs dot products on pairs of elements of y and z and then adds x. +- go: SaturatedAddDotProd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs dot products on pairs of elements of x and y and then adds z. + // NAME performs dot products on pairs of elements of y and z and then adds x. 
- go: FusedMultiplyAdd commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 2fb3e52f..8da2071d 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -60,9 +60,8 @@ - *qdpa_src2 out: - *qdpa_acc -- go: PairDotProdAccumulate +- go: AddDotProd asm: "VPDPWSSD" - operandOrder: "31" # switch operand 3 and 1 in: - &pdpa_acc go: $t_acc @@ -78,9 +77,8 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: SaturatedPairDotProdAccumulate +- go: SaturatedAddDotProd asm: "VPDPWSSDS" - operandOrder: "31" # switch operand 3 and 1 in: - *pdpa_acc - *pdpa_src1 From 2f2bc4cacd1ea78a54aefe396e099548d3ff7f5f Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 1 Aug 2025 19:12:32 +0000 Subject: [PATCH 167/200] internal/simdgen: make bitwise logic available to all u?int vectors This CL generates CL 692356. Change-Id: I4d5da85d4ff7f83df52f4e2e1e082e8ccd6a5883 Reviewed-on: https://go-review.googlesource.com/c/arch/+/692555 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/go.yaml | 60 +++++++++++++++++++++++ internal/simdgen/ops/BitwiseLogic/go.yaml | 60 +++++++++++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index eb435407..ddab9c38 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -118,6 +118,28 @@ out: - *any +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 + inVariant: [] + in: &twoI8x64 + - &i8x64 + go: $t + overwriteElementBits: 8 + - *i8x64 + out: &oneI8x64 + - *i8x64 + +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 + inVariant: [] + in: &twoI16x32 + - &i16x32 + go: $t + overwriteElementBits: 16 + - *i16x32 + out: &oneI16x32 + - *i16x32 + - go: AndNot asm: "VPANDN[DQ]?" 
operandOrder: "21" # switch the arg order @@ -127,6 +149,20 @@ out: - *any +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Or asm: "VPOR[DQ]?" in: @@ -135,6 +171,18 @@ out: - *any +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Xor asm: "VPXOR[DQ]?" in: @@ -142,6 +190,18 @@ - *any out: - *any + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 # Ints - go: Equal asm: "V?PCMPEQ[BWDQ]" diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml index 0d0f1c8c..ab344438 100644 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ b/internal/simdgen/ops/BitwiseLogic/go.yaml @@ -42,6 +42,28 @@ out: - *any +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 + inVariant: [] + in: &twoI8x64 + - &i8x64 + go: $t + overwriteElementBits: 8 + - *i8x64 + out: &oneI8x64 + - *i8x64 + +- go: And + asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 + inVariant: [] + in: &twoI16x32 + - &i16x32 + go: $t + overwriteElementBits: 16 + - *i16x32 + out: &oneI16x32 + - *i16x32 + - go: AndNot asm: "VPANDN[DQ]?" 
operandOrder: "21" # switch the arg order @@ -51,6 +73,20 @@ out: - *any +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: AndNot + asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 + operandOrder: "21" # switch the arg order + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Or asm: "VPOR[DQ]?" in: @@ -59,6 +95,18 @@ out: - *any +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Or + asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 + - go: Xor asm: "VPXOR[DQ]?" in: @@ -66,3 +114,15 @@ - *any out: - *any + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 + inVariant: [] + in: *twoI8x64 + out: *oneI8x64 + +- go: Xor + asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 + inVariant: [] + in: *twoI16x32 + out: *oneI16x32 \ No newline at end of file From 17d837876ca6305800d018a702fadb1a56620870 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 1 Aug 2025 16:05:20 -0400 Subject: [PATCH 168/200] internal/simdgen: rename some methods Generates dev.simd CL 692357. 
these are the "easy" renamings: SaturatedOp -> OpSaturated PairwiseOp -> OpPairs OpWithPrecision -> OpScaled DiffWithOpWithPrecision -> OpScaledResidue Change-Id: I494efdc5b09d39dc1628fc667a71574fc5725515 Reviewed-on: https://go-review.googlesource.com/c/arch/+/692556 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 35 ++++++++----------- internal/simdgen/go.yaml | 32 ++++++++--------- internal/simdgen/ops/AddSub/categories.yaml | 12 +++---- internal/simdgen/ops/AddSub/go.yaml | 24 ++++++------- .../simdgen/ops/FPonlyArith/categories.yaml | 18 +++++----- internal/simdgen/ops/FPonlyArith/go.yaml | 6 ++-- internal/simdgen/ops/Mul/categories.yaml | 5 --- internal/simdgen/ops/Mul/go.yaml | 2 +- 8 files changed, 62 insertions(+), 72 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 24fa6165..dcba4b73 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -4,7 +4,7 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors. -- go: SaturatedAdd +- go: AddSaturated commutative: true extension: "AVX.*" documentation: !string |- @@ -14,30 +14,30 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSub +- go: SubSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: PairwiseAdd +- go: AddPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: PairwiseSub +- go: SubPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SaturatedPairwiseSub +- go: SubPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- @@ -146,7 +146,7 @@ extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2 +- go: Scale commutative: false extension: "AVX.*" documentation: !string |- @@ -157,13 +157,13 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecision +- go: RoundScaled commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecision +- go: RoundScaledResidue commutative: false extension: "AVX.*" constImm: 0 @@ -175,13 +175,13 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. -- go: FloorWithPrecision +- go: FloorScaled commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecision +- go: FloorScaledResidue commutative: false extension: "AVX.*" constImm: 1 @@ -193,13 +193,13 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecision +- go: CeilScaled commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up with specified precision. 
-- go: DiffWithCeilWithPrecision +- go: CeilScaledResidue commutative: false extension: "AVX.*" constImm: 2 @@ -211,13 +211,13 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecision +- go: TruncScaled commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements with specified precision. -- go: DiffWithTruncWithPrecision +- go: TruncScaledResidue commutative: false extension: "AVX.*" constImm: 3 @@ -396,11 +396,6 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the high part of the result. -- go: MulLow - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. - go: ShiftAllLeft nameAndSizeCheck: true commutative: false diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index ddab9c38..8c893ba6 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -8,8 +8,8 @@ - *any out: - *any -# Saturated Add -- go: SaturatedAdd +# Add Saturated +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &int @@ -18,7 +18,7 @@ - *int out: - *int -- go: SaturatedAdd +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &uint @@ -36,42 +36,42 @@ - *any out: &1any - *any -# Saturated Sub -- go: SaturatedSub +# Sub Saturated +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: &2int - *int - *int out: &1int - *int -- go: SaturatedSub +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: - *uint - *uint out: - *uint -- go: PairwiseAdd +- go: AddPairs asm: "VPHADD[DW]" in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VPHSUB[DW]" in: *2any out: *1any -- go: PairwiseAdd +- go: AddPairs asm: "VHADDP[SD]" # floats in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VHSUBP[SD]" # floats in: *2any out: *1any -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated asm: "VPHADDS[DW]" in: *2int out: *1int -- go: SaturatedPairwiseSub +- go: SubPairsSaturated asm: 
"VPHSUBS[DW]" in: *2int out: *1int @@ -342,7 +342,7 @@ asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: MulByPowOf2 +- go: Scale asm: "VSCALEFP[SD]" in: *2fp out: *1fp @@ -355,7 +355,7 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)Scaled" asm: "VRNDSCALEP[SD]" in: - *fp @@ -364,7 +364,7 @@ immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" asm: "VREDUCEP[SD]" in: - *fp @@ -881,7 +881,7 @@ # MulLow # Signed int only. -- go: MulLow +- go: Mul asm: "VPMULL[WDQ]" in: - *int diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 2ffd1e23..4e492516 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -4,7 +4,7 @@ extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors. -- go: SaturatedAdd +- go: AddSaturated commutative: true extension: "AVX.*" documentation: !string |- @@ -14,30 +14,30 @@ extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors. -- go: SaturatedSub +- go: SubSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. -- go: PairwiseAdd +- go: AddPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: PairwiseSub +- go: SubPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SaturatedPairwiseSub +- go: SubPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index c952c150..45726cd6 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -8,8 +8,8 @@ - *any out: - *any -# Saturated Add -- go: SaturatedAdd +# Add Saturated +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &int @@ -18,7 +18,7 @@ - *int out: - *int -- go: SaturatedAdd +- go: AddSaturated asm: "VPADDS[BWDQ]" in: - &uint @@ -36,42 +36,42 @@ - *any out: &1any - *any -# Saturated Sub -- go: SaturatedSub +# Sub Saturated +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: &2int - *int - *int out: &1int - *int -- go: SaturatedSub +- go: SubSaturated asm: "VPSUBS[BWDQ]" in: - *uint - *uint out: - *uint -- go: PairwiseAdd +- go: AddPairs asm: "VPHADD[DW]" in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VPHSUB[DW]" in: *2any out: *1any -- go: PairwiseAdd +- go: AddPairs asm: "VHADDP[SD]" # floats in: *2any out: *1any -- go: PairwiseSub +- go: SubPairs asm: "VHSUBP[SD]" # floats in: *2any out: *1any -- go: SaturatedPairwiseAdd +- go: AddPairsSaturated asm: "VPHADDS[DW]" in: *2int out: *1int -- go: SaturatedPairwiseSub +- go: SubPairsSaturated asm: "VPHSUBS[DW]" in: *2int out: *1int diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 0fb727d5..63ddbb34 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -19,7 
+19,7 @@ extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. -- go: MulByPowOf2 +- go: Scale commutative: false extension: "AVX.*" documentation: !string |- @@ -30,13 +30,13 @@ constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundWithPrecision +- go: RoundScaled commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. -- go: DiffWithRoundWithPrecision +- go: RoundScaledResidue commutative: false extension: "AVX.*" constImm: 0 @@ -48,13 +48,13 @@ constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. -- go: FloorWithPrecision +- go: FloorScaled commutative: false extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down with specified precision. -- go: DiffWithFloorWithPrecision +- go: FloorScaledResidue commutative: false extension: "AVX.*" constImm: 1 @@ -66,13 +66,13 @@ constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. -- go: CeilWithPrecision +- go: CeilScaled commutative: false extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up with specified precision. -- go: DiffWithCeilWithPrecision +- go: CeilScaledResidue commutative: false extension: "AVX.*" constImm: 2 @@ -84,13 +84,13 @@ constImm: 3 documentation: !string |- // NAME truncates elements towards zero. -- go: TruncWithPrecision +- go: TruncScaled commutative: false extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements with specified precision. 
-- go: DiffWithTruncWithPrecision +- go: TruncScaledResidue commutative: false extension: "AVX.*" constImm: 3 diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index 71d1cb5f..dfb0454e 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -21,7 +21,7 @@ asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp -- go: MulByPowOf2 +- go: Scale asm: "VSCALEFP[SD]" in: *2fp out: *1fp @@ -34,7 +34,7 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)Scaled" asm: "VRNDSCALEP[SD]" in: - *fp @@ -43,7 +43,7 @@ immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). name: prec out: *1fp -- go: "DiffWith(Round|Ceil|Floor|Trunc)WithPrecision" +- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" asm: "VREDUCEP[SD]" in: - *fp diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index 9a9b8328..f4e2aed2 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -15,8 +15,3 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the high part of the result. -- go: MulLow - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the low part of the result. diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index c9ddeb4a..dd9f55fb 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -57,7 +57,7 @@ # MulLow # Signed int only. -- go: MulLow +- go: Mul asm: "VPMULL[WDQ]" in: - *int From a373a4b004606c4eca9d42eea9f824a03c388341 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 1 Aug 2025 22:20:43 +0000 Subject: [PATCH 169/200] internal/simdgen: add ShiftAll immediate variant. 
Right now ShiftAll with immediate might make the compiler generate erroneous instruction like MOV $3, X1. This CL adds the immediate variant of ShiftAll and adds rewrite rules to lower (VPSLL (Vec Const)) => (VPSLLImm [Const] (Vec)). To facilitate this, this CL adds a mechanism to do partial code generation: the immediate variant of ShiftAll only appears in machine ops. This CL also did some cleanups. This CL generates CL 693157. Change-Id: Ife898877e952f2e8d4ee1cb1efbfcf0c07e87189 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693136 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/categories.yaml | 27 +++++++++++++ internal/simdgen/gen_simdGenericOps.go | 5 ++- internal/simdgen/gen_simdIntrinsics.go | 39 ++++++++++--------- internal/simdgen/gen_simdMachineOps.go | 8 +--- internal/simdgen/gen_simdTypes.go | 6 ++- internal/simdgen/gen_simdrules.go | 34 ++++++++++++---- internal/simdgen/gen_simdssa.go | 6 +-- internal/simdgen/gen_utility.go | 17 +++++++- internal/simdgen/go.yaml | 24 ++++++++++++ internal/simdgen/godefs.go | 21 ++++++++++ .../simdgen/ops/ShiftRotate/categories.yaml | 27 +++++++++++++ internal/simdgen/ops/ShiftRotate/go.yaml | 24 ++++++++++++ 12 files changed, 195 insertions(+), 43 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index dcba4b73..0afa0b14 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -398,6 +398,7 @@ // NAME multiplies elements and stores the high part of the result. - go: ShiftAllLeft nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- @@ -405,17 +406,43 @@ - go: ShiftAllRight signed: false nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRight signed: true + specialLower: sftimm nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: shiftAllLeftConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: false + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: true + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" + - go: ShiftLeft nameAndSizeCheck: true commutative: false diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index c345793a..daf941d7 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -42,8 +42,11 @@ func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { } var opsData opData for _, op := range ops { + if op.NoGenericOps != nil && *op.NoGenericOps == "true" { + continue + } _, _, _, immType, gOp := op.shape() - gOpData := genericOpsData{genericName(gOp), len(gOp.In), op.Commutative} + gOpData := genericOpsData{gOp.GenericName(), len(gOp.In), op.Commutative} if immType == VarImm || immType == ConstVarImm { opsData.OpsImm = append(opsData.OpsImm, gOpData) } else { diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index d3b35218..ca339ac2 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -25,41 +25,41 @@ const simdPackage = "` + simdPackage + `" func 
simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { {{end}} -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", 
opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op4_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.Go}}{{(index .In 0).Go}}, {{.SSAType}}), sys.AMD64) +{{define "op4_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) {{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), 
sys.AMD64) +{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.Go}}{{(index .In 1).Go}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) +{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) {{end}} {{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) @@ -99,6 +99,9 @@ func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { slices.SortFunc(ops, compareOperations) for _, op := range ops { + if op.NoTypes != nil && *op.NoTypes == "true" { + continue + } if s, op, err := 
classifyOp(op); err == nil { if err := t.ExecuteTemplate(buffer, s, op); err != nil { panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 22893a22..7c538a00 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -53,13 +53,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { opsDataImm := make([]opData, 0) for _, op := range ops { shapeIn, shapeOut, maskType, _, gOp := op.shape() - - asm := gOp.Asm - if maskType == OneMask { - asm += "Masked" - } - - asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) + asm := machineOpName(maskType, gOp) // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy // one here with a name suffix "Merging". The rewrite rules will need them. diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index d3791d78..b9427c4a 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -524,7 +524,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { return buffer } -// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to stubs_amd64.go +// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go // within the specified directory. 
func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdStubsTmpl, "simdStubs") @@ -537,6 +537,9 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { slices.SortFunc(ops, compareOperations) for i, op := range ops { + if op.NoTypes != nil && *op.NoTypes == "true" { + continue + } idxVecAsScalar, err := checkVecAsScalar(op) if err != nil { panic(err) @@ -555,7 +558,6 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { if err := t.ExecuteTemplate(buffer, s, op); err != nil { panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err)) } - } else { panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) } diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 81aba7a0..c910f64a 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -22,6 +22,10 @@ var ( {{end}} {{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} +{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [int8(c)] x) +{{end}} +{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [int8(c)] x ({{.MaskInConvert}} mask)) +{{end}} `)) ) @@ -65,15 +69,15 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { var allData []tplRuleData for _, opr := range ops { + if opr.NoGenericOps != nil && *opr.NoGenericOps == "true" { + continue + } opInShape, opOutShape, maskType, immType, gOp := opr.shape() - + asm := machineOpName(maskType, gOp) vregInCnt := len(gOp.In) - asm := gOp.Asm if maskType == OneMask { - asm += "Masked" vregInCnt-- } - asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) data := tplRuleData{ GoOp: gOp.Go, @@ -157,11 +161,25 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { } } - if tplName == "pureVreg" && data.Args == data.ArgsOut { - data.Args = "..." 
-			data.ArgsOut = "..."
+		if gOp.SpecialLower != nil {
+			if *gOp.SpecialLower == "sftimm" {
+				sftImmData := data
+				if tplName == "maskIn" {
+					sftImmData.tplName = "masksftimm"
+				} else {
+					sftImmData.tplName = "sftimm"
+				}
+				allData = append(allData, sftImmData)
+			} else {
+				panic("simdgen sees unknown special lower " + *gOp.SpecialLower + ", maybe implement it?")
+			}
+		} else {
+			// SpecialLower rules cannot use "...".
+			if tplName == "pureVreg" && data.Args == data.ArgsOut {
+				data.Args = "..."
+				data.ArgsOut = "..."
+			}
 		}
-		data.tplName = tplName
 		allData = append(allData, data)
 	}
diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go
index b664b0f4..5a5421a8 100644
--- a/internal/simdgen/gen_simdssa.go
+++ b/internal/simdgen/gen_simdssa.go
@@ -89,13 +89,9 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
 	seen := map[string]struct{}{}
 	allUnseen := make(map[string][]Operation)
 	for _, op := range ops {
-		asm := op.Asm
 		shapeIn, shapeOut, maskType, _, gOp := op.shape()
+		asm := machineOpName(maskType, gOp)
 
-		if maskType == 2 {
-			asm += "Masked"
-		}
-		asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth())
 		if _, ok := seen[asm]; ok {
 			continue
 		}
diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go
index f1cfcfe9..59832e0e 100644
--- a/internal/simdgen/gen_utility.go
+++ b/internal/simdgen/gen_utility.go
@@ -361,6 +361,16 @@ func (o Operand) OpNameAndType(s string) string {
 	return o.OpName(s) + " " + *o.Go
 }
 
+// GoExported returns [Go] with first character capitalized.
+func (op Operation) GoExported() string {
+	return capitalizeFirst(op.Go)
+}
+
+// DocumentationExported returns [Documentation] with method name capitalized.
+func (op Operation) DocumentationExported() string {
+	return strings.ReplaceAll(op.Documentation, op.Go, op.GoExported())
+}
+
 // Op0Name returns the name to use for the 0 operand,
 // if any is present, otherwise the parameter is used. 
func (op Operation) Op0Name(s string) string { @@ -549,7 +559,7 @@ func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) return } -func genericName(op Operation) string { +func (op Operation) GenericName() string { if op.OperandOrder != nil { switch *op.OperandOrder { case "21Type1", "231Type1": @@ -557,6 +567,9 @@ func genericName(op Operation) string { return op.Go + *op.In[1].Go } } + if op.In[0].Class == "immediate" { + return op.Go + *op.In[1].Go + } return op.Go + *op.In[0].Go } @@ -569,7 +582,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { for _, op := range ops { _, _, _, _, gOp := op.shape() - gN := genericName(gOp) + gN := gOp.GenericName() seen[gN] = append(seen[gN], op) } if *FlagReportDup { diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index 8c893ba6..ba1e96be 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -921,6 +921,30 @@ out: - *int +- go: shiftAllLeftConst + asm: "VPSLL[WDQ]" + in: + - *any + - &imm + class: immediate + immOffset: 0 + out: + - *any +- go: shiftAllRightConst + asm: "VPSRL[WDQ]" + in: + - *int + - *imm + out: + - *int +- go: shiftAllRightConst + asm: "VPSRA[WDQ]" + in: + - *uint + - *imm + out: + - *uint + # Shift* (variable) - go: ShiftLeft asm: "VPSLLV[WD]" diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 1bdfec1b..166a5933 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -47,6 +47,9 @@ type rawOperation struct { GoArch string // GOARCH for this definition Asm string // Assembly mnemonic OperandOrder *string // optional Operand order for better Go declarations + // Optional tag to indicate this operation is paired with special generic->machine ssa lowering rules. 
+ // Should be paired with special templates in gen_simdrules.go + SpecialLower *string In []Operand // Parameters InVariant []Operand // Optional parameters @@ -62,6 +65,12 @@ type rawOperation struct { ConstImm *string // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. NameAndSizeCheck *bool + // If non-nil, all generation in gen_simdTypes.go and gen_intrinsics will be skipped. + NoTypes *string + // If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped. + NoGenericOps *string + // If non-nil, this string will be attached to the machine ssa op name. + SSAVariant *string } func (o *Operation) DecodeUnified(v *unify.Value) error { @@ -114,6 +123,18 @@ func (o *Operation) VectorWidth() int { panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o)) } +func machineOpName(maskType maskShape, gOp Operation) string { + asm := gOp.Asm + if maskType == 2 { + asm += "Masked" + } + asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) + if gOp.SSAVariant != nil { + asm += *gOp.SSAVariant + } + return asm +} + func compareStringPointers(x, y *string) int { if x != nil && y != nil { return compareNatural(*x, *y) diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index 5528b4d8..e51d289b 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -1,6 +1,7 @@ !sum - go: ShiftAllLeft nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- @@ -8,17 +9,43 @@ - go: ShiftAllRight signed: false nameAndSizeCheck: true + specialLower: sftimm commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
- go: ShiftAllRight signed: true + specialLower: sftimm nameAndSizeCheck: true commutative: false extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +- go: shiftAllLeftConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: false + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" +- go: shiftAllRightConst # no APIs, only ssa ops. + noTypes: "true" + noGenericOps: "true" + SSAVariant: "const" + signed: true + nameAndSizeCheck: true + commutative: false + extension: "AVX.*" + - go: ShiftLeft nameAndSizeCheck: true commutative: false diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml index 4ade55d7..e7ccdeb0 100644 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ b/internal/simdgen/ops/ShiftRotate/go.yaml @@ -32,6 +32,30 @@ out: - *int +- go: shiftAllLeftConst + asm: "VPSLL[WDQ]" + in: + - *any + - &imm + class: immediate + immOffset: 0 + out: + - *any +- go: shiftAllRightConst + asm: "VPSRL[WDQ]" + in: + - *int + - *imm + out: + - *int +- go: shiftAllRightConst + asm: "VPSRA[WDQ]" + in: + - *uint + - *imm + out: + - *uint + # Shift* (variable) - go: ShiftLeft asm: "VPSLLV[WD]" From fd301eea3aa0a7038f6e164fe44adcd2fa64c157 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 4 Aug 2025 13:55:36 -0400 Subject: [PATCH 170/200] internal/simdgen: add VPBLENDVB and VPBLENDMB These are not exported -- for use in emulation functions. 
Generates dev.simd CL 693155 Change-Id: I9f89465a3f98dcd0cb0f60f7c184bd30e25004da Reviewed-on: https://go-review.googlesource.com/c/arch/+/693175 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 6 ++++ internal/simdgen/go.yaml | 37 +++++++++++++++++++++- internal/simdgen/ops/Moves/categories.yaml | 6 ++++ internal/simdgen/ops/Moves/go.yaml | 37 +++++++++++++++++++++- 4 files changed, 84 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 0afa0b14..26e80c5e 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -380,6 +380,12 @@ documentation: !string |- // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. +- go: blend + commutative: false + extension: "AVX.*" + documentation: !string |- + // NAME blends two vectors based on mask values, choosing either + // the first or the second based on whether the third is false or true - go: Mul commutative: true extension: "AVX.*" diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index ba1e96be..cdee0870 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -637,7 +637,6 @@ base: $b bits: $e - - go: Set128 asm: "VINSERTI128" in: @@ -823,6 +822,42 @@ - *any out: - *any + +# For now a non-public method because +# (1) [OverwriteClass] must be set together with [OverwriteBase] +# (2) "simdgen does not support [OverwriteClass] in inputs". +# That means the signature is wrong. +- go: blend + asm: VPBLENDVB + in: + - &v + go: $t + class: vreg + base: int + - *v + - + class: vreg + base: int + name: mask + out: + - *v + +# For AVX512 +- go: blend + asm: VPBLENDM[BWDQ] + in: + - &v + go: $t + bits: 512 + class: vreg + base: int + - *v + inVariant: + - + class: mask + out: + - *v + # "Normal" multiplication is only available for floats. 
# This only covers the single and double precision. - go: Mul diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index a6dd7bab..5e51becb 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -40,3 +40,9 @@ documentation: !string |- // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. +- go: blend + commutative: false + extension: "AVX.*" + documentation: !string |- + // NAME blends two vectors based on mask values, choosing either + // the first or the second based on whether the third is false or true diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index c1dd6e4d..52e6228d 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -27,7 +27,6 @@ base: $b bits: $e - - go: Set128 asm: "VINSERTI128" in: @@ -213,3 +212,39 @@ - *any out: - *any + +# For now a non-public method because +# (1) [OverwriteClass] must be set together with [OverwriteBase] +# (2) "simdgen does not support [OverwriteClass] in inputs". +# That means the signature is wrong. 
+- go: blend + asm: VPBLENDVB + in: + - &v + go: $t + class: vreg + base: int + - *v + - + class: vreg + base: int + name: mask + out: + - *v + +# For AVX512 +- go: blend + asm: VPBLENDM[BWDQ] + in: + - &v + go: $t + bits: 512 + class: vreg + base: int + - *v + inVariant: + - + class: mask + out: + - *v + From dca4598c94df40d5c693e058c663e24aa1a92610 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 1 Aug 2025 17:12:31 -0400 Subject: [PATCH 171/200] internal/simdgen: add profiling flags Change-Id: I9f893e4a1420c135b1affaf355a84b0498e6c981 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693337 Auto-Submit: Austin Clements Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/main.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 6ac22a68..92b1fa98 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -94,6 +94,7 @@ import ( "maps" "os" "path/filepath" + "runtime/pprof" "slices" "strings" @@ -116,6 +117,9 @@ var ( flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") + + flagCPUProfile = flag.String("cpuprofile", "", "write CPU profile to `file`") + flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`") ) const simdPackage = "simd" @@ -123,6 +127,26 @@ const simdPackage = "simd" func main() { flag.Parse() + if *flagCPUProfile != "" { + f, err := os.Create(*flagCPUProfile) + if err != nil { + log.Fatalf("-cpuprofile: %s", err) + } + defer f.Close() + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + } + if *flagMemProfile != "" { + f, err := os.Create(*flagMemProfile) + if err != nil { + log.Fatalf("-memprofile: %s", err) + } + defer func() { + pprof.WriteHeapProfile(f) + f.Close() + }() + } + var inputs 
[]unify.Closure
 	if *FlagArch != "amd64" {

From c9a2ab369950b498dfd7d4e53fd38f7df616803a Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Tue, 5 Aug 2025 16:22:15 -0400
Subject: [PATCH 172/200] internal/unify: use arbitrary expressions for
 environment sets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, nonDetEnv, which represents a set of environments, uses a
restricted algebraic form consisting of a cross-product of sets of
environments. Unfortunately, this restriction means that if we want to
union two environment sets, we may need to multiply factors out in order
to normalize the result into this restricted representation.

In some cases, this can result in exponential blowup. For example, if
there are nested sums, then the environment will contain bindings of
variables that don't matter for whole branches of the value expression,
but that still participate when constructing the union of environment
sets. These dead variables wind up expanding the environment
representation exponentially, even though they have no effect.

To fix this, we lift this restriction. Now, a nonDetEnv is an arbitrary
algebraic expression of unions and cross-products. This is actually much
simpler, implementation-wise, and addresses this exponential blowup
problem.

We add a stress test demonstrating nested sums that prior to this change
required 12 GB of RAM and took 20 seconds to unify. With this change, it
takes 90 MB of RAM and a fraction of a second.

We're about to add "import" support to YAML, which will tend to create
these nested sums. Thus we have to fix this first.

This has no effect on the output of simdgen. 
Curiously, it also has no effect on the time of simdgen, but it does reduce its memory by almost 10x: │ /tmp/before.bench │ /tmp/after.bench │ │ sec/op │ sec/op vs base │ Simdgen 26.40 ± 3% 26.49 ± 26% ~ (p=1.000 n=10) │ /tmp/before.bench │ /tmp/after.bench │ │ peak-RSS-bytes │ peak-RSS-bytes vs base │ Simdgen 1443.4Mi ± 1% 178.4Mi ± 1% -87.64% (p=0.000 n=10) Change-Id: Idaecb8693065c61d5d63afbc1014d3300886def8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693338 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Auto-Submit: Austin Clements --- internal/unify/dot.go | 46 ++- internal/unify/env.go | 535 +++++++++++++--------------- internal/unify/html.go | 87 +---- internal/unify/testdata/stress.yaml | 33 ++ internal/unify/yaml.go | 44 ++- 5 files changed, 367 insertions(+), 378 deletions(-) create mode 100644 internal/unify/testdata/stress.yaml diff --git a/internal/unify/dot.go b/internal/unify/dot.go index 143fa615..a26b9dad 100644 --- a/internal/unify/dot.go +++ b/internal/unify/dot.go @@ -96,14 +96,15 @@ func (enc *dotEncoder) edge(from, to string, label string, args ...any) { fmt.Fprintf(enc.w, "%s -> %s [label=%q];\n", from, to, l) } -func (enc *dotEncoder) subgraph(v *Value) (vID, cID string) { +func (enc *dotEncoder) valueSubgraph(v *Value) { enc.valLimit = maxNodes - cID = enc.newID("cluster_%d") + cID := enc.newID("cluster_%d") fmt.Fprintf(enc.w, "subgraph %s {\n", cID) fmt.Fprintf(enc.w, "style=invis;") - vID = enc.value(v) + vID := enc.value(v) fmt.Fprintf(enc.w, "}\n") - return + // We don't need the IDs right now. 
+ _, _ = cID, vID } func (enc *dotEncoder) value(v *Value) string { @@ -181,3 +182,40 @@ func (enc *dotEncoder) value(v *Value) string { return enc.node(fmt.Sprintf("Var %s", enc.idp.unique(vd.id)), "") } } + +func (enc *dotEncoder) envSubgraph(e nonDetEnv) { + enc.valLimit = maxNodes + cID := enc.newID("cluster_%d") + fmt.Fprintf(enc.w, "subgraph %s {\n", cID) + fmt.Fprintf(enc.w, "style=invis;") + vID := enc.env(e.root) + fmt.Fprintf(enc.w, "}\n") + _, _ = cID, vID +} + +func (enc *dotEncoder) env(e *envExpr) string { + switch e.kind { + default: + panic("bad kind") + case envZero: + return enc.node("0", "") + case envUnit: + return enc.node("1", "") + case envBinding: + node := enc.node(fmt.Sprintf("%q :", enc.idp.unique(e.id)), "") + enc.edge(node, enc.value(e.val), "") + return node + case envProduct: + node := enc.node("⨯", "") + for _, op := range e.operands { + enc.edge(node, enc.env(op), "") + } + return node + case envSum: + node := enc.node("+", "") + for _, op := range e.operands { + enc.edge(node, enc.env(op), "") + } + return node + } +} diff --git a/internal/unify/env.go b/internal/unify/env.go index 618887cd..0f45af39 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -8,29 +8,36 @@ import ( "fmt" "iter" "reflect" - "slices" "strings" ) -// A nonDetEnv is a non-deterministic mapping from [ident]s to [Value]s. +// A nonDetEnv is an immutable set of environments, where each environment is a +// mapping from [ident]s to [Value]s. // -// Logically, this is just a set of deterministic environments, where each -// deterministic environment is a complete mapping from each [ident]s to exactly -// one [Value]. In particular, [ident]s are NOT necessarily independent of each -// other. For example, an environment may have both {x: 1, y: 1} and {x: 2, y: -// 2}, but not {x: 1, y: 2}. +// To keep this compact, we use an algebraic representation similar to +// relational algebra. 
The atoms are zero, unit, or a singular binding: // -// A nonDetEnv is immutable. +// - A singular binding is an environment set consisting of a single environment +// that binds a single ident to a single value. // -// Often [ident]s are independent of each other, so the representation optimizes -// for this by using a cross-product of environment factors, where each factor -// is a sum of deterministic environments. These operations obey the usual -// distributional laws, so we can always canonicalize into this form. (It MAY be -// worthwhile to allow more general expressions of sums and products.) +// - Zero is the empty set. // -// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, in which the -// variables x and y are dependent, we need a single factor that covers x and y -// and consists of two terms: {x: 1, y: 1} + {x: 2, y: 2}. +// - Unit is an environment set consisting of a single, empty environment (no +// bindings). +// +// From these, we build up more complex sets of environments using sums and +// cross products: +// +// - A sum is simply the union of the two environment sets. +// +// - A cross product is the Cartesian product of the two environment sets, +// followed by combining each pair of environments. Combining simply merges the +// two mappings, but fails if the mappings overlap. +// +// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, we build the two +// environments and sum them: +// +// ({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2}) // // If we add a third variable z that can be 1 or 2, independent of x and y, we // get four logical environments: @@ -40,43 +47,59 @@ import ( // {x: 1, y: 1, z: 2} // {x: 2, y: 2, z: 2} // -// This could be represented as a single factor that is the sum of these four -// detEnvs, but because z is independent, it can be a separate factor. 
Hence,
-// the most compact representation of this environment is:
+// This could be represented as a sum of all four environments, but because z is
+// independent, we can use a more compact representation:
+//
+//	(({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})) ⨯ ({z: 1} + {z: 2})
 //
-// That is, two factors, where each is the sum of two terms.
+// Environment sets obey commutative algebra rules:
 //
-//	({x: 1, y: 1} + {x: 2, y: 2}) ⨯ ({z: 1} + {z: 2})
+//	e + 0 = e
+//	e ⨯ 0 = 0
+//	e ⨯ 1 = e
+//	e + f = f + e
+//	e ⨯ f = f ⨯ e
 type nonDetEnv struct {
-	// factors is a list of the multiplicative factors in this environment. The
-	// set of deterministic environments is the cross-product of these factors.
-	// All factors must have disjoint variables.
-	factors []*envSum
+	root *envExpr
 }
 
-// envSum is a sum of deterministic environments, all with the same set of
-// variables.
-type envSum struct {
-	ids   []*ident // TODO: Do we ever use this as a slice? Should it be a map?
-	terms []detEnv
-}
+type envExpr struct {
+	// TODO: A tree-based data structure for this may not be ideal, since it
+	// involves a lot of walking to find things and we often have to do deep
+	// rewrites anyway for partitioning. Would some flattened array-style
+	// representation be better, possibly combined with an index of ident uses?
+	// We could even combine that with an immutable array abstraction (ala
+	// Clojure) that could enable more efficient construction operations.
+
+	kind envExprKind
+
+	// For envBinding
+	id  *ident
+	val *Value
 
-type detEnv struct {
-	vals []*Value // Indexes correspond to envSum.ids
+	// For sum or product. Len must be >= 2 and none of the elements can have
+	// the same kind as this node.
+	operands []*envExpr
 }
 
+type envExprKind byte
+
+const (
+	envZero envExprKind = iota
+	envUnit
+	envProduct
+	envSum
+	envBinding
+)
+
 var (
-	// zeroEnvFactor is the "0" value of an [envSum]. It's a a factor with no
-	// sum terms. 
This is easiest to think of as: an empty sum must be the - // additive identity, 0. - zeroEnvFactor = &envSum{} - - // topEnv is the algebraic one value of a [nonDetEnv]. It has no factors - // because the product of no factors is the multiplicative identity. - topEnv = nonDetEnv{} - // bottomEnv is the algebraic zero value of a [nonDetEnv]. The product of - // bottomEnv with x is bottomEnv, and the sum of bottomEnv with y is y. - bottomEnv = nonDetEnv{factors: []*envSum{zeroEnvFactor}} + // topEnv is the unit value (multiplicative identity) of a [nonDetEnv]. + topEnv = nonDetEnv{envExprUnit} + // bottomEnv is the zero value (additive identity) of a [nonDetEnv]. + bottomEnv = nonDetEnv{envExprZero} + + envExprZero = &envExpr{kind: envZero} + envExprUnit = &envExpr{kind: envUnit} ) // bind binds id to each of vals in e. @@ -90,232 +113,132 @@ func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv { return bottomEnv } - // TODO: If any of vals are _, should we just not do anything? We're kind of + // TODO: If any of vals are _, should we just drop that val? We're kind of // inconsistent about whether an id missing from e means id is invalid or // means id is _. // Check that id isn't present in e. - for _, f := range e.factors { - if slices.Contains(f.ids, id) { - panic("id " + id.name + " already present in environment") - } + for range e.root.bindings(id) { + panic("id " + id.name + " already present in environment") } - // Create the new sum term. - sum := &envSum{ids: []*ident{id}} + // Create a sum of all the values. + bindings := make([]*envExpr, 0, 1) for _, val := range vals { - sum.terms = append(sum.terms, detEnv{vals: []*Value{val}}) + bindings = append(bindings, &envExpr{kind: envBinding, id: id, val: val}) } + // Multiply it in. 
- factors := append(e.factors[:len(e.factors):len(e.factors)], sum) - return nonDetEnv{factors} + return nonDetEnv{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} } func (e nonDetEnv) isBottom() bool { - if len(e.factors) == 0 { - // This is top. - return false - } - return len(e.factors[0].terms) == 0 + return e.root.kind == envZero } -func (e nonDetEnv) vars() iter.Seq[*ident] { - return func(yield func(*ident) bool) { - for _, t := range e.factors { - for _, id := range t.ids { - if !yield(id) { - return +// bindings yields all [envBinding] nodes in e with the given id. If id is nil, +// it yields all binding nodes. +func (e *envExpr) bindings(id *ident) iter.Seq[*envExpr] { + // This is just a pre-order walk and it happens this is the only thing we + // need a pre-order walk for. + return func(yield func(*envExpr) bool) { + var rec func(e *envExpr) bool + rec = func(e *envExpr) bool { + if e.kind == envBinding && (id == nil || e.id == id) { + if !yield(e) { + return false } } - } - } -} - -// all enumerates all deterministic environments in e. -// -// The result slice is in the same order as the slice returned by -// [nonDetEnv2.vars]. The slice is reused between iterations. -func (e nonDetEnv) all() iter.Seq[[]*Value] { - return func(yield func([]*Value) bool) { - var vals []*Value - var walk func(int) bool - walk = func(i int) bool { - if i == len(e.factors) { - return yield(vals) - } - start := len(vals) - for _, term := range e.factors[i].terms { - vals = append(vals[:start], term.vals...) - if !walk(i + 1) { + for _, o := range e.operands { + if !rec(o) { return false } } return true } - walk(0) + rec(e) } } -// allOrdered is like all, but idOrder controls the order of the values in the -// resulting slice. Any [ident]s in idOrder that are missing from e are set to -// topValue. The values of idOrder must be a bijection with [0, n). 
-func (e nonDetEnv) allOrdered(idOrder map[*ident]int) iter.Seq[[]*Value] { - valsLen := 0 - for _, idx := range idOrder { - valsLen = max(valsLen, idx+1) - } - - return func(yield func([]*Value) bool) { - vals := make([]*Value, valsLen) - // e may not have all of the IDs in idOrder. Make sure any missing - // values are top. - for i := range vals { - vals[i] = topValue - } - var walk func(int) bool - walk = func(i int) bool { - if i == len(e.factors) { - return yield(vals) - } - for _, term := range e.factors[i].terms { - for j, id := range e.factors[i].ids { - vals[idOrder[id]] = term.vals[j] - } - if !walk(i + 1) { - return false - } - } - return true +// newEnvExprProduct constructs a product node from exprs, performing +// simplifications. It does NOT check that bindings are disjoint. +func newEnvExprProduct(exprs ...*envExpr) *envExpr { + factors := make([]*envExpr, 0, 2) + for _, expr := range exprs { + switch expr.kind { + case envZero: + return envExprZero + case envUnit: + // No effect on product + case envProduct: + factors = append(factors, expr.operands...) + default: + factors = append(factors, expr) } - walk(0) } -} -func crossEnvs(envs ...nonDetEnv) nonDetEnv { - // Combine the factors of envs - var factors []*envSum - haveIDs := map[*ident]struct{}{} - for _, e := range envs { - if e.isBottom() { - // The environment is bottom, so the whole product goes to - // bottom. - return bottomEnv - } - // Check that all ids are disjoint. - for _, f := range e.factors { - for _, id := range f.ids { - if _, ok := haveIDs[id]; ok { - panic("conflict on " + id.name) - } - haveIDs[id] = struct{}{} - } - } - // Everything checks out. Multiply the factors. - factors = append(factors, e.factors...) 
+ if len(factors) == 0 { + return envExprUnit + } else if len(factors) == 1 { + return factors[0] } - return nonDetEnv{factors: factors} + return &envExpr{kind: envProduct, operands: factors} } -func sumEnvs(envs ...nonDetEnv) nonDetEnv { - // nonDetEnv is a product at the top level, so we implement summation using - // the distributive law. We also use associativity to keep as many top-level - // factors as we can, since those are what keep the environment compact. - // - // a * b * c + a * d (where a, b, c, and d are factors) - // (combine common factors) - // = a * (b * c + d) - // (expand factors into their sum terms) - // = a * ((b_1 + b_2 + ...) * (c_1 + c_2 + ...) + d) - // (where b_i and c_i are deterministic environments) - // (FOIL) - // = a * (b_1 * c_1 + b_1 * c_2 + b_2 * c_1 + b_2 * c2 + d) - // (all factors are now in canonical form) - // = a * e - // - // The product of two deterministic environments is a deterministic - // environment, and the sum of deterministic environments is a factor, so - // this process results in the canonical product-of-sums form. - // - // TODO: This is a bit of a one-way process. We could try to factor the - // environment to reduce the number of sums. I'm not sure how to do this - // efficiently. It might be possible to guide it by gathering the - // distributions of each ID's bindings. E.g., if there are 12 deterministic - // environments in a sum and $x is bound to 4 different values, each 3 - // times, then it *might* be possible to factor out $x into a 4-way sum of - // its own. - - factors, toSum := commonFactors(envs) - - if len(toSum) > 0 { - // Collect all IDs into a single order. - var ids []*ident - idOrder := make(map[*ident]int) - for _, e := range toSum { - for v := range e.vars() { - if _, ok := idOrder[v]; !ok { - idOrder[v] = len(ids) - ids = append(ids, v) +// newEnvExprSum constructs a sum node from exprs, performing simplifications. 
+func newEnvExprSum(exprs ...*envExpr) *envExpr { + // TODO: If all of envs are products (or bindings), factor any common terms. + // E.g., x * y + x * z ==> x * (y + z). This is easy to do for binding + // terms, but harder to do for more general terms. + + var have smallSet[*envExpr] + terms := make([]*envExpr, 0, 2) + for _, expr := range exprs { + switch expr.kind { + case envZero: + // No effect on sum + case envSum: + for _, expr1 := range expr.operands { + if have.Add(expr1) { + terms = append(terms, expr1) } } - } - - // Flatten out each term in the sum. - var summands []detEnv - for _, env := range toSum { - for vals := range env.allOrdered(idOrder) { - summands = append(summands, detEnv{vals: slices.Clone(vals)}) + default: + if have.Add(expr) { + terms = append(terms, expr) } } - factors = append(factors, &envSum{ids: ids, terms: summands}) } - return nonDetEnv{factors: factors} -} - -// commonFactors finds common factors that can be factored out of a summation of -// [nonDetEnv]s. -func commonFactors(envs []nonDetEnv) (common []*envSum, toSum []nonDetEnv) { - // Drop any bottom environments. They don't contribute to the sum and they - // would complicate some logic below. - envs = slices.DeleteFunc(envs, func(e nonDetEnv) bool { - return e.isBottom() - }) - if len(envs) == 0 { - return bottomEnv.factors, nil + if len(terms) == 0 { + return envExprZero + } else if len(terms) == 1 { + return terms[0] } + return &envExpr{kind: envSum, operands: terms} +} - // It's very common that the exact same factor will appear across all envs. - // Keep those factored out. - // - // TODO: Is it also common to have vars that are bound to the same value - // across all envs? If so, we could also factor those into common terms. - counts := map[*envSum]int{} - for _, e := range envs { - for _, f := range e.factors { - counts[f]++ - } +func crossEnvs(env1, env2 nonDetEnv) nonDetEnv { + // Confirm that envs have disjoint idents. 
+ var ids1 smallSet[*ident] + for e := range env1.root.bindings(nil) { + ids1.Add(e.id) } - for _, f := range envs[0].factors { - if counts[f] == len(envs) { - // Common factor - common = append(common, f) + for e := range env2.root.bindings(nil) { + if ids1.Has(e.id) { + panic(fmt.Sprintf("%s bound on both sides of cross-product", e.id.name)) } } - // Any other factors need to be multiplied out. - for _, env := range envs { - var newFactors []*envSum - for _, f := range env.factors { - if counts[f] != len(envs) { - newFactors = append(newFactors, f) - } - } - if len(newFactors) > 0 { - toSum = append(toSum, nonDetEnv{factors: newFactors}) - } - } + return nonDetEnv{newEnvExprProduct(env1.root, env2.root)} +} - return common, toSum +func sumEnvs(envs ...nonDetEnv) nonDetEnv { + exprs := make([]*envExpr, len(envs)) + for i := range envs { + exprs[i] = envs[i].root + } + return nonDetEnv{newEnvExprSum(exprs...)} } // envPartition is a subset of an env where id is bound to value in all @@ -326,69 +249,125 @@ type envPartition struct { env nonDetEnv } +// partitionBy splits e by distinct bindings of id and removes id from each +// partition. +// +// If there are environments in e where id is not bound, they will not be +// reflected in any partition. +// +// It panics if e is bottom, since attempting to partition an empty environment +// set almost certainly indicates a bug. func (e nonDetEnv) partitionBy(id *ident) []envPartition { if e.isBottom() { - // Bottom contains all variables - return []envPartition{{id: id, value: bottomValue, env: e}} + // We could return zero partitions, but getting here at all almost + // certainly indicates a bug. + panic("cannot partition empty environment set") } - // Find the factor containing id and id's index in that factor. - idFactor, idIndex := -1, -1 - var newIDs []*ident - for factI, fact := range e.factors { - idI := slices.Index(fact.ids, id) - if idI < 0 { + // Emit a partition for each value of id. 
+ var seen smallSet[*Value] + var parts []envPartition + for n := range e.root.bindings(id) { + if !seen.Add(n.val) { + // Already emitted a partition for this value. continue - } else if idFactor != -1 { - panic("multiple factors containing id " + id.name) - } else { - idFactor, idIndex = factI, idI - // Drop id from this factor's IDs - newIDs = without(fact.ids, idI) } - } - if idFactor == -1 { - panic("id " + id.name + " not found in environment") - } - // If id is the only term in its factor, then dropping it is equivalent to - // making the factor be the unit value, so we can just drop the factor. (And - // if this is the only factor, we'll arrive at [topEnv], which is exactly - // what we want!). In this case we can use the same nonDetEnv in all of the - // partitions. - isUnit := len(newIDs) == 0 - var unitFactors []*envSum - if isUnit { - unitFactors = without(e.factors, idFactor) + parts = append(parts, envPartition{ + id: id, + value: n.val, + env: nonDetEnv{e.root.substitute(id, n.val)}, + }) } - // Create a partition for each distinct value of id. - var parts []envPartition - partIndex := map[*Value]int{} - for _, det := range e.factors[idFactor].terms { - val := det.vals[idIndex] - i, ok := partIndex[val] - if !ok { - i = len(parts) - var factors []*envSum - if isUnit { - factors = unitFactors - } else { - // Copy all other factor - factors = slices.Clone(e.factors) - factors[idFactor] = &envSum{ids: newIDs} + return parts +} + +// substitute replaces bindings of id to val with 1 and bindings of id to any +// other value with 0 and simplifies the result. +func (e *envExpr) substitute(id *ident, val *Value) *envExpr { + switch e.kind { + default: + panic("bad kind") + + case envZero, envUnit: + return e + + case envBinding: + if e.id != id { + return e + } else if e.val != val { + return envExprZero + } else { + return envExprUnit + } + + case envProduct, envSum: + // Substitute each operand. 
Sometimes, this won't change anything, so we + // build the new operands list lazily. + var nOperands []*envExpr + for i, op := range e.operands { + nOp := op.substitute(id, val) + if nOperands == nil && op != nOp { + // Operand diverged; initialize nOperands. + nOperands = make([]*envExpr, 0, len(e.operands)) + nOperands = append(nOperands, e.operands[:i]...) + } + if nOperands != nil { + nOperands = append(nOperands, nOp) } - parts = append(parts, envPartition{id: id, value: val, env: nonDetEnv{factors: factors}}) - partIndex[val] = i } + if nOperands == nil { + // Nothing changed. + return e + } + if e.kind == envProduct { + return newEnvExprProduct(nOperands...) + } else { + return newEnvExprSum(nOperands...) + } + } +} + +// A smallSet is a set optimized for stack allocation when small. +type smallSet[T comparable] struct { + array [32]T + n int - if !isUnit { - factor := parts[i].env.factors[idFactor] - newVals := without(det.vals, idIndex) - factor.terms = append(factor.terms, detEnv{vals: newVals}) + m map[T]struct{} +} + +// Has returns whether val is in set. +func (s *smallSet[T]) Has(val T) bool { + arr := s.array[:s.n] + for i := range arr { + if arr[i] == val { + return true } } - return parts + _, ok := s.m[val] + return ok +} + +// Add adds val to the set and returns true if it was added (not already +// present). +func (s *smallSet[T]) Add(val T) bool { + // Test for presence. + if s.Has(val) { + return false + } + + // Add it + if s.n < len(s.array) { + s.array[s.n] = val + s.n++ + } else { + if s.m == nil { + s.m = make(map[T]struct{}) + } + s.m[val] = struct{}{} + } + return true } type ident struct { @@ -494,7 +473,3 @@ func (p *identPrinter) slice(ids []*ident) string { } return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) } - -func without[Elt any](s []Elt, i int) []Elt { - return append(s[:i:i], s[i+1:]...) 
-} diff --git a/internal/unify/html.go b/internal/unify/html.go index d2434fe4..d59bd8fc 100644 --- a/internal/unify/html.go +++ b/internal/unify/html.go @@ -52,7 +52,7 @@ const htmlCSS = ` type htmlTracer struct { w io.Writer dot *dotEncoder - svgs map[*Value]string + svgs map[any]string } func (t *htmlTracer) writeTree(node *traceTree) { @@ -91,19 +91,19 @@ func (t *htmlTracer) writeTree(node *traceTree) { } } -func (t *htmlTracer) svg(v *Value) string { - if s, ok := t.svgs[v]; ok { +func htmlSVG[Key comparable](t *htmlTracer, f func(Key), arg Key) string { + if s, ok := t.svgs[arg]; ok { return s } var buf strings.Builder - t.dot.subgraph(v) + f(arg) t.dot.writeSvg(&buf) t.dot.clear() svg := buf.String() if t.svgs == nil { - t.svgs = make(map[*Value]string) + t.svgs = make(map[any]string) } - t.svgs[v] = svg + t.svgs[arg] = svg buf.Reset() return svg } @@ -112,79 +112,12 @@ func (t *htmlTracer) emit(vs []*Value, labels []string, env nonDetEnv) { fmt.Fprintf(t.w, `
`) for i, v := range vs { fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) - fmt.Fprintf(t.w, `
%s
`, i+1, t.svg(v)) + fmt.Fprintf(t.w, `
%s
`, i+1, htmlSVG(t, t.dot.valueSubgraph, v)) } + col := len(vs) - t.emitEnv(env, len(vs)) + fmt.Fprintf(t.w, `
in
`, col+1) + fmt.Fprintf(t.w, `
%s
`, col+1, htmlSVG(t, t.dot.envSubgraph, env)) fmt.Fprintf(t.w, `
`) } - -func (t *htmlTracer) emitEnv(env nonDetEnv, colStart int) { - if env.isBottom() { - fmt.Fprintf(t.w, `
_|_
`, colStart+1) - return - } - - colLimit := 10 - col := colStart - for i, f := range env.factors { - if i > 0 { - // Print * between each factor. - fmt.Fprintf(t.w, `
×
`, col+1) - col++ - } - - var idCols []int - for i, id := range f.ids { - var str string - if i == 0 && len(f.ids) > 1 { - str = "(" - } - if colLimit <= 0 { - str += "..." - } else { - str += html.EscapeString(t.dot.idp.unique(id)) - } - if (i == len(f.ids)-1 || colLimit <= 0) && len(f.ids) > 1 { - str += ")" - } - - fmt.Fprintf(t.w, `
%s
`, col+1, str) - idCols = append(idCols, col) - - col++ - if colLimit <= 0 { - break - } - colLimit-- - } - - fmt.Fprintf(t.w, `
`, idCols[0]+1, col+1) - rowLimit := 10 - row := 0 - for _, term := range f.terms { - // TODO: Print + between rows? With some horizontal something to - // make it clear what it applies across? - - for i, val := range term.vals { - fmt.Fprintf(t.w, `
`, row+1, idCols[i]-idCols[0]+1) - if i < len(term.vals)-1 && i == len(idCols)-1 { - fmt.Fprintf(t.w, `...
`) - break - } else if rowLimit <= 0 { - fmt.Fprintf(t.w, `...
`) - } else { - fmt.Fprintf(t.w, `%s`, t.svg(val)) - } - } - - row++ - if rowLimit <= 0 { - break - } - rowLimit-- - } - fmt.Fprintf(t.w, ``) - } -} diff --git a/internal/unify/testdata/stress.yaml b/internal/unify/testdata/stress.yaml new file mode 100644 index 00000000..e4478536 --- /dev/null +++ b/internal/unify/testdata/stress.yaml @@ -0,0 +1,33 @@ +# In the original representation of environments, this caused an exponential +# blowup in time and allocation. With that representation, this took about 20 +# seconds on my laptop and had a max RSS of ~12 GB. Big enough to be really +# noticeable, but not so big it's likely to crash a developer machine. With the +# better environment representation, it runs almost instantly and has an RSS of +# ~90 MB. +unify: +- !sum + - !sum [1, 2] + - !sum [3, 4] + - !sum [5, 6] + - !sum [7, 8] + - !sum [9, 10] + - !sum [11, 12] + - !sum [13, 14] + - !sum [15, 16] + - !sum [17, 18] + - !sum [19, 20] + - !sum [21, 22] +- !sum + - !sum [1, 2] + - !sum [3, 4] + - !sum [5, 6] + - !sum [7, 8] + - !sum [9, 10] + - !sum [11, 12] + - !sum [13, 14] + - !sum [15, 16] + - !sum [17, 18] + - !sum [19, 20] + - !sum [21, 22] +all: + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 1b1c8139..ff5115f7 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -93,7 +93,7 @@ func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error { } func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error { - dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident)} + dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident), env: topEnv} val, err := dec.value(node) if err != nil { return err @@ -349,25 +349,35 @@ func (enc *yamlEncoder) closure(c Closure) *yaml.Node { } func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node { - var n yaml.Node - n.Kind = yaml.SequenceNode - n.Tag = "!env" - for _, term := range e.factors { - 
var nTerm yaml.Node - n.Content = append(n.Content, &nTerm) - nTerm.Kind = yaml.SequenceNode - for _, det := range term.terms { - var nDet yaml.Node - nTerm.Content = append(nTerm.Content, &nDet) - nDet.Kind = yaml.MappingNode - for i, val := range det.vals { - var nLabel yaml.Node - nLabel.SetString(enc.idp.unique(term.ids[i])) - nDet.Content = append(nDet.Content, &nLabel, enc.value(val)) + var encode func(e *envExpr) *yaml.Node + encode = func(e *envExpr) *yaml.Node { + var n yaml.Node + switch e.kind { + default: + panic("bad kind") + case envZero: + n.SetString("0") + case envUnit: + n.SetString("1") + case envBinding: + var id yaml.Node + id.SetString(enc.idp.unique(e.id)) + n.Kind = yaml.MappingNode + n.Content = []*yaml.Node{&id, enc.value(e.val)} + case envProduct, envSum: + n.Kind = yaml.SequenceNode + if e.kind == envProduct { + n.Tag = "!product" + } else { + n.Tag = "!sum" + } + for _, e2 := range e.operands { + n.Content = append(n.Content, encode(e2)) } } + return &n } - return &n + return encode(e.root) } var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`) From 0a9327541e7556eb6439fd93c5d825d1f9205723 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 5 Aug 2025 14:17:06 -0400 Subject: [PATCH 173/200] internal/unify: rename nonDetEnv -> envSet Leaning into the "this is just a set" view, we also rename isBottom to isEmpty and sumEnvs to unionEnvs. 
Change-Id: Ib69c99995b7fd0944eab88721eefdb28e3edecee Reviewed-on: https://go-review.googlesource.com/c/arch/+/693339 Reviewed-by: Junyang Shao Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- internal/unify/closure.go | 18 ++++++++-------- internal/unify/domain.go | 4 ++-- internal/unify/dot.go | 2 +- internal/unify/env.go | 44 +++++++++++++++++++-------------------- internal/unify/html.go | 2 +- internal/unify/trace.go | 8 +++---- internal/unify/unify.go | 22 ++++++++++---------- internal/unify/yaml.go | 12 +++++------ 8 files changed, 56 insertions(+), 56 deletions(-) diff --git a/internal/unify/closure.go b/internal/unify/closure.go index 8a1636de..5b654879 100644 --- a/internal/unify/closure.go +++ b/internal/unify/closure.go @@ -13,7 +13,7 @@ import ( type Closure struct { val *Value - env nonDetEnv + env envSet } func NewSum(vs ...*Value) Closure { @@ -67,13 +67,13 @@ func (c Closure) All() iter.Seq[*Value] { // continuation for each choice. Similar to a yield function, the // continuation can return false to stop the non-deterministic walk. return func(yield func(*Value) bool) { - c.val.all1(c.env, func(v *Value, e nonDetEnv) bool { + c.val.all1(c.env, func(v *Value, e envSet) bool { return yield(v) }) } } -func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { +func (v *Value) all1(e envSet, cont func(*Value, envSet) bool) bool { switch d := v.Domain.(type) { default: panic(fmt.Sprintf("unknown domain type %T", d)) @@ -93,8 +93,8 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { // TODO: If there are no Vars or Sums under this Def, then nothing can // change the Value or env, so we could just cont(v, e). - var allElt func(elt int, e nonDetEnv) bool - allElt = func(elt int, e nonDetEnv) bool { + var allElt func(elt int, e envSet) bool + allElt = func(elt int, e envSet) bool { if elt == len(fields) { // Build a new Def from the concrete parts. 
Clone parts because // we may reuse it on other non-deterministic branches. @@ -102,7 +102,7 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { return cont(nVal, e) } - return d.fields[fields[elt]].all1(e, func(v *Value, e nonDetEnv) bool { + return d.fields[fields[elt]].all1(e, func(v *Value, e envSet) bool { parts[fields[elt]] = v return allElt(elt+1, e) }) @@ -116,8 +116,8 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { return cont(v, e) } parts := make([]*Value, len(d.vs)) - var allElt func(elt int, e nonDetEnv) bool - allElt = func(elt int, e nonDetEnv) bool { + var allElt func(elt int, e envSet) bool + allElt = func(elt int, e envSet) bool { if elt == len(d.vs) { // Build a new tuple from the concrete parts. Clone parts because // we may reuse it on other non-deterministic branches. @@ -125,7 +125,7 @@ func (v *Value) all1(e nonDetEnv, cont func(*Value, nonDetEnv) bool) bool { return cont(nVal, e) } - return d.vs[elt].all1(e, func(v *Value, e nonDetEnv) bool { + return d.vs[elt].all1(e, func(v *Value, e envSet) bool { parts[elt] = v return allElt(elt+1, e) }) diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 36239054..5c4d349f 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -196,14 +196,14 @@ type Tuple struct { // function because we don't necessarily want *exactly* the same Value // repeated. For example, in YAML encoding, a !sum in a repeated tuple needs // a fresh variable in each instance. 
- repeat []func(nonDetEnv) (*Value, nonDetEnv) + repeat []func(envSet) (*Value, envSet) } func NewTuple(vs ...*Value) Tuple { return Tuple{vs: vs} } -func NewRepeat(gens ...func(nonDetEnv) (*Value, nonDetEnv)) Tuple { +func NewRepeat(gens ...func(envSet) (*Value, envSet)) Tuple { return Tuple{repeat: gens} } diff --git a/internal/unify/dot.go b/internal/unify/dot.go index a26b9dad..6fafa252 100644 --- a/internal/unify/dot.go +++ b/internal/unify/dot.go @@ -183,7 +183,7 @@ func (enc *dotEncoder) value(v *Value) string { } } -func (enc *dotEncoder) envSubgraph(e nonDetEnv) { +func (enc *dotEncoder) envSubgraph(e envSet) { enc.valLimit = maxNodes cID := enc.newID("cluster_%d") fmt.Fprintf(enc.w, "subgraph %s {\n", cID) diff --git a/internal/unify/env.go b/internal/unify/env.go index 0f45af39..b9989dd2 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -11,7 +11,7 @@ import ( "strings" ) -// A nonDetEnv is an immutable set of environments, where each environment is a +// An envSet is an immutable set of environments, where each environment is a // mapping from [ident]s to [Value]s. // // To keep this compact, we use an algebraic representation similar to @@ -59,7 +59,7 @@ import ( // e ⨯ 1 = // e + f = f + e // e ⨯ f = f ⨯ e -type nonDetEnv struct { +type envSet struct { root *envExpr } @@ -93,10 +93,10 @@ const ( ) var ( - // topEnv is the unit value (multiplicative identity) of a [nonDetEnv]. - topEnv = nonDetEnv{envExprUnit} - // bottomEnv is the zero value (additive identity) of a [nonDetEnv]. - bottomEnv = nonDetEnv{envExprZero} + // topEnv is the unit value (multiplicative identity) of a [envSet]. + topEnv = envSet{envExprUnit} + // bottomEnv is the zero value (additive identity) of a [envSet]. + bottomEnv = envSet{envExprZero} envExprZero = &envExpr{kind: envZero} envExprUnit = &envExpr{kind: envUnit} @@ -108,8 +108,8 @@ var ( // // Environments are typically initially constructed by starting with [topEnv] // and calling bind one or more times. 
-func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv { - if e.isBottom() { +func (e envSet) bind(id *ident, vals ...*Value) envSet { + if e.isEmpty() { return bottomEnv } @@ -129,10 +129,10 @@ func (e nonDetEnv) bind(id *ident, vals ...*Value) nonDetEnv { } // Multiply it in. - return nonDetEnv{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} + return envSet{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} } -func (e nonDetEnv) isBottom() bool { +func (e envSet) isEmpty() bool { return e.root.kind == envZero } @@ -218,7 +218,7 @@ func newEnvExprSum(exprs ...*envExpr) *envExpr { return &envExpr{kind: envSum, operands: terms} } -func crossEnvs(env1, env2 nonDetEnv) nonDetEnv { +func crossEnvs(env1, env2 envSet) envSet { // Confirm that envs have disjoint idents. var ids1 smallSet[*ident] for e := range env1.root.bindings(nil) { @@ -230,15 +230,15 @@ func crossEnvs(env1, env2 nonDetEnv) nonDetEnv { } } - return nonDetEnv{newEnvExprProduct(env1.root, env2.root)} + return envSet{newEnvExprProduct(env1.root, env2.root)} } -func sumEnvs(envs ...nonDetEnv) nonDetEnv { +func unionEnvs(envs ...envSet) envSet { exprs := make([]*envExpr, len(envs)) for i := range envs { exprs[i] = envs[i].root } - return nonDetEnv{newEnvExprSum(exprs...)} + return envSet{newEnvExprSum(exprs...)} } // envPartition is a subset of an env where id is bound to value in all @@ -246,7 +246,7 @@ func sumEnvs(envs ...nonDetEnv) nonDetEnv { type envPartition struct { id *ident value *Value - env nonDetEnv + env envSet } // partitionBy splits e by distinct bindings of id and removes id from each @@ -257,8 +257,8 @@ type envPartition struct { // // It panics if e is bottom, since attempting to partition an empty environment // set almost certainly indicates a bug. 
-func (e nonDetEnv) partitionBy(id *ident) []envPartition { - if e.isBottom() { +func (e envSet) partitionBy(id *ident) []envPartition { + if e.isEmpty() { // We could return zero partitions, but getting here at all almost // certainly indicates a bug. panic("cannot partition empty environment set") @@ -276,7 +276,7 @@ func (e nonDetEnv) partitionBy(id *ident) []envPartition { parts = append(parts, envPartition{ id: id, value: n.val, - env: nonDetEnv{e.root.substitute(id, n.val)}, + env: envSet{e.root.substitute(id, n.val)}, }) } @@ -388,7 +388,7 @@ func (d Var) decode(rv reflect.Value) error { return &inexactError{"var", rv.Type().String()} } -func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (d Var) unify(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { // TODO: Vars from !sums in the input can have a huge number of values. // Unifying these could be way more efficient with some indexes over any // exact values we can pull out, like Def fields that are exact Strings. @@ -409,7 +409,7 @@ func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDe // We need to unify w with the value of d in each possible environment. We // can save some work by grouping environments by the value of d, since // there will be a lot of redundancy here. - var nEnvs []nonDetEnv + var nEnvs []envSet envParts := e.partitionBy(d.id) for i, envPart := range envParts { exit := uf.enterVar(d.id, i) @@ -419,7 +419,7 @@ func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDe res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) exit.exit() if err != nil { - return nil, nonDetEnv{}, err + return nil, envSet{}, err } if res.Domain == nil { // This branch entirely failed to unify, so it's gone. @@ -436,7 +436,7 @@ func (d Var) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDe // The effect of this is entirely captured in the environment. 
We can return // back the same Bind node. - return d, sumEnvs(nEnvs...), nil + return d, unionEnvs(nEnvs...), nil } // An identPrinter maps [ident]s to unique string names. diff --git a/internal/unify/html.go b/internal/unify/html.go index d59bd8fc..036b80e2 100644 --- a/internal/unify/html.go +++ b/internal/unify/html.go @@ -108,7 +108,7 @@ func htmlSVG[Key comparable](t *htmlTracer, f func(Key), arg Key) string { return svg } -func (t *htmlTracer) emit(vs []*Value, labels []string, env nonDetEnv) { +func (t *htmlTracer) emit(vs []*Value, labels []string, env envSet) { fmt.Fprintf(t.w, `
`) for i, v := range vs { fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) diff --git a/internal/unify/trace.go b/internal/unify/trace.go index f1a7ea2c..b0aa3525 100644 --- a/internal/unify/trace.go +++ b/internal/unify/trace.go @@ -40,9 +40,9 @@ type tracer struct { type traceTree struct { label string // Identifies this node as a child of parent v, w *Value // Unification inputs - envIn nonDetEnv + envIn envSet res *Value // Unification result - env nonDetEnv + env envSet err error // or error parent *traceTree @@ -127,7 +127,7 @@ func (t *tracer) logf(pat string, vals ...any) { fmt.Fprintf(t.logw, "%s\n", s) } -func (t *tracer) traceUnify(v, w *Value, e nonDetEnv) { +func (t *tracer) traceUnify(v, w *Value, e envSet) { if t == nil { return } @@ -146,7 +146,7 @@ func (t *tracer) traceUnify(v, w *Value, e nonDetEnv) { } } -func (t *tracer) traceDone(res *Value, e nonDetEnv, err error) { +func (t *tracer) traceDone(res *Value, e envSet, err error) { if t == nil { return } diff --git a/internal/unify/unify.go b/internal/unify/unify.go index 6ebed7bd..9d22bf19 100644 --- a/internal/unify/unify.go +++ b/internal/unify/unify.go @@ -103,7 +103,7 @@ func newUnifier() *unifier { // unify1 could not unify the domains of the two values. var errDomains = errors.New("cannot unify domains") -func (v *Value) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (*Value, nonDetEnv, error) { +func (v *Value) unify(w *Value, e envSet, swap bool, uf *unifier) (*Value, envSet, error) { if swap { // Put the values in order. This just happens to be a handy choke-point // to do this at. @@ -122,14 +122,14 @@ func (v *Value) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (*Value, no } } if err != nil { - uf.traceDone(nil, nonDetEnv{}, err) - return nil, nonDetEnv{}, err + uf.traceDone(nil, envSet{}, err) + return nil, envSet{}, err } res := unified(d, v, w) uf.traceDone(res, e2, nil) if d == nil { // Double check that a bottom Value also has a bottom env. 
- if !e2.isBottom() { + if !e2.isEmpty() { panic("bottom Value has non-bottom environment") } } @@ -137,7 +137,7 @@ func (v *Value) unify(w *Value, e nonDetEnv, swap bool, uf *unifier) (*Value, no return res, e2, nil } -func (v *Value) unify1(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (v *Value) unify1(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { // TODO: If there's an error, attach position information to it. vd, wd := v.Domain, w.Domain @@ -180,10 +180,10 @@ func (v *Value) unify1(w *Value, e nonDetEnv, swap bool, uf *unifier) (Domain, n } } - return nil, nonDetEnv{}, errDomains + return nil, envSet{}, errDomains } -func (d Def) unify(o Def, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (d Def) unify(o Def, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { out := Def{fields: make(map[string]*Value)} // Check keys of d against o. @@ -198,7 +198,7 @@ func (d Def) unify(o Def, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEn res, e2, err := dv.unify(ov, e, swap, uf) exit.exit() if err != nil { - return nil, nonDetEnv{}, err + return nil, envSet{}, err } else if res.Domain == nil { // No match. return nil, bottomEnv, nil @@ -216,7 +216,7 @@ func (d Def) unify(o Def, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEn return out, e, nil } -func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonDetEnv, error) { +func (v Tuple) unify(w Tuple, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { if v.repeat != nil && w.repeat != nil { // Since we generate the content of these lazily, there's not much we // can do but just stick them on a list to unify later. 
@@ -253,7 +253,7 @@ func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonD z, e2, err := v1.unify(t.vs[i], e, swap, uf) exit.exit() if err != nil { - return nil, nonDetEnv{}, err + return nil, envSet{}, err } else if z.Domain == nil { return nil, bottomEnv, nil } @@ -268,7 +268,7 @@ func (v Tuple) unify(w Tuple, e nonDetEnv, swap bool, uf *unifier) (Domain, nonD // doRepeat creates a fixed-length tuple from a repeated tuple. The caller is // expected to unify the returned tuples. -func (v Tuple) doRepeat(e nonDetEnv, n int) ([]Tuple, nonDetEnv) { +func (v Tuple) doRepeat(e envSet, n int) ([]Tuple, envSet) { res := make([]Tuple, len(v.repeat)) for i, gen := range v.repeat { res[i].vs = make([]*Value, n) diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index ff5115f7..7edc3d9d 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -112,7 +112,7 @@ type yamlDecoder struct { vars map[string]*ident nSums int - env nonDetEnv + env envSet } func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { @@ -243,7 +243,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { // Decode the children to make sure they're well-formed, but otherwise // discard that decoding and do it again every time we need a new // element. - var gen []func(e nonDetEnv) (*Value, nonDetEnv) + var gen []func(e envSet) (*Value, envSet) origEnv := dec.env elts := node.Content for i, elt := range elts { @@ -256,7 +256,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { // introduced within the element. 
dec.env = origEnv // Add a generator function - gen = append(gen, func(e nonDetEnv) (*Value, nonDetEnv) { + gen = append(gen, func(e envSet) (*Value, envSet) { dec.env = e // TODO: If this is in a sum, this tends to generate a ton of // fresh variables that are different on each branch of the @@ -298,7 +298,7 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { type yamlEncoder struct { idp identPrinter - e nonDetEnv // We track the environment for !repeat nodes. + e envSet // We track the environment for !repeat nodes. } // TODO: Switch some Value marshaling to Closure? @@ -344,11 +344,11 @@ func (enc *yamlEncoder) closure(c Closure) *yaml.Node { // Fill in the env after we've written the value in case value encoding // affects the env. n.Content[1] = enc.env(enc.e) - enc.e = nonDetEnv{} // Allow GC'ing the env + enc.e = envSet{} // Allow GC'ing the env return &n } -func (enc *yamlEncoder) env(e nonDetEnv) *yaml.Node { +func (enc *yamlEncoder) env(e envSet) *yaml.Node { var encode func(e *envExpr) *yaml.Node encode = func(e *envExpr) *yaml.Node { var n yaml.Node From 603ac80d1808fe60694cb2da7d6de67f3dbe7d02 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 29 Jul 2025 14:16:22 -0400 Subject: [PATCH 174/200] internal/unify: drop StringReplacer We're not using it and it was probably a bad idea anyway. Change-Id: I6c5ff0b5796a1adffe4889309dc08dd352d0fe56 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693340 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/yaml.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 7edc3d9d..281519de 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -19,10 +19,6 @@ import ( type UnmarshalOpts struct { // Path is the file path to store in the [Pos] of all [Value]s. 
Path string - - // StringReplacer, if non-nil, is called for each string value to perform - // any application-specific string interpolation. - StringReplacer func(string) string } // UnmarshalYAML unmarshals a YAML node into a Closure. @@ -184,18 +180,12 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { case isExact(): val := node.Value - if dec.opts.StringReplacer != nil { - val = dec.opts.StringReplacer(val) - } return mk(NewStringExact(val)) case isStr || is(yaml.ScalarNode, "!regex"): // Any other string we treat as a regex. This will produce an exact // string anyway if the regex is literal. val := node.Value - if dec.opts.StringReplacer != nil { - val = dec.opts.StringReplacer(val) - } return mk2(NewStringRegex(val)) case is(yaml.SequenceNode, "!regex"): From 57b3564f8b95515bb4c02f4a07a41d16fbffbcb7 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 31 Jul 2025 17:40:09 -0400 Subject: [PATCH 175/200] internal/unify: refactor in preparation for !import This rearranges entry to the YAML parser in preparation for referencing other files from the YAML. Currently the options struct is empty, but we keep it around because we'll use it in the next CL. Change-Id: Ib41dd274cd50c30bb22cdec7785721e9c0997939 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693341 Reviewed-by: Junyang Shao Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- internal/simdgen/main.go | 20 +----- internal/unify/yaml.go | 132 ++++++++++++++++++++++++++++----------- 2 files changed, 98 insertions(+), 54 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 92b1fa98..7bf43618 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -162,8 +162,8 @@ func main() { // Load query. 
if *flagQ != "" { r := strings.NewReader(*flagQ) - var def unify.Closure - if err := def.Unmarshal(r, unify.UnmarshalOpts{Path: ""}); err != nil { + def, err := unify.Read(r, "", unify.ReadOpts{}) + if err != nil { log.Fatalf("parsing -q: %s", err) } inputs = append(inputs, def) @@ -172,7 +172,7 @@ func main() { // Load defs files. must := make(map[*unify.Value]struct{}) for _, path := range flag.Args() { - defs, err := loadValue(path) + defs, err := unify.ReadFile(path, unify.ReadOpts{}) if err != nil { log.Fatal(err) } @@ -245,20 +245,6 @@ func main() { } } -func loadValue(path string) (unify.Closure, error) { - f, err := os.Open(path) - if err != nil { - return unify.Closure{}, err - } - defer f.Close() - - var c unify.Closure - if err := c.Unmarshal(f, unify.UnmarshalOpts{}); err != nil { - return unify.Closure{}, fmt.Errorf("%s: %v", path, err) - } - return c, nil -} - func validate(cl unify.Closure, required map[*unify.Value]struct{}) { // Validate that: // 1. All final defs are exact diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index 281519de..afe9c7b5 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -8,22 +8,21 @@ import ( "errors" "fmt" "io" + "os" "regexp" "strings" "gopkg.in/yaml.v3" ) -// UnmarshalOpts provides options to unmarshaling. The zero value is the default -// options. -type UnmarshalOpts struct { - // Path is the file path to store in the [Pos] of all [Value]s. - Path string +// ReadOpts provides options to [Read] and related functions. The zero value is +// the default options. +type ReadOpts struct { } -// UnmarshalYAML unmarshals a YAML node into a Closure. +// Read reads a [Closure] in YAML format from r, using path for error messages. // -// This is how UnmarshalYAML maps YAML nodes into terminal Values: +// It maps YAML nodes into terminal Values as follows: // // - "_" or !top _ is the top value ([Top]). // @@ -46,7 +45,7 @@ type UnmarshalOpts struct { // // - !regex [x, y, ...] 
is an intersection of regular expressions ([String]). // -// This is how UnmarshalYAML maps YAML nodes into non-terminal Values: +// It maps YAML nodes into non-terminal Values as follows: // // - Sequence nodes like [x, y, z] are tuples ([Tuple]). // @@ -62,48 +61,53 @@ type UnmarshalOpts struct { // non-deterministic choice view really works. The unifier does not directly // implement sums; instead, this is decoded as a fresh variable that's // simultaneously bound to x, y, and z. -func (c *Closure) UnmarshalYAML(node *yaml.Node) error { - return c.unmarshal(node, UnmarshalOpts{}) +func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { + dec := yamlDecoder{opts: opts, path: path, env: topEnv} + v, err := dec.read(r) + if err != nil { + return Closure{}, err + } + return dec.close(v), nil } -// Unmarshal is like [UnmarshalYAML], but accepts options and reads from r. If -// opts.Path is "" and r has a Name() string method, the result of r.Name() is -// used as the path for all [Value]s read from r. -func (c *Closure) Unmarshal(r io.Reader, opts UnmarshalOpts) error { - if opts.Path == "" { - type named interface{ Name() string } - if n, ok := r.(named); ok { - opts.Path = n.Name() - } +// ReadFile reads a [Closure] in YAML format from a file. +// +// The file must consist of a single YAML document. +// +// See [Read] for details. 
+func ReadFile(path string, opts ReadOpts) (Closure, error) { + f, err := os.Open(path) + if err != nil { + return Closure{}, err } + defer f.Close() - var node yaml.Node - if err := yaml.NewDecoder(r).Decode(&node); err != nil { - return err - } - np := &node - if np.Kind == yaml.DocumentNode { - np = node.Content[0] + dec := yamlDecoder{opts: opts, path: path, env: topEnv} + v, err := dec.read(f) + if err != nil { + return Closure{}, err } - return c.unmarshal(np, opts) + + return dec.close(v), nil } -func (c *Closure) unmarshal(node *yaml.Node, opts UnmarshalOpts) error { - dec := &yamlDecoder{opts: opts, vars: make(map[string]*ident), env: topEnv} - val, err := dec.value(node) +// UnmarshalYAML implements [yaml.Unmarshaler]. +// +// Since there is no way to pass [ReadOpts] to this function, it assumes default +// options. +func (c *Closure) UnmarshalYAML(node *yaml.Node) error { + dec := yamlDecoder{path: "", env: topEnv} + v, err := dec.root(node) if err != nil { return err } - vars := make(map[*ident]*Value) - for _, id := range dec.vars { - vars[id] = topValue - } - *c = Closure{val, dec.env} + *c = dec.close(v) return nil } type yamlDecoder struct { - opts UnmarshalOpts + opts ReadOpts + path string vars map[string]*ident nSums int @@ -111,8 +115,62 @@ type yamlDecoder struct { env envSet } +func (dec *yamlDecoder) read(r io.Reader) (*Value, error) { + n, err := readOneNode(r) + if err != nil { + return nil, fmt.Errorf("%s: %w", dec.path, err) + } + + // Decode YAML node to a Value + v, err := dec.root(n) + if err != nil { + return nil, fmt.Errorf("%s: %w", dec.path, err) + } + + return v, nil +} + +// readOneNode reads a single YAML document from r and returns an error if there +// are more documents in r. 
+func readOneNode(r io.Reader) (*yaml.Node, error) { + yd := yaml.NewDecoder(r) + + // Decode as a YAML node + var node yaml.Node + if err := yd.Decode(&node); err != nil { + return nil, err + } + np := &node + if np.Kind == yaml.DocumentNode { + np = node.Content[0] + } + + // Ensure there are no more YAML docs in this file + if err := yd.Decode(nil); err == nil { + return nil, fmt.Errorf("must not contain multiple documents") + } else if err != io.EOF { + return nil, err + } + + return np, nil +} + +// root parses the root of a file. +func (dec *yamlDecoder) root(node *yaml.Node) (*Value, error) { + // Prepare for variable name resolution in this file. + dec.vars = make(map[string]*ident, 0) + dec.nSums = 0 + + return dec.value(node) +} + +// close wraps a decoded [Value] into a [Closure]. +func (dec *yamlDecoder) close(v *Value) Closure { + return Closure{v, dec.env} +} + func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { - pos := &Pos{Path: dec.opts.Path, Line: node.Line} + pos := &Pos{Path: dec.path, Line: node.Line} // Resolve alias nodes. if node.Kind == yaml.AliasNode { From dfa62d10c0340b1dadadd0350e61a7a46d821612 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 1 Aug 2025 13:10:05 -0400 Subject: [PATCH 176/200] internal/unify: add !import nodes The new "!import pattern" node can be used to import other YAML files using a glob pattern. Each file gets its own variable scope ($x in one file is not the same as $x in another file), and we protect against paths that reference files above the current file. 
Change-Id: Ib479aa16f0979c3b9060d63320aa3505f72b1ff1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693342 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/value_test.go | 16 ++++++- internal/unify/yaml.go | 90 +++++++++++++++++++++++++++++++++--- internal/unify/yaml_test.go | 57 ++++++++++++++++++++++- 3 files changed, 155 insertions(+), 8 deletions(-) diff --git a/internal/unify/value_test.go b/internal/unify/value_test.go index 28d22b25..54937c68 100644 --- a/internal/unify/value_test.go +++ b/internal/unify/value_test.go @@ -4,7 +4,11 @@ package unify -import "slices" +import ( + "reflect" + "slices" + "testing" +) func ExampleClosure_All_tuple() { v := mustParse(` @@ -34,3 +38,13 @@ c: 5 // - {a: 2, b: 3, c: 5} // - {a: 2, b: 4, c: 5} } + +func checkDecode[T any](t *testing.T, got *Value, want T) { + var gotT T + if err := got.Decode(&gotT); err != nil { + t.Fatalf("Decode failed: %v", err) + } + if !reflect.DeepEqual(&gotT, &want) { + t.Fatalf("got:\n%s\nwant:\n%s", prettyYaml(gotT), prettyYaml(want)) + } +} diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go index afe9c7b5..dadcd71d 100644 --- a/internal/unify/yaml.go +++ b/internal/unify/yaml.go @@ -8,7 +8,9 @@ import ( "errors" "fmt" "io" + "io/fs" "os" + "path/filepath" "regexp" "strings" @@ -18,6 +20,9 @@ import ( // ReadOpts provides options to [Read] and related functions. The zero value is // the default options. type ReadOpts struct { + // FS, if non-nil, is the file system from which to resolve !import file + // names. + FS fs.FS } // Read reads a [Closure] in YAML format from r, using path for error messages. @@ -61,6 +66,10 @@ type ReadOpts struct { // non-deterministic choice view really works. The unifier does not directly // implement sums; instead, this is decoded as a fresh variable that's // simultaneously bound to x, y, and z. 
+// +// - !import glob is like a !sum, but its children are read from all files +// matching the given glob pattern, which is interpreted relative to the current +// file path. Each file gets its own variable scope. func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { dec := yamlDecoder{opts: opts, path: path, env: topEnv} v, err := dec.read(r) @@ -74,6 +83,8 @@ func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { // // The file must consist of a single YAML document. // +// If opts.FS is not set, this sets it to a FS rooted at path's directory. +// // See [Read] for details. func ReadFile(path string, opts ReadOpts) (Closure, error) { f, err := os.Open(path) @@ -82,13 +93,11 @@ func ReadFile(path string, opts ReadOpts) (Closure, error) { } defer f.Close() - dec := yamlDecoder{opts: opts, path: path, env: topEnv} - v, err := dec.read(f) - if err != nil { - return Closure{}, err + if opts.FS == nil { + opts.FS = os.DirFS(filepath.Dir(path)) } - return dec.close(v), nil + return Read(f, path, opts) } // UnmarshalYAML implements [yaml.Unmarshaler]. @@ -157,7 +166,12 @@ func readOneNode(r io.Reader) (*yaml.Node, error) { // root parses the root of a file. func (dec *yamlDecoder) root(node *yaml.Node) (*Value, error) { - // Prepare for variable name resolution in this file. + // Prepare for variable name resolution in this file. This may be a nested + // root, so restore the current values when we're done. + oldVars, oldNSums := dec.vars, dec.nSums + defer func() { + dec.vars, dec.nSums = oldVars, oldNSums + }() dec.vars = make(map[string]*ident, 0) dec.nSums = 0 @@ -339,11 +353,75 @@ func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { dec.nSums++ dec.env = dec.env.bind(id, vs...) 
return mk(Var{id: id}) + + case is(yaml.ScalarNode, "!import"): + if dec.opts.FS == nil { + return nil, fmt.Errorf("!import not allowed (ReadOpts.FS not set)") + } + pat := node.Value + + if !fs.ValidPath(pat) { + // This will result in Glob returning no results. Give a more useful + // error message for this case. + return nil, fmt.Errorf("!import path must not contain '.' or '..'") + } + + ms, err := fs.Glob(dec.opts.FS, pat) + if err != nil { + return nil, fmt.Errorf("resolving !import: %w", err) + } + if len(ms) == 0 { + return nil, fmt.Errorf("!import did not match any files") + } + + // Parse each file + vs := make([]*Value, 0, len(ms)) + for _, m := range ms { + v, err := dec.import1(m) + if err != nil { + return nil, err + } + vs = append(vs, v) + } + + // Create a sum. + if len(vs) == 1 { + return vs[0], nil + } + id := &ident{name: "import"} + dec.env = dec.env.bind(id, vs...) + return mk(Var{id: id}) } return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag) } +func (dec *yamlDecoder) import1(path string) (*Value, error) { + // Make sure we can open the path first. + f, err := dec.opts.FS.Open(path) + if err != nil { + return nil, fmt.Errorf("!import failed: %w", err) + } + defer f.Close() + + // Prepare the enter path. + oldFS, oldPath := dec.opts.FS, dec.path + defer func() { + dec.opts.FS, dec.path = oldFS, oldPath + }() + + // Enter path, which is relative to the current path's directory. + newPath := filepath.Join(filepath.Dir(dec.path), path) + subFS, err := fs.Sub(dec.opts.FS, filepath.Dir(path)) + if err != nil { + return nil, err + } + dec.opts.FS, dec.path = subFS, newPath + + // Parse the file. + return dec.read(f) +} + type yamlEncoder struct { idp identPrinter e envSet // We track the environment for !repeat nodes. 
diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go index e04d05d4..4f0aef43 100644 --- a/internal/unify/yaml_test.go +++ b/internal/unify/yaml_test.go @@ -11,6 +11,7 @@ import ( "log" "strings" "testing" + "testing/fstest" "gopkg.in/yaml.v3" ) @@ -37,6 +38,10 @@ func oneValue(t *testing.T, c Closure) *Value { } func printYaml(val any) { + fmt.Println(prettyYaml(val)) +} + +func prettyYaml(val any) string { b, err := yaml.Marshal(val) if err != nil { panic(err) @@ -66,7 +71,7 @@ func printYaml(val any) { if err != nil { panic(err) } - fmt.Println(string(b)) + return string(b) } func cleanYaml(node *yaml.Node, lines []int, endPos int) { @@ -145,3 +150,53 @@ func TestEmptyString(t *testing.T) { t.Fatal("expected exact string") } } + +func TestImport(t *testing.T) { + // Test a basic import + main := strings.NewReader("!import x/y.yaml") + fs := fstest.MapFS{ + // Test a glob import with a relative path + "x/y.yaml": {Data: []byte("!import y/*.yaml")}, + "x/y/z.yaml": {Data: []byte("42")}, + } + cl, err := Read(main, "x.yaml", ReadOpts{FS: fs}) + if err != nil { + t.Fatal(err) + } + x := 42 + checkDecode(t, oneValue(t, cl), &x) +} + +func TestImportEscape(t *testing.T) { + // Make sure an import can't escape its subdirectory. + main := strings.NewReader("!import x/y.yaml") + fs := fstest.MapFS{ + "x/y.yaml": {Data: []byte("!import ../y/*.yaml")}, + "y/z.yaml": {Data: []byte("42")}, + } + _, err := Read(main, "x.yaml", ReadOpts{FS: fs}) + if err == nil { + t.Fatal("relative !import should have failed") + } + if !strings.Contains(err.Error(), "must not contain") { + t.Fatalf("unexpected error %v", err) + } +} + +func TestImportScope(t *testing.T) { + // Test that imports have different variable scopes. 
+ main := strings.NewReader("[!import y.yaml, !import y.yaml]") + fs := fstest.MapFS{ + "y.yaml": {Data: []byte("$v")}, + } + cl1, err := Read(main, "x.yaml", ReadOpts{FS: fs}) + if err != nil { + t.Fatal(err) + } + cl2 := mustParse("[1, 2]") + res, err := Unify(cl1, cl2) + if err != nil { + t.Fatal(err) + } + checkDecode(t, oneValue(t, res), []int{1, 2}) +} From ca8f6cb8d5997937a1ce630f5ead4104cbff1bda Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 5 Aug 2025 17:05:27 -0400 Subject: [PATCH 177/200] internal/unify: fix Summands for sum of sums Right now, Closure.Summands only iterates over a top-level sum. Fix it so it can handle sums of sums. This is going to start coming up since !imports tend to lead to sums of sums. Change-Id: Ie67ced083d6d5b814e3ad77b089c4adb2591c568 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693343 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/unify/closure.go | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/internal/unify/closure.go b/internal/unify/closure.go index 5b654879..e8e76e21 100644 --- a/internal/unify/closure.go +++ b/internal/unify/closure.go @@ -29,18 +29,24 @@ func (c Closure) IsBottom() bool { // Summands returns the top-level Values of c. This assumes the top-level of c // was constructed as a sum, and is mostly useful for debugging. func (c Closure) Summands() iter.Seq[*Value] { - if v, ok := c.val.Domain.(Var); ok { - parts := c.env.partitionBy(v.id) - return func(yield func(*Value) bool) { - for _, part := range parts { - if !yield(part.value) { - return + return func(yield func(*Value) bool) { + var rec func(v *Value, env envSet) bool + rec = func(v *Value, env envSet) bool { + switch d := v.Domain.(type) { + case Var: + parts := env.partitionBy(d.id) + for _, part := range parts { + // It may be a sum of sums. Walk into this value. 
+ if !rec(part.value, part.env) { + return false + } } + return true + default: + return yield(v) } } - } - return func(yield func(*Value) bool) { - yield(c.val) + rec(c.val, c.env) } } From f958ca743b4b6fb2e95d944adcd52a16643dc393 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 1 Aug 2025 15:48:19 -0400 Subject: [PATCH 178/200] internal/simdgen: replace go generate with !import Currently, after editing the source YAML files, you have to run go generate to produce that concatenated inputs. This is easy to forget to do, and it's easy to accidentally edit the concatenated input instead of the source YAML. It also means any line numbers that appear in error messages or debug output refer to the generated YAML instead of the source YAML. Fix this by using the new !import node to perform this file loading in the unifier itself from the original source files. Change-Id: I735f96ac9b12ccd1057629758a73b1bda30544cc Reviewed-on: https://go-review.googlesource.com/c/arch/+/693344 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/categories.yaml | 524 +------------- internal/simdgen/etetest.sh | 1 - internal/simdgen/go.yaml | 1097 +----------------------------- internal/simdgen/main.go | 6 - internal/simdgen/ops/main.go | 75 -- 5 files changed, 2 insertions(+), 1701 deletions(-) delete mode 100644 internal/simdgen/ops/main.go diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml index 26e80c5e..ed4c9645 100644 --- a/internal/simdgen/categories.yaml +++ b/internal/simdgen/categories.yaml @@ -1,523 +1 @@ -!sum -- go: Add - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: AddSaturated - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. 
-- go: Sub - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. -- go: SubSaturated - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. -- go: AddPairs - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairs - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: AddPairsSaturated - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairsSaturated - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: And - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. -- go: Or - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. -- go: AndNot - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise x &^ y. -- go: Xor - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. 
- -# We also have PTEST and VPTERNLOG, those should be hidden from the users -# and only appear in rewrite rules. -# const imm predicate(holds for both float and int|uint): -# 0: Equal -# 1: Less -# 2: LessEqual -# 4: NotEqual -# 5: GreaterEqual -# 6: Greater -- go: Equal - constImm: 0 - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for equality. -- go: Less - constImm: 1 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than. -- go: LessEqual - constImm: 2 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNan # For float only. - constImm: 3 - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqual - constImm: 4 - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqual - constImm: 13 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than or equal. -- go: Greater - constImm: 14 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME compares for greater than. -- go: ConvertToInt32 - commutative: false - extension: "AVX.*" - documentation: !string |- - // ConvertToInt32 converts element values to int32. - -- go: ConvertToUint32 - commutative: false - extension: "AVX.*" - documentation: !string |- - // ConvertToUint32Masked converts element values to uint32. -- go: Div - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME divides elements of two vectors. -- go: Sqrt - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes the square root of each element. -- go: ApproximateReciprocal - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of each element. 
-- go: ApproximateReciprocalOfSqrt - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: Scale - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements by a power of 2. -- go: Round - commutative: false - extension: "AVX.*" - constImm: 0 - documentation: !string |- - // NAME rounds elements to the nearest integer. -- go: RoundScaled - commutative: false - extension: "AVX.*" - constImm: 0 - documentation: !string |- - // NAME rounds elements with specified precision. -- go: RoundScaledResidue - commutative: false - extension: "AVX.*" - constImm: 0 - documentation: !string |- - // NAME computes the difference after rounding with specified precision. -- go: Floor - commutative: false - extension: "AVX.*" - constImm: 1 - documentation: !string |- - // NAME rounds elements down to the nearest integer. -- go: FloorScaled - commutative: false - extension: "AVX.*" - constImm: 1 - documentation: !string |- - // NAME rounds elements down with specified precision. -- go: FloorScaledResidue - commutative: false - extension: "AVX.*" - constImm: 1 - documentation: !string |- - // NAME computes the difference after flooring with specified precision. -- go: Ceil - commutative: false - extension: "AVX.*" - constImm: 2 - documentation: !string |- - // NAME rounds elements up to the nearest integer. -- go: CeilScaled - commutative: false - extension: "AVX.*" - constImm: 2 - documentation: !string |- - // NAME rounds elements up with specified precision. -- go: CeilScaledResidue - commutative: false - extension: "AVX.*" - constImm: 2 - documentation: !string |- - // NAME computes the difference after ceiling with specified precision. -- go: Trunc - commutative: false - extension: "AVX.*" - constImm: 3 - documentation: !string |- - // NAME truncates elements towards zero. 
-- go: TruncScaled - commutative: false - extension: "AVX.*" - constImm: 3 - documentation: !string |- - // NAME truncates elements with specified precision. -- go: TruncScaledResidue - commutative: false - extension: "AVX.*" - constImm: 3 - documentation: !string |- - // NAME computes the difference after truncating with specified precision. -- go: AddSub - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME subtracts even elements and adds odd elements of two vectors. -- go: GaloisFieldAffineTransform - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an affine transformation in GF(2^8): - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverse - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes an affine transformation in GF(2^8), - // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldMul - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME computes element-wise GF(2^8) multiplication with - // reduction polynomial x^8 + x^4 + x^3 + x + 1. -- go: Average - commutative: true - extension: "AVX.*" # VPAVGB/W are available across various AVX versions - documentation: !string |- - // NAME computes the rounded average of corresponding elements. 
-- go: Absolute - commutative: false - # Unary operation, not commutative - extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 - documentation: !string |- - // NAME computes the absolute value of each element. -- go: Sign - # Applies sign of second operand to first: sign(val, sign_src) - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME returns the product of the first operand with -1, 0, or 1, - // whichever constant is nearest to the value of the second operand. - # Sign does not have masked version -- go: PopCount - commutative: false - extension: "AVX512.*" - documentation: !string |- - // NAME counts the number of set bits in each element. -- go: PairDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. -# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. -- go: SaturatedUnsignedSignedPairDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. -# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. -- go: DotProdBroadcast - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies all elements and broadcasts the sum. -- go: UnsignedSignedQuadDotProdAccumulate - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. -- go: SaturatedUnsignedSignedQuadDotProdAccumulate - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. 
-- go: AddDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: SaturatedAddDotProd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: FusedMultiplyAdd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs (x * y) + z. -- go: FusedMultiplyAddSub - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: FusedMultiplySubAdd - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. -- go: Max - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the maximum of corresponding elements. -- go: Min - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME computes the minimum of corresponding elements. -- go: SetElem - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME sets a single constant-indexed element's value. -- go: GetElem - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME retrieves a single constant-indexed element's value. -- go: Set128 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. -- go: Get128 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. 
-- go: Permute - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. -- go: Permute2 # Permute2 is only available on or after AVX512 - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a full permutation of vector x, y using indices: - // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} - // where xy is x appending y. - // Only the needed bits to represent xy's index are used in indices' elements. -- go: Compress - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME performs a compression on vector x using mask by - // selecting elements as indicated by mask, and pack them to lower indexed elements. -- go: blend - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME blends two vectors based on mask values, choosing either - // the first or the second based on whether the third is false or true -- go: Mul - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. -- go: MulEvenWiden - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHigh - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. -- go: ShiftAllLeft - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
-- go: ShiftAllRight - signed: false - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRight - signed: true - specialLower: sftimm - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: shiftAllLeftConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: false - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: true - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - -- go: ShiftLeft - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -- go: ShiftRight - signed: false - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRight - signed: true - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. -- go: RotateAllLeft - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element to the left by the number of bits specified by the immediate. -- go: RotateLeft - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRight - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRight - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -- go: ShiftLeftConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-- go: ShiftRightConcat - nameAndSizeCheck: true - commutative: false - extension: "AVX.*" - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +!import ops/*/categories.yaml diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh index ea270429..7b5001ec 100755 --- a/internal/simdgen/etetest.sh +++ b/internal/simdgen/etetest.sh @@ -13,7 +13,6 @@ builds the compiler. rm -rf go-test git clone https://go.googlesource.com/go -b dev.simd go-test -go generate go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categories.yaml (cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) (cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml index cdee0870..4f077c81 100644 --- a/internal/simdgen/go.yaml +++ b/internal/simdgen/go.yaml @@ -1,1096 +1 @@ -!sum -# Add -- go: Add - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - &any - go: $t - - *any - out: - - *any -# Add Saturated -- go: AddSaturated - asm: "VPADDS[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *int -- go: AddSaturated - asm: "VPADDS[BWDQ]" - in: - - &uint - go: $t - base: uint - - *uint - out: - - *uint - -# Sub -- go: Sub - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: &2any - - *any - - *any - out: &1any - - *any -# Sub Saturated -- go: SubSaturated - asm: "VPSUBS[BWDQ]" - in: &2int - - *int - - *int - out: &1int - - *int -- go: SubSaturated - asm: "VPSUBS[BWDQ]" - in: - - *uint - - *uint - out: - - *uint -- go: AddPairs - asm: "VPHADD[DW]" - in: *2any - out: *1any -- go: SubPairs - asm: "VPHSUB[DW]" - in: *2any - out: *1any -- go: AddPairs - asm: "VHADDP[SD]" # floats - in: *2any - out: *1any -- go: SubPairs - asm: "VHSUBP[SD]" # floats - in: *2any - out: *1any -- go: AddPairsSaturated - asm: "VPHADDS[DW]" - 
in: *2int - out: *1int -- go: SubPairsSaturated - asm: "VPHSUBS[DW]" - in: *2int - out: *1int -# In the XED data, *all* floating point bitwise logic operation has their -# operand type marked as uint. We are not trying to understand why Intel -# decided that they want FP bit-wise logic operations, but this irregularity -# has to be dealed with in separate rules with some overwrites. - -# For many bit-wise operations, we have the following non-orthogonal -# choices: -# -# - Non-masked AVX operations have no element width (because it -# doesn't matter), but only cover 128 and 256 bit vectors. -# -# - Masked AVX-512 operations have an element width (because it needs -# to know how to interpret the mask), and cover 128, 256, and 512 bit -# vectors. These only cover 32- and 64-bit element widths. -# -# - Non-masked AVX-512 operations still have an element width (because -# they're just the masked operations with an implicit K0 mask) but it -# doesn't matter! This is the only option for non-masked 512 bit -# operations, and we can pick any of the element widths. -# -# We unify with ALL of these operations and the compiler generator -# picks when there are multiple options. - -# TODO: We don't currently generate unmasked bit-wise operations on 512 bit -# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise -# operations for 32- and 64-bit elements; while the element width doesn't matter -# for unmasked operations, right now we don't realize that we can just use the -# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we -# should recognize bit-wise operations when generating unmasked versions and -# omit the element width. - -# For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. - -- go: And - asm: "VPAND[DQ]?" 
- in: - - &any - go: $t - - *any - out: - - *any - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 - inVariant: [] - in: &twoI8x64 - - &i8x64 - go: $t - overwriteElementBits: 8 - - *i8x64 - out: &oneI8x64 - - *i8x64 - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 - inVariant: [] - in: &twoI16x32 - - &i16x32 - go: $t - overwriteElementBits: 16 - - *i16x32 - out: &oneI16x32 - - *i16x32 - -- go: AndNot - asm: "VPANDN[DQ]?" - operandOrder: "21" # switch the arg order - in: - - *any - - *any - out: - - *any - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Or - asm: "VPOR[DQ]?" - in: - - *any - - *any - out: - - *any - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Xor - asm: "VPXOR[DQ]?" 
- in: - - *any - - *any - out: - - *any - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 -# Ints -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - &any - go: $t - - *any - out: - - &anyvregToMask - go: $t - overwriteBase: int - overwriteClass: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *anyvregToMask -# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we -# believe this is an error, so add this definition to overwrite. -- go: Greater - asm: "VPCMPGTQ" - in: - - &int64 - go: $t - base: int - elemBits: 64 - - *int64 - out: - - base: int - elemBits: 32 - overwriteElementBits: 64 - overwriteClass: mask - overwriteBase: int -# AVX-512 compares produce masks. -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - *any - - *any - out: - - class: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - *int - - *int - out: - - class: mask -# The const imm predicated compares after AVX512, please see categories.yaml -# for const imm specification. -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMP[BWDQ]" - in: - - *int - - *int - - class: immediate - const: 0 # Just a placeholder, will be overwritten by const imm porting. 
- out: - - class: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMPU[BWDQ]" - in: - - &uint - go: $t - base: uint - - *uint - - class: immediate - const: 0 - out: - - class: mask - -# Floats -- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan - asm: "VCMPP[SD]" - in: - - &float - go: $t - base: float - - *float - - class: immediate - const: 0 - out: - - go: $t - overwriteBase: int - overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) - asm: "VCMPP[SD]" - in: - - *float - - *float - - class: immediate - const: 0 - out: - - class: mask -- go: ConvertToInt32 - asm: "VCVTTPS2DQ" - in: - - &fp - go: $t - base: float - out: - - &i32 - go: $u - base: int - elemBits: 32 -- go: ConvertToUint32 - asm: "VCVTPS2UDQ" - in: - - *fp - out: - - &u32 - go: $u - base: uint - elemBits: 32 -- go: Div - asm: "V?DIVP[SD]" - in: &2fp - - &fp - go: $t - base: float - - *fp - out: &1fp - - *fp -- go: Sqrt - asm: "V?SQRTP[SD]" - in: *1fp - out: *1fp -# TODO: Provide separate methods for 12-bit precision and 14-bit precision? -- go: ApproximateReciprocal - asm: "VRCP(14)?P[SD]" - in: *1fp - out: *1fp -- go: ApproximateReciprocalOfSqrt - asm: "V?RSQRT(14)?P[SD]" - in: *1fp - out: *1fp -- go: Scale - asm: "VSCALEFP[SD]" - in: *2fp - out: *1fp - -- go: "Round|Ceil|Floor|Trunc" - asm: "VROUNDP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - out: *1fp - -- go: "(Round|Ceil|Floor|Trunc)Scaled" - asm: "VRNDSCALEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). - name: prec - out: *1fp -- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" - asm: "VREDUCEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
- name: prec - out: *1fp - -- go: "AddSub" - asm: "VADDSUBP[SD]" - in: - - *fp - - *fp - out: - - *fp -- go: GaloisFieldAffineTransform - asm: VGF2P8AFFINEQB - operandOrder: 2I # 2nd operand, then immediate - in: &AffineArgs - - &uint8 - go: $t - base: uint - - &uint8x8 - go: $t2 - base: uint - - &pureImmVar - class: immediate - immOffset: 0 - name: b - out: - - *uint8 - -- go: GaloisFieldAffineTransformInverse - asm: VGF2P8AFFINEINVQB - operandOrder: 2I # 2nd operand, then immediate - in: *AffineArgs - out: - - *uint8 - -- go: GaloisFieldMul - asm: VGF2P8MULB - in: - - *uint8 - - *uint8 - out: - - *uint8 -# Average (unsigned byte, unsigned word) -# Instructions: VPAVGB, VPAVGW -- go: Average - asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word) - in: - - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW - go: $t - base: uint - - *uint_t - out: - - *uint_t - -# Absolute Value (signed byte, word, dword, qword) -# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ -- go: Absolute - asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ - in: - - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN - go: $t - base: int - out: - - *int_t # Output is magnitude, fits in the same signed type - -# Sign Operation (signed byte, word, dword) -# Applies sign of second operand to the first. 
-# Instructions: VPSIGNB, VPSIGNW, VPSIGND -- go: Sign - asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND - in: - - *int_t # value to apply sign to - - *int_t # value from which to take the sign - out: - - *int_t - -# Population Count (count set bits in each element) -# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) -# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCount - asm: "VPOPCNT[BWDQ]" - in: - - &any - go: $t - out: - - *any -- go: PairDotProd - asm: VPMADDWD - in: - - &int - go: $t - base: int - - *int - out: - - &int2 # The elemBits are different - go: $t2 - base: int -- go: SaturatedUnsignedSignedPairDotProd - asm: VPMADDUBSW - in: - - &uint - go: $t - base: uint - overwriteElementBits: 8 - - &int3 - go: $t3 - base: int - overwriteElementBits: 8 - out: - - *int2 -- go: DotProdBroadcast - asm: VDPP[SD] - in: - - &dpb_src - go: $t - - *dpb_src - - class: immediate - const: 127 - out: - - *dpb_src -- go: UnsignedSignedQuadDotProdAccumulate - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - &qdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &qdpa_src1 - go: $t_src1 - base: uint - overwriteElementBits: 8 - - &qdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 8 - out: - - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulate - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc -- go: AddDotProd - asm: "VPDPWSSD" - in: - - &pdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &pdpa_src1 - go: $t_src1 - base: int - overwriteElementBits: 16 - - &pdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 16 - out: - - *pdpa_acc -- go: SaturatedAddDotProd - asm: "VPDPWSSDS" - in: - - *pdpa_acc - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: FusedMultiplyAdd - asm: "VFMADD213PS|VFMADD213PD" - in: - - &fma_op - go: $t - base: float - - *fma_op - - *fma_op - out: - - *fma_op -- go: FusedMultiplyAddSub - asm: 
"VFMADDSUB213PS|VFMADDSUB213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op -- go: FusedMultiplySubAdd - asm: "VFMSUBADD213PS|VFMSUBADD213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op -- go: Max - asm: "V?PMAXS[BWDQ]" - in: &2int - - &int - go: $t - base: int - - *int - out: &1int - - *int -- go: Max - asm: "V?PMAXU[BWDQ]" - in: &2uint - - &uint - go: $t - base: uint - - *uint - out: &1uint - - *uint - -- go: Min - asm: "V?PMINS[BWDQ]" - in: *2int - out: *1int -- go: Min - asm: "V?PMINU[BWDQ]" - in: *2uint - out: *1uint - -- go: Max - asm: "V?MAXP[SD]" - in: &2float - - &float - go: $t - base: float - - *float - out: &1float - - *float -- go: Min - asm: "V?MINP[SD]" - in: *2float - out: *1float -- go: SetElem - asm: "VPINSR[BWDQ]" - in: - - &t - class: vreg - base: $b - - class: greg - base: $b - lanes: 1 # Scalar, darn it! - - &imm - class: immediate - immOffset: 0 - name: index - out: - - *t - -- go: GetElem - asm: "VPEXTR[BWDQ]" - in: - - class: vreg - base: $b - elemBits: $e - - *imm - out: - - class: greg - base: $b - bits: $e - -- go: Set128 - asm: "VINSERTI128" - in: - - &i8x32 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 8 - - &i8x16 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 8 - - &imm01 # This immediate should be only 0 or 1 - class: immediate - immOffset: 0 - name: index - out: - - *i8x32 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i8x32 - - *imm01 - out: - - *i8x16 - -- go: Set128 - asm: "VINSERTI128" - in: - - &i16x16 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 16 - - &i16x8 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 16 - - *imm01 - out: - - *i16x16 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i16x16 - - *imm01 - out: - - *i16x8 - -- go: Set128 - asm: "VINSERTI128" - in: - - &i32x8 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 32 - - &i32x4 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 32 - - *imm01 - out: - - 
*i32x8 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i32x8 - - *imm01 - out: - - *i32x4 - -- go: Set128 - asm: "VINSERTI128" - in: - - &i64x4 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 64 - - &i64x2 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 64 - - *imm01 - out: - - *i64x4 - -- go: Get128 - asm: "VEXTRACTI128" - in: - - *i64x4 - - *imm01 - out: - - *i64x2 - -- go: Set128 - asm: "VINSERTF128" - in: - - &f32x8 - class: vreg - base: $t - bits: 256 - OverwriteElementBits: 32 - - &f32x4 - class: vreg - base: $t - bits: 128 - OverwriteElementBits: 32 - - *imm01 - out: - - *f32x8 - -- go: Get128 - asm: "VEXTRACTF128" - in: - - *f32x8 - - *imm01 - out: - - *f32x4 - -- go: Set128 - asm: "VINSERTF128" - in: - - &f64x4 - class: vreg - base: $t - bits: 256 - - &f64x2 - class: vreg - base: $t - bits: 128 - - *imm01 - out: - - *f64x4 - -- go: Get128 - asm: "VEXTRACTF128" - in: - - *f64x4 - - *imm01 - out: - - *f64x2 - -- go: Permute - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - &anyindices - go: $t - name: indices - overwriteBase: uint - - &any - go: $t - out: - - *any - -- go: Permute2 - asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" - # Because we are overwriting the receiver's type, we - # have to move the receiver to be a parameter so that - # we can have no duplication. - operandOrder: "231Type1" - in: - - *anyindices # result in arg 0 - - *any - - *any - out: - - *any - -- go: Compress - asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" - in: - # The mask in Compress is a control mask rather than a write mask, so it's not optional. - - class: mask - - *any - out: - - *any - -# For now a non-public method because -# (1) [OverwriteClass] must be set together with [OverwriteBase] -# (2) "simdgen does not support [OverwriteClass] in inputs". -# That means the signature is wrong. 
-- go: blend - asm: VPBLENDVB - in: - - &v - go: $t - class: vreg - base: int - - *v - - - class: vreg - base: int - name: mask - out: - - *v - -# For AVX512 -- go: blend - asm: VPBLENDM[BWDQ] - in: - - &v - go: $t - bits: 512 - class: vreg - base: int - - *v - inVariant: - - - class: mask - out: - - *v - -# "Normal" multiplication is only available for floats. -# This only covers the single and double precision. -- go: Mul - asm: "VMULP[SD]" - in: - - &fp - go: $t - base: float - - *fp - out: - - *fp - -# Integer multiplications. - -# MulEvenWiden -# Dword only. -- go: MulEvenWiden - asm: "VPMULDQ" - in: - - &int - go: $t - base: int - - *int - out: - - &int2 - go: $t2 - base: int -- go: MulEvenWiden - asm: "VPMULUDQ" - in: - - &uint - go: $t - base: uint - - *uint - out: - - &uint2 - go: $t2 - base: uint - -# MulHigh -# Word only. -- go: MulHigh - asm: "VPMULHW" - in: - - *int - - *int - out: - - *int2 -- go: MulHigh - asm: "VPMULHUW" - in: - - *uint - - *uint - out: - - *uint2 - -# MulLow -# Signed int only. 
-- go: Mul - asm: "VPMULL[WDQ]" - in: - - *int - - *int - out: - - *int2 -# Integers -# ShiftAll* -- go: ShiftAllLeft - asm: "VPSLL[WDQ]" - in: - - &any - go: $t - - &vecAsScalar64 - go: "Uint.*" - treatLikeAScalarOfSize: 64 - out: - - *any -- go: ShiftAllRight - signed: false - asm: "VPSRL[WDQ]" - in: - - &uint - go: $t - base: uint - - *vecAsScalar64 - out: - - *uint -- go: ShiftAllRight - signed: true - asm: "VPSRA[WDQ]" - in: - - &int - go: $t - base: int - - *vecAsScalar64 - out: - - *int - -- go: shiftAllLeftConst - asm: "VPSLL[WDQ]" - in: - - *any - - &imm - class: immediate - immOffset: 0 - out: - - *any -- go: shiftAllRightConst - asm: "VPSRL[WDQ]" - in: - - *int - - *imm - out: - - *int -- go: shiftAllRightConst - asm: "VPSRA[WDQ]" - in: - - *uint - - *imm - out: - - *uint - -# Shift* (variable) -- go: ShiftLeft - asm: "VPSLLV[WD]" - in: - - *any - - *any - out: - - *any -# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite -# it to 64. -- go: ShiftLeft - asm: "VPSLLVQ" - in: - - &anyOverwriteElemBits - go: $t - overwriteElementBits: 64 - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits -- go: ShiftRight - signed: false - asm: "VPSRLV[WD]" - in: - - *uint - - *uint - out: - - *uint -# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. 
-- go: ShiftRight - signed: false - asm: "VPSRLVQ" - in: - - &uintOverwriteElemBits - go: $t - base: uint - overwriteElementBits: 64 - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits -- go: ShiftRight - signed: true - asm: "VPSRAV[WDQ]" - in: - - *int - - *int - out: - - *int - -# Rotate -- go: RotateAllLeft - asm: "VPROL[DQ]" - in: - - *any - - &pureImm - class: immediate - immOffset: 0 - name: shift - out: - - *any -- go: RotateAllRight - asm: "VPROR[DQ]" - in: - - *any - - *pureImm - out: - - *any -- go: RotateLeft - asm: "VPROLV[DQ]" - in: - - *any - - *any - out: - - *any -- go: RotateRight - asm: "VPRORV[DQ]" - in: - - *any - - *any - out: - - *any - -# Bizzare shifts. -- go: ShiftAllLeftConcat - asm: "VPSHLD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftAllRightConcat - asm: "VPSHRD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftLeftConcat - asm: "VPSHLDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any -- go: ShiftRightConcat - asm: "VPSHRDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any +!import ops/*/go.yaml diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 7bf43618..69eb85f9 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -46,10 +46,6 @@ // categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED // data, you can find an example in ops/AddSub/. // -// To produce an aggregation of go.yaml and categoris.yaml from ./ops/ to ./, run: -// -// go generate -// // When generating Go definitions, simdgen do 3 "magic"s: // - It splits masked operations(with op's [Masked] field set) to const and non const: // - One is a normal masked operation, the original @@ -84,8 +80,6 @@ package main // - Do I need Closure, Value, and Domain? It feels like I should only need two // types. -//go:generate go run ./ops/. 
- import ( "cmp" "flag" diff --git a/internal/simdgen/ops/main.go b/internal/simdgen/ops/main.go deleted file mode 100644 index 7e462bf7..00000000 --- a/internal/simdgen/ops/main.go +++ /dev/null @@ -1,75 +0,0 @@ -package main - -import ( - "bufio" - "fmt" - "os" - "path/filepath" -) - -const baseDir = "ops" // The main directory containing A, B, C, etc. - -func main() { - if err := mergeYamlFiles("categories.yaml"); err != nil { - fmt.Printf("Error processing categories.yaml: %v\n", err) - os.Exit(1) - } - if err := mergeYamlFiles("go.yaml"); err != nil { - fmt.Printf("Error processing go.yaml: %v\n", err) - os.Exit(1) - } -} - -func mergeYamlFiles(targetFileName string) error { - outputFile, err := os.Create(targetFileName) - if err != nil { - return fmt.Errorf("failed to create output file %s: %w", targetFileName, err) - } - defer outputFile.Close() - - writer := bufio.NewWriter(outputFile) - _, err = writer.WriteString("!sum\n") - if err != nil { - return fmt.Errorf("failed to write '!sum' to %s: %w", targetFileName, err) - } - - entries, err := os.ReadDir(baseDir) - if err != nil { - return fmt.Errorf("failed to read base directory %s: %w", baseDir, err) - } - for _, entry := range entries { - if !entry.IsDir() { - continue - } - - subdirPath := filepath.Join(baseDir, entry.Name()) - sourceFilePath := filepath.Join(subdirPath, targetFileName) - - sourceFile, err := os.Open(sourceFilePath) - if err != nil { - if os.IsNotExist(err) { - fmt.Printf("Skipping: %s not found in %s\n", targetFileName, subdirPath) - continue - } - return fmt.Errorf("failed to open source file %s: %w", sourceFilePath, err) - } - defer sourceFile.Close() - - scanner := bufio.NewScanner(sourceFile) - // Skip first line - scanner.Scan() - // Append the rest of the lines to the output file - for scanner.Scan() { - line := scanner.Text() - _, err = writer.WriteString(line + "\n") - if err != nil { - return fmt.Errorf("failed to write line from %s to %s: %w", sourceFilePath, targetFileName, 
err) - } - } - - if err := scanner.Err(); err != nil { - return fmt.Errorf("error reading lines from %s: %w", sourceFilePath, err) - } - } - return writer.Flush() -} From 5f469bfecac542f07368a0e23c0c83951decb709 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 5 Aug 2025 19:01:59 +0000 Subject: [PATCH 179/200] internal/simdgen: (Set|Get)(Lo|Hi) This CL adds the missing pieces of set/get elements for larger vectors. It also changes the Set and Get API to be better. This CL generates CL 693355. Change-Id: If545221e87776de7946205b41f9a7648a8148b2d Reviewed-on: https://go-review.googlesource.com/c/arch/+/693335 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/ops/Moves/categories.yaml | 22 +++- internal/simdgen/ops/Moves/go.yaml | 136 +++++++++++---------- 2 files changed, 87 insertions(+), 71 deletions(-) diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 5e51becb..d56e4c93 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -9,16 +9,30 @@ extension: "AVX.*" documentation: !string |- // NAME retrieves a single constant-indexed element's value. -- go: Set128 +- go: SetLo commutative: false + constImm: 0 extension: "AVX.*" documentation: !string |- - // NAME combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. -- go: Get128 + // NAME returns x with its lower half set to y. +- go: GetLo commutative: false + constImm: 0 extension: "AVX.*" documentation: !string |- - // NAME retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. + // NAME returns the lower half of x. +- go: SetHi + commutative: false + constImm: 1 + extension: "AVX.*" + documentation: !string |- + // NAME returns x with its upper half set to y. 
+- go: GetHi + commutative: false + constImm: 1 + extension: "AVX.*" + documentation: !string |- + // NAME returns the upper half of x. - go: Permute commutative: false extension: "AVX.*" diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 52e6228d..b014a7a6 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -27,156 +27,158 @@ base: $b bits: $e -- go: Set128 - asm: "VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i8x32 + - &i8x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 8 - - &i8x16 + - &i8xN class: vreg base: $t - bits: 128 OverwriteElementBits: 8 - &imm01 # This immediate should be only 0 or 1 class: immediate - immOffset: 0 + const: 0 # place holder name: index out: - - *i8x32 + - *i8x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i8x32 + - *i8x2N - *imm01 out: - - *i8x16 + - *i8xN -- go: Set128 - asm: "VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i16x16 + - &i16x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 16 - - &i16x8 + - &i16xN class: vreg base: $t - bits: 128 OverwriteElementBits: 16 - *imm01 out: - - *i16x16 + - *i16x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i16x16 + - *i16x2N - *imm01 out: - - *i16x8 + - *i16xN -- go: Set128 - asm: "VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i32x8 + - &i32x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 32 - - &i32x4 + - &i32xN class: vreg base: $t - bits: 128 OverwriteElementBits: 32 - *imm01 out: - - *i32x8 + - *i32x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i32x8 + - *i32x2N - *imm01 out: - - *i32x4 + - *i32xN -- go: Set128 - asm: 
"VINSERTI128" +- go: "SetHi|SetLo" + asm: "VINSERTI128|VINSERTI64X4" + inVariant: [] in: - - &i64x4 + - &i64x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 64 - - &i64x2 + - &i64xN class: vreg base: $t - bits: 128 OverwriteElementBits: 64 - *imm01 out: - - *i64x4 + - *i64x2N -- go: Get128 - asm: "VEXTRACTI128" +- go: "GetHi|GetLo" + asm: "VEXTRACTI128|VEXTRACTI64X4" + inVariant: [] in: - - *i64x4 + - *i64x2N - *imm01 out: - - *i64x2 + - *i64xN -- go: Set128 - asm: "VINSERTF128" +- go: "SetHi|SetLo" + asm: "VINSERTF128|VINSERTF64X4" + inVariant: [] in: - - &f32x8 + - &f32x2N class: vreg base: $t - bits: 256 OverwriteElementBits: 32 - - &f32x4 + - &f32xN class: vreg base: $t - bits: 128 OverwriteElementBits: 32 - *imm01 out: - - *f32x8 + - *f32x2N -- go: Get128 - asm: "VEXTRACTF128" +- go: "GetHi|GetLo" + asm: "VEXTRACTF128|VEXTRACTF64X4" + inVariant: [] in: - - *f32x8 + - *f32x2N - *imm01 out: - - *f32x4 + - *f32xN -- go: Set128 - asm: "VINSERTF128" +- go: "SetHi|SetLo" + asm: "VINSERTF128|VINSERTF64X4" + inVariant: [] in: - - &f64x4 + - &f64x2N class: vreg base: $t - bits: 256 - - &f64x2 + OverwriteElementBits: 64 + - &f64xN class: vreg base: $t - bits: 128 + OverwriteElementBits: 64 - *imm01 out: - - *f64x4 + - *f64x2N -- go: Get128 - asm: "VEXTRACTF128" +- go: "GetHi|GetLo" + asm: "VEXTRACTF128|VEXTRACTF64X4" + inVariant: [] in: - - *f64x4 + - *f64x2N - *imm01 out: - - *f64x2 + - *f64xN - go: Permute asm: "VPERM[BWDQ]|VPERMP[SD]" From 515a7504e9e134accd2aa63ff300676afb0ff61a Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 5 Aug 2025 19:42:29 +0000 Subject: [PATCH 180/200] internal/simdgen: add Expand This CL generates CL 693375. 
Change-Id: Id7e71a68e9997cbec767cd3addbf152710f4c1f8 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693336 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/ops/Moves/categories.yaml | 6 ++++++ internal/simdgen/ops/Moves/go.yaml | 8 ++++++++ internal/simdgen/xed.go | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index d56e4c93..6f30ccbc 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -60,3 +60,9 @@ documentation: !string |- // NAME blends two vectors based on mask values, choosing either // the first or the second based on whether the third is false or true +- go: Expand + commutative: false + extension: "AVX.*" + documentation: !string |- + // NAME performs an expansion on a vector x whose elements are packed to lower parts. + // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index b014a7a6..50e2869e 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -250,3 +250,11 @@ out: - *v +- go: Expand + asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]" + in: + # The mask in Expand is a control mask rather than a write mask, so it's not optional. 
+ - class: mask + - *any + out: + - *any diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index 6a3feb36..f773fcda 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -89,7 +89,7 @@ func loadXED(xedPath string) []*unify.Value { } var ( - maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]`) + maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]|VPEXPAND[BWDQ]|VEXPANDP[SD]`) maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`) ) From b15c9c00eaa3881472a1bd88e46cfb6d9863dab7 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 6 Aug 2025 18:18:26 +0000 Subject: [PATCH 181/200] internal/simdgen: add value conversion ToBits for mask This CL generates CL 693755. Change-Id: If29791f9810cacebb99e27516d677fe9200badb2 Reviewed-on: https://go-review.googlesource.com/c/arch/+/693598 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdIntrinsics.go | 3 ++- internal/simdgen/gen_simdTypes.go | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index ca339ac2..5050834b 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -79,7 +79,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}FromBits", simdCvtMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}FromBits", simdCvtVToMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) + addF(simdPackage, "{{.Name}}.ToBits", simdCvtMaskToV({{.ElemBits}}, {{.Lanes}}), sys.AMD64) {{end}} {{define "footer"}}} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index b9427c4a..f3c68796 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -175,8 +175,14 @@ const simdMaskFromValTemplate = ` // {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. // Only the lower {{.Lanes}} bits of y are used. // -// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512" +// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512 func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} + +// ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset. +// Only the lower {{.Lanes}} bits of y are used. +// +// Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512 +func (x {{.Name}}) ToBits() uint{{.LanesContainer}} ` const simdMaskedLoadStoreTemplate = ` From 238887481806c4d02444e1ba8f568919520ce1f5 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 7 Aug 2025 16:49:18 +0000 Subject: [PATCH 182/200] internal/simdgen: API interface fixes - Absolute -> Abs - ApproximateReciprocal -> Reciprocal - Other derived apis also changed. - Round -> RoundToEven - Other derived apis also changed. - Drop DotProdBroadcast - Fused(Mul|Add)(Mul|Add)? 
-> remove the "Fused" - MulEvenWiden -> remove 64bit - MulLow -> Mul, add unit - PairDotProd -> DotProdPairs - make AddDotProdPairs machine ops only - peepholes will be in another CL at dev.simd. - PopCount -> OnesCount - Saturated* -> *Saturated - Fix (Add|Sub)Saturated uint mappings. - UnsignedSignedQuadDotProdAccumulate -> AddDotProdQuadruple - The "DotProdQuadruple" instruction does not exist, so no peepholes for this. This CL generated CL 694115. Change-Id: I02a22b14110154a4c9d06bde30d0ba8306e6e9be Reviewed-on: https://go-review.googlesource.com/c/arch/+/694095 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/ops/AddSub/go.yaml | 4 +- .../simdgen/ops/FPonlyArith/categories.yaml | 10 ++--- internal/simdgen/ops/FPonlyArith/go.yaml | 10 ++--- .../simdgen/ops/IntOnlyArith/categories.yaml | 6 +-- internal/simdgen/ops/IntOnlyArith/go.yaml | 6 +-- internal/simdgen/ops/MLOps/categories.yaml | 36 +++++++++--------- internal/simdgen/ops/MLOps/go.yaml | 38 +++++++++---------- internal/simdgen/ops/Mul/go.yaml | 31 +++++++++------ 8 files changed, 75 insertions(+), 66 deletions(-) diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml index 45726cd6..4423d8c7 100644 --- a/internal/simdgen/ops/AddSub/go.yaml +++ b/internal/simdgen/ops/AddSub/go.yaml @@ -19,7 +19,7 @@ out: - *int - go: AddSaturated - asm: "VPADDS[BWDQ]" + asm: "VPADDUS[BWDQ]" in: - &uint go: $t @@ -45,7 +45,7 @@ out: &1int - *int - go: SubSaturated - asm: "VPSUBS[BWDQ]" + asm: "VPSUBUS[BWDQ]" in: - *uint - *uint diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 63ddbb34..512cfc50 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -9,12 +9,12 @@ extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. 
-- go: ApproximateReciprocal +- go: Reciprocal commutative: false extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. -- go: ApproximateReciprocalOfSqrt +- go: ReciprocalSqrt commutative: false extension: "AVX.*" documentation: !string |- @@ -24,19 +24,19 @@ extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. -- go: Round +- go: RoundToEven commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. -- go: RoundScaled +- go: RoundToEvenScaled commutative: false extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. -- go: RoundScaledResidue +- go: RoundToEvenScaledResidue commutative: false extension: "AVX.*" constImm: 0 diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml index dfb0454e..e164f7b7 100644 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ b/internal/simdgen/ops/FPonlyArith/go.yaml @@ -13,11 +13,11 @@ in: *1fp out: *1fp # TODO: Provide separate methods for 12-bit precision and 14-bit precision? -- go: ApproximateReciprocal +- go: Reciprocal asm: "VRCP(14)?P[SD]" in: *1fp out: *1fp -- go: ApproximateReciprocalOfSqrt +- go: ReciprocalSqrt asm: "V?RSQRT(14)?P[SD]" in: *1fp out: *1fp @@ -26,7 +26,7 @@ in: *2fp out: *1fp -- go: "Round|Ceil|Floor|Trunc" +- go: "RoundToEven|Ceil|Floor|Trunc" asm: "VROUNDP[SD]" in: - *fp @@ -34,7 +34,7 @@ const: 0 # place holder out: *1fp -- go: "(Round|Ceil|Floor|Trunc)Scaled" +- go: "(RoundToEven|Ceil|Floor|Trunc)Scaled" asm: "VRNDSCALEP[SD]" in: - *fp @@ -43,7 +43,7 @@ immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). 
name: prec out: *1fp -- go: "(Round|Ceil|Floor|Trunc)ScaledResidue" +- go: "(RoundToEven|Ceil|Floor|Trunc)ScaledResidue" asm: "VREDUCEP[SD]" in: - *fp diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 477b1896..2c7a9998 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -4,13 +4,13 @@ extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. -- go: Absolute +- go: Abs commutative: false # Unary operation, not commutative extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. -- go: Sign +- go: CopySign # Applies sign of second operand to first: sign(val, sign_src) commutative: false extension: "AVX.*" @@ -18,7 +18,7 @@ // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version -- go: PopCount +- go: OnesCount commutative: false extension: "AVX512.*" documentation: !string |- diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml index 4c73be26..54938b4f 100644 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ b/internal/simdgen/ops/IntOnlyArith/go.yaml @@ -13,7 +13,7 @@ # Absolute Value (signed byte, word, dword, qword) # Instructions: VPABSB, VPABSW, VPABSD, VPABSQ -- go: Absolute +- go: Abs asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ in: - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN @@ -25,7 +25,7 @@ # Sign Operation (signed byte, word, dword) # Applies sign of second operand to the first. 
# Instructions: VPSIGNB, VPSIGNW, VPSIGND -- go: Sign +- go: CopySign asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND in: - *int_t # value to apply sign to @@ -36,7 +36,7 @@ # Population Count (count set bits in each element) # Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) # VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: PopCount +- go: OnesCount asm: "VPOPCNT[BWDQ]" in: - &any diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index b3508d25..6c5d3c67 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -1,55 +1,57 @@ !sum -- go: PairDotProd +- go: DotProdPairs commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. -- go: SaturatedUnsignedSignedPairDotProd +- go: DotProdPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. -- go: DotProdBroadcast - commutative: true - extension: "AVX.*" - documentation: !string |- - // NAME multiplies all elements and broadcasts the sum. -- go: UnsignedSignedQuadDotProdAccumulate +# - go: DotProdBroadcast +# commutative: true +# extension: "AVX.*" +# documentation: !string |- +# // NAME multiplies all elements and broadcasts the sum. +- go: AddDotProdQuadruple commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. 
-- go: SaturatedUnsignedSignedQuadDotProdAccumulate +- go: AddDotProdQuadrupleSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProd +- go: AddDotProdPairs commutative: false + noTypes: "true" + noGenericOps: "true" extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: SaturatedAddDotProd +- go: AddDotProdPairsSaturated commutative: false extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: FusedMultiplyAdd +- go: MulAdd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs (x * y) + z. -- go: FusedMultiplyAddSub + // NAME performs a fused (x * y) + z. +- go: MulAddSub commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: FusedMultiplySubAdd + // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. +- go: MulSubAdd commutative: false extension: "AVX.*" documentation: !string |- - // NAME performs (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. + // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml index 8da2071d..f6b6f135 100644 --- a/internal/simdgen/ops/MLOps/go.yaml +++ b/internal/simdgen/ops/MLOps/go.yaml @@ -1,5 +1,5 @@ !sum -- go: PairDotProd +- go: DotProdPairs asm: VPMADDWD in: - &int @@ -10,7 +10,7 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: SaturatedUnsignedSignedPairDotProd +- go: DotProdPairsSaturated asm: VPMADDUBSW in: - &uint @@ -23,17 +23,17 @@ overwriteElementBits: 8 out: - *int2 -- go: DotProdBroadcast - asm: VDPP[SD] - in: - - &dpb_src - go: $t - - *dpb_src - - class: immediate - const: 127 - out: - - *dpb_src -- go: UnsignedSignedQuadDotProdAccumulate +# - go: DotProdBroadcast +# asm: VDPP[SD] +# in: +# - &dpb_src +# go: $t +# - *dpb_src +# - class: immediate +# const: 127 +# out: +# - *dpb_src +- go: AddDotProdQuadruple asm: "VPDPBUSD" operandOrder: "31" # switch operand 3 and 1 in: @@ -51,7 +51,7 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: SaturatedUnsignedSignedQuadDotProdAccumulate +- go: AddDotProdQuadrupleSaturated asm: "VPDPBUSDS" operandOrder: "31" # switch operand 3 and 1 in: @@ -60,7 +60,7 @@ - *qdpa_src2 out: - *qdpa_acc -- go: AddDotProd +- go: AddDotProdPairs asm: "VPDPWSSD" in: - &pdpa_acc @@ -77,7 +77,7 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: SaturatedAddDotProd +- go: AddDotProdPairsSaturated asm: "VPDPWSSDS" in: - *pdpa_acc @@ -85,7 +85,7 @@ - *pdpa_src2 out: - *pdpa_acc -- go: FusedMultiplyAdd +- go: MulAdd asm: "VFMADD213PS|VFMADD213PD" in: - &fma_op @@ -95,7 +95,7 @@ - *fma_op out: - *fma_op -- go: FusedMultiplyAddSub +- go: MulAddSub asm: "VFMADDSUB213PS|VFMADDSUB213PD" in: - *fma_op @@ -103,7 +103,7 @@ - *fma_op out: - *fma_op -- go: FusedMultiplySubAdd +- go: MulSubAdd asm: "VFMSUBADD213PS|VFMSUBADD213PD" in: - *fma_op diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index dd9f55fb..3d868839 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ 
b/internal/simdgen/ops/Mul/go.yaml @@ -18,10 +18,11 @@ - go: MulEvenWiden asm: "VPMULDQ" in: - - &int + - &intNot64 go: $t + elemBits: 8|16|32 base: int - - *int + - *intNot64 out: - &int2 go: $t2 @@ -29,10 +30,11 @@ - go: MulEvenWiden asm: "VPMULUDQ" in: - - &uint + - &uintNot64 go: $t + elemBits: 8|16|32 base: uint - - *uint + - *uintNot64 out: - &uint2 go: $t2 @@ -43,24 +45,29 @@ - go: MulHigh asm: "VPMULHW" in: - - *int + - &int + go: $t + base: int - *int out: - - *int2 + - *int - go: MulHigh asm: "VPMULHUW" in: - - *uint + - &uint + go: $t + base: int - *uint out: - - *uint2 + - *uint # MulLow -# Signed int only. +# signed and unsigned are the same for lower bits. - go: Mul asm: "VPMULL[WDQ]" in: - - *int - - *int + - &any + go: $t + - *any out: - - *int2 + - *any From d3d6994999bd0d662e06f310b1ec5f5ae95e72f0 Mon Sep 17 00:00:00 2001 From: Mark D Ryan Date: Tue, 29 Apr 2025 08:48:41 +0000 Subject: [PATCH 183/200] riscv64: fix the path to the RISC-V extensions in spec.go The riscv-opcodes repository has been restructured. The files needed by spec.go are now to be found in the extensions directory. 
Change-Id: I163c08aed5d99088f5094c0365a9918977e39b5a Reviewed-on: https://go-review.googlesource.com/c/arch/+/670875 LUCI-TryBot-Result: Go LUCI Reviewed-by: Mark Freeman Reviewed-by: Joel Sing Reviewed-by: Meng Zhuo Reviewed-by: Dmitri Shuralyov --- riscv64/riscv64spec/spec.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go index 55c498a0..b65ea697 100644 --- a/riscv64/riscv64spec/spec.go +++ b/riscv64/riscv64spec/spec.go @@ -68,11 +68,10 @@ func main() { log.SetFlags(0) log.SetPrefix("riscv64spec: ") - var repoPath string if len(os.Args) < 1 { log.Fatal("usage: go run spec.go ") } - repoPath = os.Args[1] + extensionsPath := filepath.Join(os.Args[1], "extensions") fileTables, err := os.Create("tables.go") if err != nil { @@ -86,7 +85,7 @@ func main() { } for _, ext := range extensions { - f, err := os.Open(filepath.Join(repoPath, ext)) + f, err := os.Open(filepath.Join(extensionsPath, ext)) if err != nil { log.Fatal(err) } From 46ba08e3ae58883936f0eefa4871530b0fa6156f Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 7 Aug 2025 15:31:06 -0400 Subject: [PATCH 184/200] internal/unify: fix minor comment typo Change-Id: Ib5a1580d3561f86e7583460a03c6da708388a100 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694116 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/unify/env.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/unify/env.go b/internal/unify/env.go index b9989dd2..1200eb36 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -56,7 +56,7 @@ import ( // // e + 0 = e // e ⨯ 0 = 0 -// e ⨯ 1 = +// e ⨯ 1 = e // e + f = f + e // e ⨯ f = f ⨯ e type envSet struct { From 861b9976b78b3cdf81fc3cb14aaac37314c226f4 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 8 Aug 2025 17:32:14 +0000 Subject: [PATCH 185/200] internal/simdgen: fix imm aux types and change documentation The correct 
Aux type for immediates of SIMD instruction is uint8(signed value will be rejected by the assembler). This CL fixes it. Since we generate a jump table for non-const immediates now, this CL also updates the documentation. This CL partially generates CL 694395. Change-Id: Iaf1b0044242ad679cb326fbc6fdb07158b8266c1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694375 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdGenericOps.go | 2 +- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_simdTypes.go | 12 ++++++------ internal/simdgen/gen_simdrules.go | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index daf941d7..72cc8fab 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -19,7 +19,7 @@ func simdGenericOps() []opData { {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, {{- end }} {{- range .OpsImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "Int8"}, + {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "UInt8"}, {{- end }} } } diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index 7c538a00..fbd7ccf5 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -20,7 +20,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {{- end }} {{- range .OpsDataImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "Int8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, + {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", 
aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {{- end }} } } diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index f3c68796..c7053f24 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -311,7 +311,7 @@ func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndTyp {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} @@ -320,7 +320,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} @@ -329,7 +329,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op2Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} @@ -339,7 +339,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -348,7 +348,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op3Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -358,7 +358,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} is expected to be a constant, non-constant value will trigger a runtime panic. +// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index c910f64a..bac4e942 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -22,9 +22,9 @@ var ( {{end}} {{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} -{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [int8(c)] x) +{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) {{end}} -{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [int8(c)] x ({{.MaskInConvert}} mask)) +{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x ({{.MaskInConvert}} mask)) {{end}} `)) ) From 134aefd5422e22b9fd27337491b58870499055b2 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Mon, 11 Aug 2025 17:19:06 +0000 Subject: [PATCH 186/200] internal/simdgen: imm document improve This CL generates CL 694795. 
Change-Id: I36165d0f3cd038f2fa04b8612446b87ac1bce89c Reviewed-on: https://go-review.googlesource.com/c/arch/+/694775 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index c7053f24..820c27fa 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -311,7 +311,7 @@ func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndTyp {{define "op1Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} @@ -320,7 +320,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} {{define "op2Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} @@ -329,7 +329,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op2Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. 
+// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} @@ -339,7 +339,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op3Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -348,7 +348,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{define "op3Imm8_2I"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} @@ -358,7 +358,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin {{define "op4Imm8"}} {{if .Documentation}}{{.Documentation}} //{{end}} -// {{.ImmName}} results in better performance when it's a constant, non-constant value will trigger a jump table to be generated. +// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} From fbc9dad06686f9627e9ff873bbe622fc27730def Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 16:09:15 -0400 Subject: [PATCH 187/200] internal/simdgen/ops: use correct op for unsigned MulHigh We were matching both signed and unsigned definitions to the signed instruction. This caused dedupGodef to pick essentially arbitrarily between them, which hid the problem. Change-Id: I51cc697ebf5ee4b9ac00307d6db472ef21279904 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694857 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/godefs.go | 2 ++ internal/simdgen/ops/Mul/go.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 166a5933..522ae69a 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -349,6 +349,8 @@ func writeGoDefs(path string, cl unify.Closure) error { log.Printf("dedup len: %d\n", len(deduped)) } if !*FlagNoDedup { + // TODO: This can hide mistakes in the API definitions, especially when + // multiple patterns result in the same API unintentionally. Make it stricter. 
if deduped, err = dedupGodef(deduped); err != nil { return err } diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml index 3d868839..c0205a68 100644 --- a/internal/simdgen/ops/Mul/go.yaml +++ b/internal/simdgen/ops/Mul/go.yaml @@ -56,7 +56,7 @@ in: - &uint go: $t - base: int + base: uint - *uint out: - *uint From 3d4fe2e6b6f9416be630cf183c843f1a746bb8f4 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 16:17:21 -0400 Subject: [PATCH 188/200] internal/simdgen: compute CPU feature in XED decoder Currently, the XED decoder emits the raw "EXTENSION" and "ISA_SET" fields directly from the XED, and these are translated into a CPU feature by godefs using a bunch of fairly ad hoc string manipulations. Replace this with computing the CPU feature directly in the XED decoder. The extension and isa_set are strictly XED concepts, while "CPU features" are generic concepts. Thus, this should be the role of the XED decoder. We also use an explicit mapping table rather than string manipulations. These CPU feature names appear in the API, and thus it's important that we pay attention to their names. No effect on generated code. 
Change-Id: I1c7c79c461d57b2cd78cfa81f376683ae33c69b1 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694858 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Junyang Shao Auto-Submit: Austin Clements --- internal/simdgen/gen_simdMachineOps.go | 2 +- internal/simdgen/gen_utility.go | 48 +------- internal/simdgen/godefs.go | 9 +- internal/simdgen/ops/AddSub/categories.yaml | 8 -- .../simdgen/ops/BitwiseLogic/categories.yaml | 4 - internal/simdgen/ops/Compares/categories.yaml | 7 -- internal/simdgen/ops/Converts/categories.yaml | 2 - .../simdgen/ops/FPonlyArith/categories.yaml | 18 --- .../simdgen/ops/GaloisField/categories.yaml | 3 - .../simdgen/ops/IntOnlyArith/categories.yaml | 4 - internal/simdgen/ops/MLOps/categories.yaml | 12 +- internal/simdgen/ops/MinMax/categories.yaml | 2 - internal/simdgen/ops/Moves/categories.yaml | 11 -- internal/simdgen/ops/Mul/categories.yaml | 3 - .../simdgen/ops/ShiftRotate/categories.yaml | 17 --- internal/simdgen/xed.go | 108 ++++++++++++++++-- 16 files changed, 104 insertions(+), 154 deletions(-) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index fbd7ccf5..f110ae61 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -79,7 +79,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { } } // Makes AVX512 operations use upper registers - if strings.Contains(op.Extension, "AVX512") { + if strings.Contains(op.CPUFeature, "AVX512") { regInfo = strings.ReplaceAll(regInfo, "v", "w") } if _, ok := regInfoSet[regInfo]; !ok { diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 59832e0e..8a3e1735 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -515,50 +515,6 @@ func dedup(ops []Operation) (deduped []Operation) { return } -func fillCPUFeature(ops []Operation) (filled []Operation, excluded []Operation) { - allCPUFeatures := map[string]struct{}{} - for 
_, op := range ops { - if op.ISASet == "" { - newS := op.Extension - op.CPUFeature = &newS - } else { - newS := strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(op.ISASet, "_128"), "_256"), "_512") - newS = strings.TrimSuffix(strings.TrimSuffix(strings.TrimSuffix(newS, "_128N"), "_256N"), "_512N") - op.CPUFeature = &newS - } - if *op.CPUFeature == "AVX" || *op.CPUFeature == "AVX2" || strings.HasPrefix(*op.CPUFeature, "AVX512") || - strings.HasPrefix(*op.CPUFeature, "AVX_") || strings.HasPrefix(*op.CPUFeature, "AVX2_") { - // This excludes instructions from CPU Features like AVX10.1, which usually are rebrandings of AVX512. - filled = append(filled, op) - if strings.Contains(*op.CPUFeature, "_") { - *op.CPUFeature = strings.ReplaceAll(*op.CPUFeature, "_", "") - } - allCPUFeatures[*op.CPUFeature] = struct{}{} - } else { - excluded = append(excluded, op) - } - } - // Sanity check, make sure we are not excluding the only definition of an operation - filledSeen := map[string]struct{}{} - excludedSeen := map[string]Operation{} - for _, op := range filled { - filledSeen[op.Go+*op.In[0].Go] = struct{}{} - } - for _, op := range excluded { - excludedSeen[op.Go+*op.In[0].Go] = op - } - for k, op := range excludedSeen { - if _, ok := filledSeen[k]; !ok { - panic(fmt.Sprintf("simdgen is excluding the only def of op: %s", op)) - } - } - if *Verbose { - // It might contain - log.Printf("All CPU Features: %v\n", allCPUFeatures) - } - return -} - func (op Operation) GenericName() string { if op.OperandOrder != nil { switch *op.OperandOrder { @@ -597,7 +553,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { return ops, nil } isAVX512 := func(op Operation) bool { - return strings.Contains(op.Extension, "AVX512") + return strings.Contains(op.CPUFeature, "AVX512") } deduped := []Operation{} for _, dup := range seen { @@ -610,7 +566,7 @@ func dedupGodef(ops []Operation) ([]Operation, error) { if isAVX512(i) && !isAVX512(j) { return 1 } - return 
strings.Compare(*i.CPUFeature, *j.CPUFeature) + return strings.Compare(i.CPUFeature, j.CPUFeature) }) } deduped = append(deduped, dup[0]) diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 522ae69a..c37b4d38 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -55,9 +55,7 @@ type rawOperation struct { InVariant []Operand // Optional parameters Out []Operand // Results Commutative bool // Commutativity - Extension string // Extension - ISASet string // ISASet - CPUFeature *string // If ISASet is empty, then Extension, otherwise ISASet + CPUFeature string // CPUID/Has* feature name Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Documentation *string // Documentation will be appended to the stubs comments. // ConstMask is a hack to reduce the size of defs the user writes for const-immediate @@ -329,11 +327,6 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. deduped := dedup(ops) - var excluded []Operation - deduped, excluded = fillCPUFeature(deduped) - if *Verbose { - log.Printf("excluded len: %d\n", len(excluded)) - } if *Verbose { log.Printf("dedup len: %d\n", len(ops)) diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml index 4e492516..35e81042 100644 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ b/internal/simdgen/ops/AddSub/categories.yaml @@ -1,45 +1,37 @@ !sum - go: Add commutative: true - extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors. - go: AddSaturated commutative: true - extension: "AVX.*" documentation: !string |- // NAME adds corresponding elements of two vectors with saturation. - go: Sub commutative: false - extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors. 
- go: SubSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME subtracts corresponding elements of two vectors with saturation. - go: AddPairs commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SubPairs commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. - go: AddPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally adds adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. - go: SubPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements with saturation. // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml index 320cfd18..3142d191 100644 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ b/internal/simdgen/ops/BitwiseLogic/categories.yaml @@ -1,22 +1,18 @@ !sum - go: And commutative: true - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise AND operation between two vectors. - go: Or commutative: true - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise OR operation between two vectors. - go: AndNot commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise x &^ y. 
- go: Xor commutative: true - extension: "AVX.*" documentation: !string |- // NAME performs a bitwise XOR operation between two vectors. diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml index e3d990ed..aa07ade2 100644 --- a/internal/simdgen/ops/Compares/categories.yaml +++ b/internal/simdgen/ops/Compares/categories.yaml @@ -9,42 +9,35 @@ - go: Equal constImm: 0 commutative: true - extension: "AVX.*" documentation: !string |- // NAME compares for equality. - go: Less constImm: 1 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for less than. - go: LessEqual constImm: 2 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for less than or equal. - go: IsNan # For float only. constImm: 3 commutative: true - extension: "AVX.*" documentation: !string |- // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 commutative: true - extension: "AVX.*" documentation: !string |- // NAME compares for inequality. - go: GreaterEqual constImm: 13 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for greater than or equal. - go: Greater constImm: 14 commutative: false - extension: "AVX.*" documentation: !string |- // NAME compares for greater than. diff --git a/internal/simdgen/ops/Converts/categories.yaml b/internal/simdgen/ops/Converts/categories.yaml index 16316ed3..cc6c419d 100644 --- a/internal/simdgen/ops/Converts/categories.yaml +++ b/internal/simdgen/ops/Converts/categories.yaml @@ -1,12 +1,10 @@ !sum - go: ConvertToInt32 commutative: false - extension: "AVX.*" documentation: !string |- // ConvertToInt32 converts element values to int32. - go: ConvertToUint32 commutative: false - extension: "AVX.*" documentation: !string |- // ConvertToUint32Masked converts element values to uint32. 
diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml index 512cfc50..f2d8af68 100644 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ b/internal/simdgen/ops/FPonlyArith/categories.yaml @@ -1,103 +1,85 @@ !sum - go: Div commutative: false - extension: "AVX.*" documentation: !string |- // NAME divides elements of two vectors. - go: Sqrt commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes the square root of each element. - go: Reciprocal commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of each element. - go: ReciprocalSqrt commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an approximate reciprocal of the square root of each element. - go: Scale commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies elements by a power of 2. - go: RoundToEven commutative: false - extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements to the nearest integer. - go: RoundToEvenScaled commutative: false - extension: "AVX.*" constImm: 0 documentation: !string |- // NAME rounds elements with specified precision. - go: RoundToEvenScaledResidue commutative: false - extension: "AVX.*" constImm: 0 documentation: !string |- // NAME computes the difference after rounding with specified precision. - go: Floor commutative: false - extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down to the nearest integer. - go: FloorScaled commutative: false - extension: "AVX.*" constImm: 1 documentation: !string |- // NAME rounds elements down with specified precision. - go: FloorScaledResidue commutative: false - extension: "AVX.*" constImm: 1 documentation: !string |- // NAME computes the difference after flooring with specified precision. 
- go: Ceil commutative: false - extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up to the nearest integer. - go: CeilScaled commutative: false - extension: "AVX.*" constImm: 2 documentation: !string |- // NAME rounds elements up with specified precision. - go: CeilScaledResidue commutative: false - extension: "AVX.*" constImm: 2 documentation: !string |- // NAME computes the difference after ceiling with specified precision. - go: Trunc commutative: false - extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements towards zero. - go: TruncScaled commutative: false - extension: "AVX.*" constImm: 3 documentation: !string |- // NAME truncates elements with specified precision. - go: TruncScaledResidue commutative: false - extension: "AVX.*" constImm: 3 documentation: !string |- // NAME computes the difference after truncating with specified precision. - go: AddSub commutative: false - extension: "AVX.*" documentation: !string |- // NAME subtracts even elements and adds odd elements of two vectors. diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml index d57b5265..25824625 100644 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ b/internal/simdgen/ops/GaloisField/categories.yaml @@ -1,7 +1,6 @@ !sum - go: GaloisFieldAffineTransform commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an affine transformation in GF(2^8): // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; @@ -9,7 +8,6 @@ // corresponding to a group of 8 elements in x. - go: GaloisFieldAffineTransformInverse commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes an affine transformation in GF(2^8), // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: @@ -18,7 +16,6 @@ // corresponding to a group of 8 elements in x. 
- go: GaloisFieldMul commutative: false - extension: "AVX.*" documentation: !string |- // NAME computes element-wise GF(2^8) multiplication with // reduction polynomial x^8 + x^4 + x^3 + x + 1. diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml index 2c7a9998..bf33642a 100644 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ b/internal/simdgen/ops/IntOnlyArith/categories.yaml @@ -1,25 +1,21 @@ !sum - go: Average commutative: true - extension: "AVX.*" # VPAVGB/W are available across various AVX versions documentation: !string |- // NAME computes the rounded average of corresponding elements. - go: Abs commutative: false # Unary operation, not commutative - extension: "AVX.*" # VPABSB/W/D are AVX, VPABSQ is AVX512 documentation: !string |- // NAME computes the absolute value of each element. - go: CopySign # Applies sign of second operand to first: sign(val, sign_src) commutative: false - extension: "AVX.*" documentation: !string |- // NAME returns the product of the first operand with -1, 0, or 1, // whichever constant is nearest to the value of the second operand. # Sign does not have masked version - go: OnesCount commutative: false - extension: "AVX512.*" documentation: !string |- // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml index 6c5d3c67..97381e1e 100644 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ b/internal/simdgen/ops/MLOps/categories.yaml @@ -1,57 +1,47 @@ !sum - go: DotProdPairs commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. 
- go: DotProdPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. # QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. # - go: DotProdBroadcast # commutative: true -# extension: "AVX.*" -# documentation: !string |- +# # documentation: !string |- # // NAME multiplies all elements and broadcasts the sum. - go: AddDotProdQuadruple commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. - go: AddDotProdQuadrupleSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. - go: AddDotProdPairs commutative: false noTypes: "true" noGenericOps: "true" - extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. - go: AddDotProdPairsSaturated commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. - go: MulAdd commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a fused (x * y) + z. - go: MulAddSub commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. - go: MulSubAdd commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
diff --git a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml index 9ac0d3d4..a7e30f46 100644 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ b/internal/simdgen/ops/MinMax/categories.yaml @@ -1,11 +1,9 @@ !sum - go: Max commutative: true - extension: "AVX.*" documentation: !string |- // NAME computes the maximum of corresponding elements. - go: Min commutative: true - extension: "AVX.*" documentation: !string |- // NAME computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index 6f30ccbc..cd9260ab 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ b/internal/simdgen/ops/Moves/categories.yaml @@ -1,48 +1,40 @@ !sum - go: SetElem commutative: false - extension: "AVX.*" documentation: !string |- // NAME sets a single constant-indexed element's value. - go: GetElem commutative: false - extension: "AVX.*" documentation: !string |- // NAME retrieves a single constant-indexed element's value. - go: SetLo commutative: false constImm: 0 - extension: "AVX.*" documentation: !string |- // NAME returns x with its lower half set to y. - go: GetLo commutative: false constImm: 0 - extension: "AVX.*" documentation: !string |- // NAME returns the lower half of x. - go: SetHi commutative: false constImm: 1 - extension: "AVX.*" documentation: !string |- // NAME returns x with its upper half set to y. - go: GetHi commutative: false constImm: 1 - extension: "AVX.*" documentation: !string |- // NAME returns the upper half of x. - go: Permute commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x using indices: // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} // Only the needed bits to represent x's index are used in indices' elements. 
- go: Permute2 # Permute2 is only available on or after AVX512 commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a full permutation of vector x, y using indices: // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} @@ -50,19 +42,16 @@ // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs a compression on vector x using mask by // selecting elements as indicated by mask, and pack them to lower indexed elements. - go: blend commutative: false - extension: "AVX.*" documentation: !string |- // NAME blends two vectors based on mask values, choosing either // the first or the second based on whether the third is false or true - go: Expand commutative: false - extension: "AVX.*" documentation: !string |- // NAME performs an expansion on a vector x whose elements are packed to lower parts. // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml index f4e2aed2..92491b51 100644 --- a/internal/simdgen/ops/Mul/categories.yaml +++ b/internal/simdgen/ops/Mul/categories.yaml @@ -1,17 +1,14 @@ !sum - go: Mul commutative: true - extension: "AVX.*" documentation: !string |- // NAME multiplies corresponding elements of two vectors. - go: MulEvenWiden commutative: true - extension: "AVX.*" documentation: !string |- // NAME multiplies even-indexed elements, widening the result. // Result[i] = v1.Even[i] * v2.Even[i]. - go: MulHigh commutative: true - extension: "AVX.*" documentation: !string |- // NAME multiplies elements and stores the high part of the result. 
diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml index e51d289b..0d0b006c 100644 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ b/internal/simdgen/ops/ShiftRotate/categories.yaml @@ -3,7 +3,6 @@ nameAndSizeCheck: true specialLower: sftimm commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. - go: ShiftAllRight @@ -11,7 +10,6 @@ nameAndSizeCheck: true specialLower: sftimm commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. - go: ShiftAllRight @@ -19,7 +17,6 @@ specialLower: sftimm nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. - go: shiftAllLeftConst # no APIs, only ssa ops. @@ -28,7 +25,6 @@ SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. nameAndSizeCheck: true commutative: false - extension: "AVX.*" - go: shiftAllRightConst # no APIs, only ssa ops. noTypes: "true" noGenericOps: "true" @@ -36,7 +32,6 @@ signed: false nameAndSizeCheck: true commutative: false - extension: "AVX.*" - go: shiftAllRightConst # no APIs, only ssa ops. noTypes: "true" noGenericOps: "true" @@ -44,77 +39,65 @@ signed: true nameAndSizeCheck: true commutative: false - extension: "AVX.*" - go: ShiftLeft nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
- go: ShiftRight signed: false nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. - go: ShiftRight signed: true nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. - go: RotateAllLeft nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element to the left by the number of bits specified by the immediate. - go: RotateLeft nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. - go: RotateAllRight nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element to the right by the number of bits specified by the immediate. - go: RotateRight nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. - go: ShiftAllLeftConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
- go: ShiftAllRightConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. - go: ShiftRightConcat nameAndSizeCheck: true commutative: false - extension: "AVX.*" documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index f773fcda..b0b4ab5a 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -5,9 +5,12 @@ package main import ( + "cmp" "fmt" "log" + "maps" "regexp" + "slices" "strconv" "strings" @@ -52,9 +55,9 @@ func loadXED(xedPath string) []*unify.Value { switch { case inst.RealOpcode == "N": return // Skip unstable instructions - case !(strings.HasPrefix(inst.Extension, "SSE") || strings.HasPrefix(inst.Extension, "AVX")): - // We're only intested in SSE and AVX instuctions. - return // Skip non-AVX or SSE instructions + case !strings.HasPrefix(inst.Extension, "AVX"): + // We're only interested in AVX instructions. 
+ return } if *flagDebugXED { @@ -85,6 +88,30 @@ func loadXED(xedPath string) []*unify.Value { if err != nil { log.Fatalf("walk insts: %v", err) } + + if len(unknownFeatures) > 0 { + if !*Verbose { + nInst := 0 + for _, insts := range unknownFeatures { + nInst += len(insts) + } + log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst) + } else { + keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int { + return cmp.Or(cmp.Compare(a.Extension, b.Extension), + cmp.Compare(a.ISASet, b.ISASet)) + }) + for _, key := range keys { + if key.ISASet == "" || key.ISASet == key.Extension { + log.Printf("unhandled Extension %s", key.Extension) + } else { + log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet) + } + log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key]))) + } + } + } + return defs } @@ -492,22 +519,25 @@ func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVaria } func instToUVal(inst *xeddata.Inst, ops []operand) []*unify.Value { + feature, ok := decodeCPUFeature(inst) + if !ok { + return nil + } + var vals []*unify.Value - vals = append(vals, instToUVal1(inst, ops, instVariantNone)) + vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone)) if hasOptionalMask(ops) { - vals = append(vals, instToUVal1(inst, ops, instVariantMasked)) + vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked)) } return vals } -func instToUVal1(inst *xeddata.Inst, ops []operand, variant instVariant) *unify.Value { - // TODO: "feature" +func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant) *unify.Value { var db unify.DefBuilder db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode()))) addOperandsToDef(ops, &db, variant) - db.Add("extension", unify.NewValue(unify.NewStringExact(inst.Extension))) - 
db.Add("isaset", unify.NewValue(unify.NewStringExact(inst.ISASet))) + db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature))) if strings.Contains(inst.Pattern, "ZEROING=0") { // This is an EVEX instruction, but the ".Z" (zero-merging) @@ -531,6 +561,66 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, variant instVariant) *unify. return unify.NewValuePos(db.Build(), pos) } +// decodeCPUFeature returns the CPU feature name required by inst. These match +// the names of the "Has*" feature checks in the simd package. +func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { + key := cpuFeatureKey{ + Extension: inst.Extension, + ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""), + } + feat, ok := cpuFeatureMap[key] + if !ok { + imap := unknownFeatures[key] + if imap == nil { + imap = make(map[string]struct{}) + unknownFeatures[key] = imap + } + imap[inst.Opcode()] = struct{}{} + return "", false + } + if feat == "ignore" { + return "", false + } + return feat, true +} + +var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$") + +type cpuFeatureKey struct { + Extension, ISASet string +} + +// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name +// that can be used in the SIMD API. +var cpuFeatureMap = map[cpuFeatureKey]string{ + {"AVX", ""}: "AVX", + {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", + {"AVX2", ""}: "AVX2", + + // AVX-512 foundational features + // + // TODO: These should all map to "AVX512". + {"AVX512EVEX", "AVX512F"}: "AVX512F", + {"AVX512EVEX", "AVX512CD"}: "AVX512CD", + {"AVX512EVEX", "AVX512BW"}: "AVX512BW", + {"AVX512EVEX", "AVX512DQ"}: "AVX512DQ", + // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by + // the vector length suffix. 
+ + // AVX-512 extension features + {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG", + {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI", + {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2", + {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI", + {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI", + {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ", + + // AVX 10.2 (not yet supported) + {"AVX512EVEX", "AVX10_2_RC"}: "ignore", +} + +var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{} + // hasOptionalMask returns whether there is an optional mask operand in ops. func hasOptionalMask(ops []operand) bool { for _, op := range ops { From c1242d79c210cb1fc8df78e9a9221324d456fc12 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 16:23:30 -0400 Subject: [PATCH 189/200] internal/simdgen: combine AVX512F+CD+BW+DQ+VL into "AVX512" feature This affects only comments in the generated code. Change-Id: Ieb475ffaf9ae90e5f5b78c72b556e92e6e65b0c6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694859 Reviewed-by: David Chase Reviewed-by: Junyang Shao Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI --- internal/simdgen/xed.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go index b0b4ab5a..3bbf2cbc 100644 --- a/internal/simdgen/xed.go +++ b/internal/simdgen/xed.go @@ -597,13 +597,11 @@ var cpuFeatureMap = map[cpuFeatureKey]string{ {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", {"AVX2", ""}: "AVX2", - // AVX-512 foundational features - // - // TODO: These should all map to "AVX512". - {"AVX512EVEX", "AVX512F"}: "AVX512F", - {"AVX512EVEX", "AVX512CD"}: "AVX512CD", - {"AVX512EVEX", "AVX512BW"}: "AVX512BW", - {"AVX512EVEX", "AVX512DQ"}: "AVX512DQ", + // AVX-512 foundational features. We combine all of these into one "AVX512" feature. 
+ {"AVX512EVEX", "AVX512F"}: "AVX512", + {"AVX512EVEX", "AVX512CD"}: "AVX512", + {"AVX512EVEX", "AVX512BW"}: "AVX512", + {"AVX512EVEX", "AVX512DQ"}: "AVX512", // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by // the vector length suffix. From 88601128e7881337b45f4ea9ef1036dadd8e3f6e Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 22:24:20 -0400 Subject: [PATCH 190/200] internal/simdgen: single copy of the generated header string Change-Id: I1c9b2d09961513e1b2a1e2087204afc3f8383459 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694860 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- internal/simdgen/gen_simdGenericOps.go | 3 ++- internal/simdgen/gen_simdIntrinsics.go | 4 +++- internal/simdgen/gen_simdMachineOps.go | 3 ++- internal/simdgen/gen_simdTypes.go | 24 ++++++------------------ internal/simdgen/gen_simdrules.go | 9 +-------- internal/simdgen/godefs.go | 3 +++ 6 files changed, 17 insertions(+), 29 deletions(-) diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go index 72cc8fab..3dbbeb09 100644 --- a/internal/simdgen/gen_simdGenericOps.go +++ b/internal/simdgen/gen_simdGenericOps.go @@ -10,7 +10,7 @@ import ( "sort" ) -const simdGenericOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+const simdGenericOpsTmpl = ` package main func simdGenericOps() []opData { @@ -30,6 +30,7 @@ func simdGenericOps() []opData { func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { t := templateOf(simdGenericOpsTmpl, "simdgenericOps") buffer := new(bytes.Buffer) + buffer.WriteString(generatedHeader) type genericOpsData struct { OpName string diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go index 5050834b..6a1501e1 100644 --- a/internal/simdgen/gen_simdIntrinsics.go +++ b/internal/simdgen/gen_simdIntrinsics.go @@ -10,7 +10,8 @@ import ( "slices" ) -const simdIntrinsicsTmpl = `{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +const simdIntrinsicsTmpl = ` +{{define "header"}} package ssagen import ( @@ -92,6 +93,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") buffer := new(bytes.Buffer) + buffer.WriteString(generatedHeader) if err := t.ExecuteTemplate(buffer, "header", nil); err != nil { panic(fmt.Errorf("failed to execute header template: %w", err)) diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go index f110ae61..64918e55 100644 --- a/internal/simdgen/gen_simdMachineOps.go +++ b/internal/simdgen/gen_simdMachineOps.go @@ -11,7 +11,7 @@ import ( "strings" ) -const simdMachineOpsTmpl = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+const simdMachineOpsTmpl = ` package main func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { @@ -31,6 +31,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") buffer := new(bytes.Buffer) + buffer.WriteString(generatedHeader) type opData struct { OpName string diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 820c27fa..57d48317 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -117,13 +117,13 @@ func compareSimdTypePairs(x, y simdTypePair) int { return compareSimdTypes(x.Tdst, y.Tdst) } -const simdTypesTemplates = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. - +const simdPackageHeader = generatedHeader + ` //go:build goexperiment.simd package simd -{{end}} +` +const simdTypesTemplates = ` {{define "sizeTmpl"}} // v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD type v{{.}} struct { @@ -203,13 +203,7 @@ func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lan func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) ` -const simdStubsTmpl = `{{define "fileHeader"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
- -//go:build goexperiment.simd - -package simd -{{end}} - +const simdStubsTmpl = ` {{define "op1"}} {{if .Documentation}}{{.Documentation}} //{{end}} @@ -479,10 +473,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64") buffer := new(bytes.Buffer) - - if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) - } + buffer.WriteString(simdPackageHeader) sizes := make([]int, 0, len(typeMap)) for size, types := range typeMap { @@ -535,10 +526,7 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { t := templateOf(simdStubsTmpl, "simdStubs") buffer := new(bytes.Buffer) - - if err := t.ExecuteTemplate(buffer, "fileHeader", nil); err != nil { - panic(fmt.Errorf("failed to execute fileHeader template: %w", err)) - } + buffer.WriteString(simdPackageHeader) slices.SortFunc(ops, compareOperations) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index bac4e942..d1db2545 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -7,7 +7,6 @@ package main import ( "bytes" "fmt" - "io" "slices" "text/template" ) @@ -58,13 +57,7 @@ func compareTplRuleData(x, y tplRuleData) int { // within the specified directory. func writeSIMDRules(ops []Operation) *bytes.Buffer { buffer := new(bytes.Buffer) - - header := `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
- -` - if _, err := io.WriteString(buffer, header); err != nil { - panic(fmt.Errorf("failed to write header: %w", err)) - } + buffer.WriteString(generatedHeader + "\n") var allData []tplRuleData diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index c37b4d38..203b227e 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -305,6 +305,9 @@ func compareNatural(s1, s2 string) int { return strings.Compare(s1, s2) } +const generatedHeader = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +` + func writeGoDefs(path string, cl unify.Closure) error { // TODO: Merge operations with the same signature but multiple // implementations (e.g., SSE vs AVX) From 1e80165d14f3d8caf67c9e0cb801fa252ea63b98 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 8 Aug 2025 22:42:08 -0400 Subject: [PATCH 191/200] internal/simdgen: generate cpu.go feature checks API Change-Id: I205a88c9d643f4f76b5dade5e674ce0f413e6570 Reviewed-on: https://go-review.googlesource.com/c/arch/+/694861 Auto-Submit: Austin Clements LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: David Chase --- internal/simdgen/gen_simdTypes.go | 56 +++++++++++++++++++++++++++++++ internal/simdgen/godefs.go | 1 + 2 files changed, 57 insertions(+) diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go index 57d48317..a367cce0 100644 --- a/internal/simdgen/gen_simdTypes.go +++ b/internal/simdgen/gen_simdTypes.go @@ -6,7 +6,9 @@ package main import ( "bytes" + "cmp" "fmt" + "maps" "slices" "sort" "strings" @@ -140,6 +142,29 @@ type {{.Name}} struct { {{end}} ` +const simdFeaturesTemplate = ` +import "internal/cpu" + +{{range .}} +{{- if eq .Feature "AVX512"}} +// Has{{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features. 
+// +// These five CPU features are bundled together, and no use of AVX-512 +// is allowed unless all of these features are supported together. +// Nearly every CPU that has shipped with any support for AVX-512 has +// supported all five of these features. +{{- else -}} +// Has{{.Feature}} returns whether the CPU supports the {{.Feature}} feature. +{{- end}} +// +// Has{{.Feature}} is defined on all GOARCHes, but will only return true on +// GOARCH {{.GoArch}}. +func Has{{.Feature}}() bool { + return cpu.X86.Has{{.Feature}} +} +{{end}} +` + const simdLoadStoreTemplate = ` // Len returns the number of elements in a {{.Name}} func (x {{.Name}}) Len() int { return {{.Lanes}} } @@ -521,6 +546,37 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { return buffer } +func writeSIMDFeatures(ops []Operation) *bytes.Buffer { + // Gather all features + type featureKey struct { + GoArch string + Feature string + } + featureSet := make(map[featureKey]struct{}) + for _, op := range ops { + featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{} + } + features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { + if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 { + return c + } + return compareNatural(a.Feature, b.Feature) + }) + + // If we ever have the same feature name on more than one GOARCH, we'll have + // to be more careful about this. + t := templateOf(simdFeaturesTemplate, "features") + + buffer := new(bytes.Buffer) + buffer.WriteString(simdPackageHeader) + + if err := t.Execute(buffer, features); err != nil { + panic(fmt.Errorf("failed to execute features template: %w", err)) + } + + return buffer +} + // writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go // within the specified directory. 
func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 203b227e..7c65d0ad 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -366,6 +366,7 @@ func writeGoDefs(path string, cl unify.Closure) error { typeMap := parseSIMDTypes(deduped) formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") + formatWriteAndClose(writeSIMDFeatures(deduped), path, "src/"+simdPackage+"/cpu.go") formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") From 0bf34ca4f31739c2faf8b6c4a75d783f5f7cfa55 Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 11 Aug 2025 17:04:16 -0400 Subject: [PATCH 192/200] internal/simdgen: make sure that output is based on sorted data there was still some variation, this may not be "the best" order in all cases, but it is definitely better than no order, and we can tweak individual files as we decide it is suitable. this does not change the current generated files, but that turns out to be just luck. Change-Id: I38c6ac72f69b9d29c71de3250985cff8b7fcd677 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695335 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_simdrules.go | 42 +++++++++++++++++++++++-------- internal/simdgen/godefs.go | 1 + 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index d1db2545..9a0bfd51 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -11,6 +11,17 @@ import ( "text/template" ) +type tplRuleData struct { + tplName string // e.g. "sftimm" + GoOp string // e.g. 
"ShiftAllLeft" + GoType string // e.g. "Uint32x8" + Args string // e.g. "x y" + Asm string // e.g. "VPSLLD256" + ArgsOut string // e.g. "x y" + MaskInConvert string // e.g. "VPMOVVec32x8ToM" + MaskOutConvert string // e.g. "VPMOVMToVec32x8" +} + var ( ruleTemplates = template.Must(template.New("simdRules").Parse(` {{define "pureVreg"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ArgsOut}}) @@ -28,19 +39,17 @@ var ( `)) ) -type tplRuleData struct { - tplName string - GoOp string - GoType string - Args string - Asm string - ArgsOut string - MaskInConvert string - MaskOutConvert string +// SSA rewrite rules need to appear in a most-to-least-specific order. This works for that. +var tmplOrder = map[string]int{ + "masksftimm": 0, + "sftimm": 1, + "maskInMaskOut": 2, + "maskOut": 3, + "maskIn": 4, + "pureVreg": 5, } func compareTplRuleData(x, y tplRuleData) int { - // TODO should MaskedXYZ compare just after XYZ? if c := compareNatural(x.GoOp, y.GoOp); c != 0 { return c } @@ -50,7 +59,18 @@ func compareTplRuleData(x, y tplRuleData) int { if c := compareNatural(x.Args, y.Args); c != 0 { return c } - return 0 + if x.tplName == y.tplName { + return 0 + } + xo, xok := tmplOrder[x.tplName] + yo, yok := tmplOrder[y.tplName] + if !xok { + panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", x.tplName)) + } + if !yok { + panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", y.tplName)) + } + return xo - yo } // writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go index 7c65d0ad..3a830ead 100644 --- a/internal/simdgen/godefs.go +++ b/internal/simdgen/godefs.go @@ -330,6 +330,7 @@ func writeGoDefs(path string, cl unify.Closure) error { // The parsed XED data might contain duplicates, like // 512 bits VPADDP. 
deduped := dedup(ops) + slices.SortFunc(deduped, compareOperations) if *Verbose { log.Printf("dedup len: %d\n", len(ops)) From 0177facd94fd367ffbefb7fe104f951543883b4b Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 12 Aug 2025 16:59:31 -0400 Subject: [PATCH 193/200] internal/simdgen: fix generated rules for shifts the rewrite rules don't always apply in the friendliest order, be sure that they are defined so they work for all orders. this generates dev.simd CL 695475 Change-Id: I80784b1df90108fa97ea6156cdc9259fd2696868 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695455 Reviewed-by: Junyang Shao LUCI-TryBot-Result: Go LUCI --- internal/simdgen/gen_simdrules.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go index 9a0bfd51..b0fc7e62 100644 --- a/internal/simdgen/gen_simdrules.go +++ b/internal/simdgen/gen_simdrules.go @@ -32,9 +32,9 @@ var ( {{end}} {{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) {{end}} -{{define "sftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) +{{define "sftimm"}}({{.Asm}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) {{end}} -{{define "masksftimm"}}({{.GoOp}}{{.GoType}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x ({{.MaskInConvert}} mask)) +{{define "masksftimm"}}({{.Asm}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x mask) {{end}} `)) ) @@ -176,22 +176,24 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { if gOp.SpecialLower != nil { if *gOp.SpecialLower == "sftimm" { - sftImmData := data - if tplName == "maskIn" { - sftImmData.tplName = "masksftimm" - } else { - sftImmData.tplName = "sftimm" + if data.GoType[0] == 'I' { + // only do these for signed types, it is a duplicate rewrite for unsigned + sftImmData := data + if tplName == "maskIn" { + 
sftImmData.tplName = "masksftimm" + } else { + sftImmData.tplName = "sftimm" + } + allData = append(allData, sftImmData) } - allData = append(allData, sftImmData) } else { panic("simdgen sees unknwon special lower " + *gOp.SpecialLower + ", maybe implement it?") } - } else { - // SpecialLower rules cannot use "...". - if tplName == "pureVreg" && data.Args == data.ArgsOut { - data.Args = "..." - data.ArgsOut = "..." - } + } + + if tplName == "pureVreg" && data.Args == data.ArgsOut { + data.Args = "..." + data.ArgsOut = "..." } data.tplName = tplName allData = append(allData, data) From faba133cd546b7e2eb39b29f0e38f4d65b873d13 Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 5 Aug 2025 16:30:53 -0400 Subject: [PATCH 194/200] internal/simdgen: add broadcast helper methods and SetElem for floats Generates dev.simd CL 693758 Change-Id: I97b34d453b09054dd1eef4b3f192c2946ff4875f Reviewed-on: https://go-review.googlesource.com/c/arch/+/693599 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/gen_utility.go | 3 + internal/simdgen/ops/Moves/categories.yaml | 15 +++ internal/simdgen/ops/Moves/go.yaml | 128 +++++++++++++++++++-- 3 files changed, 138 insertions(+), 8 deletions(-) diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go index 8a3e1735..20ce3c13 100644 --- a/internal/simdgen/gen_utility.go +++ b/internal/simdgen/gen_utility.go @@ -642,6 +642,9 @@ func overwrite(ops []Operation) error { } else if op[idx].OverwriteBase != nil { oBase := *op[idx].OverwriteBase *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) + if op[idx].Class == "greg" { + *op[idx].Go = strings.ReplaceAll(*op[idx].Go, *op[idx].Base, oBase) + } *op[idx].Base = oBase } return nil diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml index cd9260ab..ef8e0360 100644 --- a/internal/simdgen/ops/Moves/categories.yaml +++ 
b/internal/simdgen/ops/Moves/categories.yaml @@ -55,3 +55,18 @@ documentation: !string |- // NAME performs an expansion on a vector x whose elements are packed to lower parts. // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +- go: Broadcast128 + commutative: false + documentation: !string |- + // NAME copies element zero of its (128-bit) input to all elements of + // the 128-bit output vector. +- go: Broadcast256 + commutative: false + documentation: !string |- + // NAME copies element zero of its (128-bit) input to all elements of + // the 256-bit output vector. +- go: Broadcast512 + commutative: false + documentation: !string |- + // NAME copies element zero of its (128-bit) input to all elements of + // the 512-bit output vector. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml index 50e2869e..71981c12 100644 --- a/internal/simdgen/ops/Moves/go.yaml +++ b/internal/simdgen/ops/Moves/go.yaml @@ -15,6 +15,24 @@ out: - *t +- go: SetElem + asm: "VPINSR[DQ]" + in: + - &t + class: vreg + base: int + OverwriteBase: float + - class: greg + base: int + OverwriteBase: float + lanes: 1 # Scalar, darn it! + - &imm + class: immediate + immOffset: 0 + name: index + out: + - *t + - go: GetElem asm: "VPEXTR[BWDQ]" in: @@ -195,10 +213,10 @@ - go: Permute2 asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" - # Because we are overwriting the receiver's type, we + # Because we are overwriting the receiver's type, we # have to move the receiver to be a parameter so that # we can have no duplication. - operandOrder: "231Type1" + operandOrder: "231Type1" in: - *anyindices # result in arg 0 - *any @@ -218,16 +236,16 @@ # For now a non-public method because # (1) [OverwriteClass] must be set together with [OverwriteBase] # (2) "simdgen does not support [OverwriteClass] in inputs". -# That means the signature is wrong. +# That means the signature is wrong. 
- go: blend asm: VPBLENDVB - in: + in: - &v go: $t class: vreg base: int - *v - - + - class: vreg base: int name: mask @@ -237,15 +255,15 @@ # For AVX512 - go: blend asm: VPBLENDM[BWDQ] - in: + in: - &v go: $t bits: 512 class: vreg base: int - *v - inVariant: - - + inVariant: + - class: mask out: - *v @@ -258,3 +276,97 @@ - *any out: - *any + +- go: Broadcast128 + asm: VPBROADCAST[BWDQ] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 128 + elemBits: $e + base: $b + +# weirdly, this one case on AVX2 is memory-operand-only +- go: Broadcast128 + asm: VPBROADCASTQ + in: + - class: vreg + bits: 128 + elemBits: 64 + base: int + OverwriteBase: float + out: + - class: vreg + bits: 128 + elemBits: 64 + base: int + OverwriteBase: float + +- go: Broadcast256 + asm: VPBROADCAST[BWDQ] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 256 + elemBits: $e + base: $b + +- go: Broadcast512 + asm: VPBROADCAST[BWDQ] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 512 + elemBits: $e + base: $b + +- go: Broadcast128 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 128 + elemBits: $e + base: $b + +- go: Broadcast256 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 256 + elemBits: $e + base: $b + +- go: Broadcast512 + asm: VBROADCASTS[SD] + in: + - class: vreg + bits: 128 + elemBits: $e + base: $b + out: + - class: vreg + bits: 512 + elemBits: $e + base: $b From d90dca2d02c32d001dc2abba41e7ce3fb86c7992 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 1 Aug 2025 09:26:12 -0400 Subject: [PATCH 195/200] internal/simdgen: remove non-masked 128/256-bit AVX512 comparisons If we intend to emulate these on AVX2, and also give them the "good names", then we can't use those same names for AVX512 versions of the comparisons. 
Generates dev.simd CL 692335 Change-Id: I4e814b4de42cea38fb6e81c293a21cc56f45c13e Reviewed-on: https://go-review.googlesource.com/c/arch/+/692355 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- internal/simdgen/main.go | 2 +- internal/simdgen/ops/Compares/go.yaml | 46 +++++++++++++++++++++++++-- internal/unify/domain.go | 35 +++++++++++++++++++- internal/unify/env.go | 5 +++ internal/unify/value.go | 7 ++++ 5 files changed, 91 insertions(+), 4 deletions(-) diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go index 69eb85f9..a7f0b0de 100644 --- a/internal/simdgen/main.go +++ b/internal/simdgen/main.go @@ -250,7 +250,7 @@ func validate(cl unify.Closure, required map[*unify.Value]struct{}) { } if !def.Exact() { - fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value:\n", def.PosString()) + fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value, why is %s:\n", def.PosString(), def.WhyNotExact()) fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t")) } diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml index a8e2368f..0f916283 100644 --- a/internal/simdgen/ops/Compares/go.yaml +++ b/internal/simdgen/ops/Compares/go.yaml @@ -36,6 +36,8 @@ overwriteElementBits: 64 overwriteClass: mask overwriteBase: int + +# TODO these are redundant with VPCMP operations. # AVX-512 compares produce masks. - go: Equal asm: "V?PCMPEQ[BWDQ]" @@ -51,21 +53,61 @@ - *int out: - class: mask -# The const imm predicated compares after AVX512, please see categories.yaml -# for const imm specification. + +# MASKED signed comparisons for X/Y registers +# unmasked would clash with emulations on AVX2 - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMP[BWDQ]" in: + - &int + bits: (128|256) + go: $t + base: int - *int + - class: immediate + const: 0 # Just a placeholder, will be overwritten by const imm porting. 
+ inVariant: + - class: mask + out: + - class: mask + +# MASKED unsigned comparisons for X/Y registers +# unmasked would clash with emulations on AVX2 +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + asm: "VPCMPU[BWDQ]" + in: + - &uint + bits: (128|256) + go: $t + base: uint + - *uint + - class: immediate + const: 0 + inVariant: + - class: mask + out: + - class: mask + +# masked/unmasked signed comparisons for Z registers +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) + asm: "VPCMP[BWDQ]" + in: + - &int + bits: 512 + go: $t + base: int - *int - class: immediate const: 0 # Just a placeholder, will be overwritten by const imm porting. out: - class: mask + +# masked/unmasked unsigned comparisons for Z registers - go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) asm: "VPCMPU[BWDQ]" in: - &uint + bits: 512 go: $t base: uint - *uint diff --git a/internal/unify/domain.go b/internal/unify/domain.go index 5c4d349f..1e0f2be6 100644 --- a/internal/unify/domain.go +++ b/internal/unify/domain.go @@ -39,6 +39,7 @@ import ( // - [Var] - A value captured in the environment. type Domain interface { Exact() bool + WhyNotExact() string // decode stores this value in a Go value. If this value is not exact, this // returns a potentially wrapped *inexactError. @@ -77,7 +78,8 @@ func (e *decodeError) Error() string { // Top represents all possible values of all possible types. type Top struct{} -func (t Top) Exact() bool { return false } +func (t Top) Exact() bool { return false } +func (t Top) WhyNotExact() string { return "is top" } func (t Top) decode(rv reflect.Value) error { // We can decode Top into a pointer-typed value as nil. 
@@ -125,6 +127,17 @@ func (d Def) Exact() bool { return true } +// WhyNotExact returns why the value is not exact +func (d Def) WhyNotExact() string { + for s, v := range d.fields { + if !v.Exact() { + w := v.WhyNotExact() + return "field " + s + ": " + w + } + } + return "" +} + func (d Def) decode(rv reflect.Value) error { if rv.Kind() != reflect.Struct { return fmt.Errorf("cannot decode Def into %s", rv.Type()) @@ -219,6 +232,19 @@ func (d Tuple) Exact() bool { return true } +func (d Tuple) WhyNotExact() string { + if d.repeat != nil { + return "d.repeat is not nil" + } + for i, v := range d.vs { + if !v.Exact() { + w := v.WhyNotExact() + return "index " + strconv.FormatInt(int64(i), 10) + ": " + w + } + } + return "" +} + func (d Tuple) decode(rv reflect.Value) error { if d.repeat != nil { return &inexactError{"repeated tuple", rv.Type().String()} @@ -300,6 +326,13 @@ func (d String) Exact() bool { return d.kind == stringExact } +func (d String) WhyNotExact() string { + if d.kind == stringExact { + return "" + } + return "string is not exact" +} + func (d String) decode(rv reflect.Value) error { if d.kind != stringExact { return &inexactError{"regex", rv.Type().String()} diff --git a/internal/unify/env.go b/internal/unify/env.go index 1200eb36..3331ff79 100644 --- a/internal/unify/env.go +++ b/internal/unify/env.go @@ -384,6 +384,11 @@ func (d Var) Exact() bool { panic("Exact called on non-concrete Value") } +func (d Var) WhyNotExact() string { + // These can't appear in concrete Values. 
+ return "WhyNotExact called on non-concrete Value" +} + func (d Var) decode(rv reflect.Value) error { return &inexactError{"var", rv.Type().String()} } diff --git a/internal/unify/value.go b/internal/unify/value.go index 87387bbf..ffc25b87 100644 --- a/internal/unify/value.go +++ b/internal/unify/value.go @@ -69,6 +69,13 @@ func (v *Value) PosString() string { return string(b) } +func (v *Value) WhyNotExact() string { + if v.Domain == nil { + return "v.Domain is nil" + } + return v.Domain.WhyNotExact() +} + func (v *Value) Exact() bool { if v.Domain == nil { return false From 6ad8cbc456cf7deb7d97d2ec7d914a1a7fe19225 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 13 Aug 2025 15:21:58 -0400 Subject: [PATCH 196/200] internal/simdgen: add missing copyright header Change-Id: I69912c7c9be7ccf5b22c01db0c3bec46fa478127 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695619 LUCI-TryBot-Result: Go LUCI Auto-Submit: Austin Clements Reviewed-by: Junyang Shao --- internal/simdgen/sort_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/simdgen/sort_test.go b/internal/simdgen/sort_test.go index 43a9fd64..399acf03 100644 --- a/internal/simdgen/sort_test.go +++ b/internal/simdgen/sort_test.go @@ -1,3 +1,7 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package main import "testing" From fb55ef737e0789cb87b0a66b25916fd051da93db Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 13 Aug 2025 15:13:38 -0400 Subject: [PATCH 197/200] internal/{simdgen,unify}: delete, move to main repo This deletes internal/simdgen and its supporting unify package. They have been moved to the dev.simd branch of the main Go repo as of CL 695975. 
Change-Id: I6247c7f97dd869b5f6934d1bc72f5b20f5f1705e Reviewed-on: https://go-review.googlesource.com/c/arch/+/695796 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- go.mod | 5 +- go.sum | 4 - internal/simdgen/.gitignore | 3 - internal/simdgen/asm.yaml.toy | 107 --- internal/simdgen/categories.yaml | 1 - internal/simdgen/etetest.sh | 33 - internal/simdgen/gen_simdGenericOps.go | 70 -- internal/simdgen/gen_simdIntrinsics.go | 151 ---- internal/simdgen/gen_simdMachineOps.go | 122 --- internal/simdgen/gen_simdTypes.go | 631 -------------- internal/simdgen/gen_simdrules.go | 211 ----- internal/simdgen/gen_simdssa.go | 173 ---- internal/simdgen/gen_utility.go | 729 ---------------- internal/simdgen/go.yaml | 1 - internal/simdgen/godefs.go | 379 --------- internal/simdgen/main.go | 280 ------- internal/simdgen/ops/AddSub/categories.yaml | 37 - internal/simdgen/ops/AddSub/go.yaml | 77 -- .../simdgen/ops/BitwiseLogic/categories.yaml | 20 - internal/simdgen/ops/BitwiseLogic/go.yaml | 128 --- internal/simdgen/ops/Compares/categories.yaml | 43 - internal/simdgen/ops/Compares/go.yaml | 141 ---- internal/simdgen/ops/Converts/categories.yaml | 10 - internal/simdgen/ops/Converts/go.yaml | 21 - .../simdgen/ops/FPonlyArith/categories.yaml | 85 -- internal/simdgen/ops/FPonlyArith/go.yaml | 62 -- .../simdgen/ops/GaloisField/categories.yaml | 21 - internal/simdgen/ops/GaloisField/go.yaml | 32 - .../simdgen/ops/IntOnlyArith/categories.yaml | 21 - internal/simdgen/ops/IntOnlyArith/go.yaml | 45 - internal/simdgen/ops/MLOps/categories.yaml | 47 -- internal/simdgen/ops/MLOps/go.yaml | 113 --- internal/simdgen/ops/MinMax/categories.yaml | 9 - internal/simdgen/ops/MinMax/go.yaml | 42 - internal/simdgen/ops/Moves/categories.yaml | 72 -- internal/simdgen/ops/Moves/go.yaml | 372 --------- internal/simdgen/ops/Mul/categories.yaml | 14 - internal/simdgen/ops/Mul/go.yaml | 73 -- .../simdgen/ops/ShiftRotate/categories.yaml | 103 --- internal/simdgen/ops/ShiftRotate/go.yaml | 172 ---- 
internal/simdgen/pprint.go | 73 -- internal/simdgen/sort_test.go | 41 - internal/simdgen/types.yaml | 90 -- internal/simdgen/xed.go | 780 ------------------ internal/unify/closure.go | 154 ---- internal/unify/domain.go | 359 -------- internal/unify/dot.go | 221 ----- internal/unify/env.go | 480 ----------- internal/unify/html.go | 123 --- internal/unify/pos.go | 33 - internal/unify/testdata/stress.yaml | 33 - internal/unify/testdata/unify.yaml | 174 ---- internal/unify/testdata/vars.yaml | 175 ---- internal/unify/trace.go | 168 ---- internal/unify/unify.go | 322 -------- internal/unify/unify_test.go | 154 ---- internal/unify/value.go | 167 ---- internal/unify/value_test.go | 50 -- internal/unify/yaml.go | 619 -------------- internal/unify/yaml_test.go | 202 ----- 60 files changed, 1 insertion(+), 9077 deletions(-) delete mode 100644 internal/simdgen/.gitignore delete mode 100644 internal/simdgen/asm.yaml.toy delete mode 100644 internal/simdgen/categories.yaml delete mode 100755 internal/simdgen/etetest.sh delete mode 100644 internal/simdgen/gen_simdGenericOps.go delete mode 100644 internal/simdgen/gen_simdIntrinsics.go delete mode 100644 internal/simdgen/gen_simdMachineOps.go delete mode 100644 internal/simdgen/gen_simdTypes.go delete mode 100644 internal/simdgen/gen_simdrules.go delete mode 100644 internal/simdgen/gen_simdssa.go delete mode 100644 internal/simdgen/gen_utility.go delete mode 100644 internal/simdgen/go.yaml delete mode 100644 internal/simdgen/godefs.go delete mode 100644 internal/simdgen/main.go delete mode 100644 internal/simdgen/ops/AddSub/categories.yaml delete mode 100644 internal/simdgen/ops/AddSub/go.yaml delete mode 100644 internal/simdgen/ops/BitwiseLogic/categories.yaml delete mode 100644 internal/simdgen/ops/BitwiseLogic/go.yaml delete mode 100644 internal/simdgen/ops/Compares/categories.yaml delete mode 100644 internal/simdgen/ops/Compares/go.yaml delete mode 100644 internal/simdgen/ops/Converts/categories.yaml delete mode 100644 
internal/simdgen/ops/Converts/go.yaml delete mode 100644 internal/simdgen/ops/FPonlyArith/categories.yaml delete mode 100644 internal/simdgen/ops/FPonlyArith/go.yaml delete mode 100644 internal/simdgen/ops/GaloisField/categories.yaml delete mode 100644 internal/simdgen/ops/GaloisField/go.yaml delete mode 100644 internal/simdgen/ops/IntOnlyArith/categories.yaml delete mode 100644 internal/simdgen/ops/IntOnlyArith/go.yaml delete mode 100644 internal/simdgen/ops/MLOps/categories.yaml delete mode 100644 internal/simdgen/ops/MLOps/go.yaml delete mode 100644 internal/simdgen/ops/MinMax/categories.yaml delete mode 100644 internal/simdgen/ops/MinMax/go.yaml delete mode 100644 internal/simdgen/ops/Moves/categories.yaml delete mode 100644 internal/simdgen/ops/Moves/go.yaml delete mode 100644 internal/simdgen/ops/Mul/categories.yaml delete mode 100644 internal/simdgen/ops/Mul/go.yaml delete mode 100644 internal/simdgen/ops/ShiftRotate/categories.yaml delete mode 100644 internal/simdgen/ops/ShiftRotate/go.yaml delete mode 100644 internal/simdgen/pprint.go delete mode 100644 internal/simdgen/sort_test.go delete mode 100644 internal/simdgen/types.yaml delete mode 100644 internal/simdgen/xed.go delete mode 100644 internal/unify/closure.go delete mode 100644 internal/unify/domain.go delete mode 100644 internal/unify/dot.go delete mode 100644 internal/unify/env.go delete mode 100644 internal/unify/html.go delete mode 100644 internal/unify/pos.go delete mode 100644 internal/unify/testdata/stress.yaml delete mode 100644 internal/unify/testdata/unify.yaml delete mode 100644 internal/unify/testdata/vars.yaml delete mode 100644 internal/unify/trace.go delete mode 100644 internal/unify/unify.go delete mode 100644 internal/unify/unify_test.go delete mode 100644 internal/unify/value.go delete mode 100644 internal/unify/value_test.go delete mode 100644 internal/unify/yaml.go delete mode 100644 internal/unify/yaml_test.go diff --git a/go.mod b/go.mod index bcca36b6..b72ba1a5 100644 --- 
a/go.mod +++ b/go.mod @@ -2,7 +2,4 @@ module golang.org/x/arch go 1.23.0 -require ( - gopkg.in/yaml.v3 v3.0.1 - rsc.io/pdf v0.1.1 -) +require rsc.io/pdf v0.1.1 diff --git a/go.sum b/go.sum index cf7dae80..e854d25c 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,2 @@ -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/internal/simdgen/.gitignore b/internal/simdgen/.gitignore deleted file mode 100644 index de579f6b..00000000 --- a/internal/simdgen/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -testdata/* -.gemini/* -.gemini* diff --git a/internal/simdgen/asm.yaml.toy b/internal/simdgen/asm.yaml.toy deleted file mode 100644 index 7885c776..00000000 --- a/internal/simdgen/asm.yaml.toy +++ /dev/null @@ -1,107 +0,0 @@ -# Hand-written toy input like -xedPath would generate. -# This input can be substituted for -xedPath. 
-!sum -- asm: ADDPS - goarch: amd64 - feature: "SSE2" - in: - - asmPos: 0 - class: vreg - base: float - elemBits: 32 - bits: 128 - - asmPos: 1 - class: vreg - base: float - elemBits: 32 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: float - elemBits: 32 - bits: 128 - -- asm: ADDPD - goarch: amd64 - feature: "SSE2" - in: - - asmPos: 0 - class: vreg - base: float - elemBits: 64 - bits: 128 - - asmPos: 1 - class: vreg - base: float - elemBits: 64 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: float - elemBits: 64 - bits: 128 - -- asm: PADDB - goarch: amd64 - feature: "SSE2" - in: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 32 - bits: 128 - - asmPos: 1 - class: vreg - base: int|uint - elemBits: 32 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 32 - bits: 128 - -- asm: VPADDB - goarch: amd64 - feature: "AVX" - in: - - asmPos: 1 - class: vreg - base: int|uint - elemBits: 8 - bits: 128 - - asmPos: 2 - class: vreg - base: int|uint - elemBits: 8 - bits: 128 - out: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 8 - bits: 128 - -- asm: VPADDB - goarch: amd64 - feature: "AVX2" - in: - - asmPos: 1 - class: vreg - base: int|uint - elemBits: 8 - bits: 256 - - asmPos: 2 - class: vreg - base: int|uint - elemBits: 8 - bits: 256 - out: - - asmPos: 0 - class: vreg - base: int|uint - elemBits: 8 - bits: 256 diff --git a/internal/simdgen/categories.yaml b/internal/simdgen/categories.yaml deleted file mode 100644 index ed4c9645..00000000 --- a/internal/simdgen/categories.yaml +++ /dev/null @@ -1 +0,0 @@ -!import ops/*/categories.yaml diff --git a/internal/simdgen/etetest.sh b/internal/simdgen/etetest.sh deleted file mode 100755 index 7b5001ec..00000000 --- a/internal/simdgen/etetest.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -x - -cat <<\\EOF - -This is an end-to-end test of Go SIMD. 
It checks out a fresh Go -repository from the go.simd branch, then generates the SIMD input -files and runs simdgen writing into the fresh repository. - -After that it generates the modified ssa pattern matching files, then -builds the compiler. - -\EOF - -rm -rf go-test -git clone https://go.googlesource.com/go -b dev.simd go-test -go run . -xedPath xeddata -o godefs -goroot ./go-test go.yaml types.yaml categories.yaml -(cd go-test/src/cmd/compile/internal/ssa/_gen ; go run *.go ) -(cd go-test/src ; GOEXPERIMENT=simd ./make.bash ) -(cd go-test/bin; b=`pwd` ; cd ../src/simd/testdata; GOARCH=amd64 $b/go run .) -(cd go-test/bin; b=`pwd` ; cd ../src ; -GOEXPERIMENT=simd GOARCH=amd64 $b/go test -v simd -GOEXPERIMENT=simd $b/go test go/doc -GOEXPERIMENT=simd $b/go test go/build -GOEXPERIMENT=simd $b/go test cmd/api -v -check -$b/go test go/doc -$b/go test go/build -$b/go test cmd/api -v -check - -$b/go test cmd/compile/internal/ssagen -simd=0 -GOEXPERIMENT=simd $b/go test cmd/compile/internal/ssagen -simd=0 -) - -# next, add some tests of SIMD itself diff --git a/internal/simdgen/gen_simdGenericOps.go b/internal/simdgen/gen_simdGenericOps.go deleted file mode 100644 index 3dbbeb09..00000000 --- a/internal/simdgen/gen_simdGenericOps.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "sort" -) - -const simdGenericOpsTmpl = ` -package main - -func simdGenericOps() []opData { - return []opData{ -{{- range .Ops }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}}, -{{- end }} -{{- range .OpsImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, commutative: {{.Comm}}, aux: "UInt8"}, -{{- end }} - } -} -` - -// writeSIMDGenericOps generates the generic ops and writes it to simdAMD64ops.go -// within the specified directory. 
-func writeSIMDGenericOps(ops []Operation) *bytes.Buffer { - t := templateOf(simdGenericOpsTmpl, "simdgenericOps") - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader) - - type genericOpsData struct { - OpName string - OpInLen int - Comm bool - } - type opData struct { - Ops []genericOpsData - OpsImm []genericOpsData - } - var opsData opData - for _, op := range ops { - if op.NoGenericOps != nil && *op.NoGenericOps == "true" { - continue - } - _, _, _, immType, gOp := op.shape() - gOpData := genericOpsData{gOp.GenericName(), len(gOp.In), op.Commutative} - if immType == VarImm || immType == ConstVarImm { - opsData.OpsImm = append(opsData.OpsImm, gOpData) - } else { - opsData.Ops = append(opsData.Ops, gOpData) - } - } - sort.Slice(opsData.Ops, func(i, j int) bool { - return compareNatural(opsData.Ops[i].OpName, opsData.Ops[j].OpName) < 0 - }) - sort.Slice(opsData.OpsImm, func(i, j int) bool { - return compareNatural(opsData.OpsImm[i].OpName, opsData.OpsImm[j].OpName) < 0 - }) - - err := t.Execute(buffer, opsData) - if err != nil { - panic(fmt.Errorf("failed to execute template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_simdIntrinsics.go b/internal/simdgen/gen_simdIntrinsics.go deleted file mode 100644 index 6a1501e1..00000000 --- a/internal/simdgen/gen_simdIntrinsics.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import ( - "bytes" - "fmt" - "slices" -) - -const simdIntrinsicsTmpl = ` -{{define "header"}} -package ssagen - -import ( - "cmd/compile/internal/ir" - "cmd/compile/internal/ssa" - "cmd/compile/internal/types" - "cmd/internal/sys" -) - -const simdPackage = "` + simdPackage + `" - -func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { -{{end}} - -{{define "op1"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen1(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op2"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op2_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op2_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_21"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_21Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_21(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op3_31"}} addF(simdPackage, "{{(index .In 2).Go}}.{{.Go}}", opLen3_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op4"}} addF(simdPackage, "{{(index .In 0).Go}}.{{.Go}}", opLen4(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op4_231Type1"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4_231(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op4_31"}} addF(simdPackage, 
"{{(index .In 2).Go}}.{{.Go}}", opLen4_31(ssa.Op{{.GenericName}}, {{.SSAType}}), sys.AMD64) -{{end}} -{{define "op1Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen1Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op2Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} -{{define "op4Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen4Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64) -{{end}} - -{{define "vectorConversion"}} addF(simdPackage, "{{.Tsrc.Name}}.As{{.Tdst.Name}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) -{{end}} - -{{define "loadStore"}} addF(simdPackage, "Load{{.Name}}", simdLoad(), sys.AMD64) - addF(simdPackage, "{{.Name}}.Store", simdStore(), sys.AMD64) -{{end}} - -{{define "maskedLoadStore"}} addF(simdPackage, "LoadMasked{{.Name}}", simdMaskedLoad(ssa.OpLoadMasked{{.ElemBits}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.StoreMasked", simdMaskedStore(ssa.OpStoreMasked{{.ElemBits}}), sys.AMD64) -{{end}} - -{{define "mask"}} addF(simdPackage, "{{.Name}}.As{{.VectorCounterpart}}", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "{{.VectorCounterpart}}.As{{.Name}}", func(s *state, n *ir.CallExpr, args 
[]*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) - addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64) - addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}FromBits", simdCvtVToMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64) - addF(simdPackage, "{{.Name}}.ToBits", simdCvtMaskToV({{.ElemBits}}, {{.Lanes}}), sys.AMD64) -{{end}} - -{{define "footer"}}} -{{end}} -` - -// writeSIMDIntrinsics generates the intrinsic mappings and writes it to simdintrinsics.go -// within the specified directory. -func writeSIMDIntrinsics(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { - t := templateOf(simdIntrinsicsTmpl, "simdintrinsics") - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader) - - if err := t.ExecuteTemplate(buffer, "header", nil); err != nil { - panic(fmt.Errorf("failed to execute header template: %w", err)) - } - - slices.SortFunc(ops, compareOperations) - - for _, op := range ops { - if op.NoTypes != nil && *op.NoTypes == "true" { - continue - } - if s, op, err := classifyOp(op); err == nil { - if err := t.ExecuteTemplate(buffer, s, op); err != nil { - panic(fmt.Errorf("failed to execute template %s for op %s: %w", s, op.Go, err)) - } - - } else { - panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) - } - } - - for _, conv := range vConvertFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { - panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) - } - } - - for _, typ := range typesFromTypeMap(typeMap) { - if typ.Type != "mask" { - if err := t.ExecuteTemplate(buffer, "loadStore", typ); err != nil { - 
panic(fmt.Errorf("failed to execute loadStore template: %w", err)) - } - } - } - - for _, typ := range typesFromTypeMap(typeMap) { - if typ.MaskedLoadStoreFilter() { - if err := t.ExecuteTemplate(buffer, "maskedLoadStore", typ); err != nil { - panic(fmt.Errorf("failed to execute maskedLoadStore template: %w", err)) - } - } - } - - for _, mask := range masksFromTypeMap(typeMap) { - if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { - panic(fmt.Errorf("failed to execute mask template: %w", err)) - } - } - - if err := t.ExecuteTemplate(buffer, "footer", nil); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_simdMachineOps.go b/internal/simdgen/gen_simdMachineOps.go deleted file mode 100644 index 64918e55..00000000 --- a/internal/simdgen/gen_simdMachineOps.go +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "sort" - "strings" -) - -const simdMachineOpsTmpl = ` -package main - -func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { - return []opData{ -{{- range .OpsData }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, -{{- end }} -{{- range .OpsDataImm }} - {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, -{{- end }} - } -} -` - -// writeSIMDMachineOps generates the machine ops and writes it to simdAMD64ops.go -// within the specified directory. 
-func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { - t := templateOf(simdMachineOpsTmpl, "simdAMD64Ops") - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader) - - type opData struct { - OpName string - Asm string - OpInLen int - RegInfo string - Comm bool - Type string - ResultInArg0 bool - } - type machineOpsData struct { - OpsData []opData - OpsDataImm []opData - } - seen := map[string]struct{}{} - regInfoSet := map[string]bool{ - "v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true, - "w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true} - opsData := make([]opData, 0) - opsDataImm := make([]opData, 0) - for _, op := range ops { - shapeIn, shapeOut, maskType, _, gOp := op.shape() - asm := machineOpName(maskType, gOp) - - // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy - // one here with a name suffix "Merging". The rewrite rules will need them. - if _, ok := seen[asm]; ok { - continue - } - seen[asm] = struct{}{} - regInfo, err := op.regShape() - if err != nil { - panic(err) - } - idx, err := checkVecAsScalar(op) - if err != nil { - panic(err) - } - if idx != -1 { - if regInfo == "v21" { - regInfo = "vfpv" - } else if regInfo == "v2kv" { - regInfo = "vfpkv" - } else { - panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)) - } - } - // Makes AVX512 operations use upper registers - if strings.Contains(op.CPUFeature, "AVX512") { - regInfo = strings.ReplaceAll(regInfo, "v", "w") - } - if _, ok := regInfoSet[regInfo]; !ok { - panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. 
Op is %s", regInfo, op)) - } - var outType string - if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { - // If class overwrite is happening, that's not really a mask but a vreg. - outType = fmt.Sprintf("Vec%d", *gOp.Out[0].Bits) - } else if shapeOut == OneGregOut { - outType = gOp.GoType() // this is a straight Go type, not a VecNNN type - } else if shapeOut == OneKmaskOut { - outType = "Mask" - } else { - panic(fmt.Errorf("simdgen does not recognize this output shape: %d", shapeOut)) - } - resultInArg0 := false - if shapeOut == OneVregOutAtIn { - resultInArg0 = true - } - if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { - opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) - } else { - opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) - } - } - sort.Slice(opsData, func(i, j int) bool { - return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 - }) - sort.Slice(opsDataImm, func(i, j int) bool { - return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 - }) - err := t.Execute(buffer, machineOpsData{opsData, opsDataImm}) - if err != nil { - panic(fmt.Errorf("failed to execute template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_simdTypes.go b/internal/simdgen/gen_simdTypes.go deleted file mode 100644 index a367cce0..00000000 --- a/internal/simdgen/gen_simdTypes.go +++ /dev/null @@ -1,631 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "cmp" - "fmt" - "maps" - "slices" - "sort" - "strings" -) - -type simdType struct { - Name string // The go type name of this simd type, for example Int32x4. - Lanes int // The number of elements in this vector/mask. 
- Base string // The element's type, like for Int32x4 it will be int32. - Fields string // The struct fields, it should be right formatted. - Type string // Either "mask" or "vreg" - VectorCounterpart string // For mask use only: just replacing the "Mask" in [simdType.Name] with "Int" - ReshapedVectorWithAndOr string // For mask use only: vector AND and OR are only available in some shape with element width 32. - Size int // The size of the vector type -} - -func (x simdType) ElemBits() int { - return x.Size / x.Lanes -} - -// LanesContainer returns the smallest int/uint bit size that is -// large enough to hold one bit for each lane. E.g., Mask32x4 -// is 4 lanes, and a uint8 is the smallest uint that has 4 bits. -func (x simdType) LanesContainer() int { - if x.Lanes > 64 { - panic("too many lanes") - } - if x.Lanes > 32 { - return 64 - } - if x.Lanes > 16 { - return 32 - } - if x.Lanes > 8 { - return 16 - } - return 8 -} - -// MaskedLoadStoreFilter encodes which simd type type currently -// get masked loads/stores generated, it is used in two places, -// this forces coordination. 
-func (x simdType) MaskedLoadStoreFilter() bool { - return x.Size == 512 || x.ElemBits() >= 32 && x.Type != "mask" -} - -func (x simdType) IntelSizeSuffix() string { - switch x.ElemBits() { - case 8: - return "B" - case 16: - return "W" - case 32: - return "D" - case 64: - return "Q" - } - panic("oops") -} - -func (x simdType) MaskedLoadDoc() string { - if x.Size == 512 || x.ElemBits() < 32 { - return fmt.Sprintf("// Asm: VMOVDQU%d.Z, CPU Feature: AVX512", x.ElemBits()) - } else { - return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) - } -} - -func (x simdType) MaskedStoreDoc() string { - if x.Size == 512 || x.ElemBits() < 32 { - return fmt.Sprintf("// Asm: VMOVDQU%d, CPU Feature: AVX512", x.ElemBits()) - } else { - return fmt.Sprintf("// Asm: VMASKMOV%s, CPU Feature: AVX2", x.IntelSizeSuffix()) - } -} - -func compareSimdTypes(x, y simdType) int { - // "vreg" then "mask" - if c := -compareNatural(x.Type, y.Type); c != 0 { - return c - } - // want "flo" < "int" < "uin" (and then 8 < 16 < 32 < 64), - // not "int16" < "int32" < "int64" < "int8") - // so limit comparison to first 3 bytes in string. 
- if c := compareNatural(x.Base[:3], y.Base[:3]); c != 0 { - return c - } - // base type size, 8 < 16 < 32 < 64 - if c := x.ElemBits() - y.ElemBits(); c != 0 { - return c - } - // vector size last - return x.Size - y.Size -} - -type simdTypeMap map[int][]simdType - -type simdTypePair struct { - Tsrc simdType - Tdst simdType -} - -func compareSimdTypePairs(x, y simdTypePair) int { - c := compareSimdTypes(x.Tsrc, y.Tsrc) - if c != 0 { - return c - } - return compareSimdTypes(x.Tdst, y.Tdst) -} - -const simdPackageHeader = generatedHeader + ` -//go:build goexperiment.simd - -package simd -` - -const simdTypesTemplates = ` -{{define "sizeTmpl"}} -// v{{.}} is a tag type that tells the compiler that this is really {{.}}-bit SIMD -type v{{.}} struct { - _{{.}} struct{} -} -{{end}} - -{{define "typeTmpl"}} -// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}} -type {{.Name}} struct { -{{.Fields}} -} - -{{end}} -` - -const simdFeaturesTemplate = ` -import "internal/cpu" - -{{range .}} -{{- if eq .Feature "AVX512"}} -// Has{{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features. -// -// These five CPU features are bundled together, and no use of AVX-512 -// is allowed unless all of these features are supported together. -// Nearly every CPU that has shipped with any support for AVX-512 has -// supported all five of these features. -{{- else -}} -// Has{{.Feature}} returns whether the CPU supports the {{.Feature}} feature. -{{- end}} -// -// Has{{.Feature}} is defined on all GOARCHes, but will only return true on -// GOARCH {{.GoArch}}. 
-func Has{{.Feature}}() bool { - return cpu.X86.Has{{.Feature}} -} -{{end}} -` - -const simdLoadStoreTemplate = ` -// Len returns the number of elements in a {{.Name}} -func (x {{.Name}}) Len() int { return {{.Lanes}} } - -// Load{{.Name}} loads a {{.Name}} from an array -// -//go:noescape -func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} - -// Store stores a {{.Name}} to an array -// -//go:noescape -func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) -` - -const simdMaskFromBitsTemplate = ` -// Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. -// -// CPU Features: AVX512 -//go:noescape -func Load{{.Name}}FromBits(y *uint64) {{.Name}} - -// StoreToBits stores a {{.Name}} as a bitmap, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. -// -// CPU Features: AVX512 -//go:noescape -func (x {{.Name}}) StoreToBits(y *uint64) -` - -const simdMaskFromValTemplate = ` -// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. -// -// Asm: KMOV{{.IntelSizeSuffix}}, CPU Feature: AVX512 -func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} - -// ToBits constructs a bitmap from a {{.Name}}, where 1 means set for the indexed element, 0 means unset. -// Only the lower {{.Lanes}} bits of y are used. 
-// -// Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512 -func (x {{.Name}}) ToBits() uint{{.LanesContainer}} -` - -const simdMaskedLoadStoreTemplate = ` -// LoadMasked{{.Name}} loads a {{.Name}} from an array, -// at those elements enabled by mask -// -{{.MaskedLoadDoc}} -// -//go:noescape -func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} - -// StoreMasked stores a {{.Name}} to an array, -// at those elements enabled by mask -// -{{.MaskedStoreDoc}} -// -//go:noescape -func (x {{.Name}}) StoreMasked(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) -` - -const simdStubsTmpl = ` -{{define "op1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}() {{.GoType}} -{{end}} - -{{define "op2"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op2_21"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op2_21Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op3"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_31"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_21"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} 
-// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_21Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op0NameAndType "y"}}, {{.Op2NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3_231Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op2VecAsScalar"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}) {{(index .Out 0).Go}} -{{end}} - -{{define "op3VecAsScalar"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}(y uint{{(index .In 1).TreatLikeAScalarOfSize}}, {{.Op2NameAndType "z"}}) {{(index .Out 0).Go}} -{{end}} - -{{define "op4"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op0NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op2NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "op4_231Type1"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "op4_31"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op2NameAndType "x"}}) {{.Go}}({{.Op1NameAndType "y"}}, {{.Op0NameAndType "z"}}, {{.Op3NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "op1Imm8"}} -{{if 
.Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8) {{.GoType}} -{{end}} - -{{define "op2Imm8"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}} -{{end}} - -{{define "op2Imm8_2I"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}} -{{end}} - - -{{define "op3Imm8"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}) {{.GoType}} -{{end}} - -{{define "op3Imm8_2I"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
-// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8, {{.Op3NameAndType "z"}}) {{.GoType}} -{{end}} - - -{{define "op4Imm8"}} -{{if .Documentation}}{{.Documentation}} -//{{end}} -// {{.ImmName}} results in better performance when it's a constant, a non-constant value will be translated into a jump table. -// -// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}} -func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}, {{.Op3NameAndType "z"}}, {{.Op4NameAndType "u"}}) {{.GoType}} -{{end}} - -{{define "vectorConversion"}} -// {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} -func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) -{{end}} - -{{define "mask"}} -// converts from {{.Name}} to {{.VectorCounterpart}} -func (from {{.Name}}) As{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) - -// converts from {{.VectorCounterpart}} to {{.Name}} -func (from {{.VectorCounterpart}}) As{{.Name}}() (to {{.Name}}) - -func (x {{.Name}}) And(y {{.Name}}) {{.Name}} - -func (x {{.Name}}) Or(y {{.Name}}) {{.Name}} -{{end}} -` - -// parseSIMDTypes groups go simd types by their vector sizes, and -// returns a map whose key is the vector size, value is the simd type. -func parseSIMDTypes(ops []Operation) simdTypeMap { - // TODO: maybe instead of going over ops, let's try go over types.yaml. - ret := map[int][]simdType{} - seen := map[string]struct{}{} - processArg := func(arg Operand) { - if arg.Class == "immediate" || arg.Class == "greg" { - // Immediates are not encoded as vector types. 
- return - } - if _, ok := seen[*arg.Go]; ok { - return - } - seen[*arg.Go] = struct{}{} - - lanes := *arg.Lanes - base := fmt.Sprintf("%s%d", *arg.Base, *arg.ElemBits) - tagFieldNameS := fmt.Sprintf("%sx%d", base, lanes) - tagFieldS := fmt.Sprintf("%s v%d", tagFieldNameS, *arg.Bits) - valFieldS := fmt.Sprintf("vals%s[%d]%s", strings.Repeat(" ", len(tagFieldNameS)-3), lanes, base) - fields := fmt.Sprintf("\t%s\n\t%s", tagFieldS, valFieldS) - if arg.Class == "mask" { - vectorCounterpart := strings.ReplaceAll(*arg.Go, "Mask", "Int") - reshapedVectorWithAndOr := fmt.Sprintf("Int32x%d", *arg.Bits/32) - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, vectorCounterpart, reshapedVectorWithAndOr, *arg.Bits}) - // In case the vector counterpart of a mask is not present, put its vector counterpart typedef into the map as well. - if _, ok := seen[vectorCounterpart]; !ok { - seen[vectorCounterpart] = struct{}{} - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{vectorCounterpart, lanes, base, fields, "vreg", "", "", *arg.Bits}) - } - } else { - ret[*arg.Bits] = append(ret[*arg.Bits], simdType{*arg.Go, lanes, base, fields, arg.Class, "", "", *arg.Bits}) - } - } - for _, op := range ops { - for _, arg := range op.In { - processArg(arg) - } - for _, arg := range op.Out { - processArg(arg) - } - } - return ret -} - -func vConvertFromTypeMap(typeMap simdTypeMap) []simdTypePair { - v := []simdTypePair{} - for _, ts := range typeMap { - for i, tsrc := range ts { - for j, tdst := range ts { - if i != j && tsrc.Type == tdst.Type && tsrc.Type == "vreg" && - tsrc.Lanes > 1 && tdst.Lanes > 1 { - v = append(v, simdTypePair{tsrc, tdst}) - } - } - } - } - slices.SortFunc(v, compareSimdTypePairs) - return v -} - -func masksFromTypeMap(typeMap simdTypeMap) []simdType { - m := []simdType{} - for _, ts := range typeMap { - for _, tsrc := range ts { - if tsrc.Type == "mask" { - m = append(m, tsrc) - } - } - } - slices.SortFunc(m, compareSimdTypes) - 
return m -} - -func typesFromTypeMap(typeMap simdTypeMap) []simdType { - m := []simdType{} - for _, ts := range typeMap { - for _, tsrc := range ts { - if tsrc.Lanes > 1 { - m = append(m, tsrc) - } - } - } - slices.SortFunc(m, compareSimdTypes) - return m -} - -// writeSIMDTypes generates the simd vector types into a bytes.Buffer -func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { - t := templateOf(simdTypesTemplates, "types_amd64") - loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64") - maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64") - maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64") - maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64") - - buffer := new(bytes.Buffer) - buffer.WriteString(simdPackageHeader) - - sizes := make([]int, 0, len(typeMap)) - for size, types := range typeMap { - slices.SortFunc(types, compareSimdTypes) - sizes = append(sizes, size) - } - sort.Ints(sizes) - - for _, size := range sizes { - if size <= 64 { - // these are scalar - continue - } - if err := t.ExecuteTemplate(buffer, "sizeTmpl", size); err != nil { - panic(fmt.Errorf("failed to execute size template for size %d: %w", size, err)) - } - for _, typeDef := range typeMap[size] { - if typeDef.Lanes == 1 { - continue - } - if err := t.ExecuteTemplate(buffer, "typeTmpl", typeDef); err != nil { - panic(fmt.Errorf("failed to execute type template for type %s: %w", typeDef.Name, err)) - } - if typeDef.Type != "mask" { - if err := loadStore.ExecuteTemplate(buffer, "loadstore_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute loadstore template for type %s: %w", typeDef.Name, err)) - } - // restrict to AVX2 masked loads/stores first. 
- if typeDef.MaskedLoadStoreFilter() { - if err := maskedLoadStore.ExecuteTemplate(buffer, "maskedloadstore_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute maskedloadstore template for type %s: %w", typeDef.Name, err)) - } - } - } else { - if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err)) - } - if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil { - panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err)) - } - } - } - } - - return buffer -} - -func writeSIMDFeatures(ops []Operation) *bytes.Buffer { - // Gather all features - type featureKey struct { - GoArch string - Feature string - } - featureSet := make(map[featureKey]struct{}) - for _, op := range ops { - featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{} - } - features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { - if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 { - return c - } - return compareNatural(a.Feature, b.Feature) - }) - - // If we ever have the same feature name on more than one GOARCH, we'll have - // to be more careful about this. - t := templateOf(simdFeaturesTemplate, "features") - - buffer := new(bytes.Buffer) - buffer.WriteString(simdPackageHeader) - - if err := t.Execute(buffer, features); err != nil { - panic(fmt.Errorf("failed to execute features template: %w", err)) - } - - return buffer -} - -// writeSIMDStubs generates the simd vector intrinsic stubs and writes it to ops_amd64.go and ops_internal_amd64.go -// within the specified directory. 
-func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) *bytes.Buffer { - t := templateOf(simdStubsTmpl, "simdStubs") - buffer := new(bytes.Buffer) - buffer.WriteString(simdPackageHeader) - - slices.SortFunc(ops, compareOperations) - - for i, op := range ops { - if op.NoTypes != nil && *op.NoTypes == "true" { - continue - } - idxVecAsScalar, err := checkVecAsScalar(op) - if err != nil { - panic(err) - } - if s, op, err := classifyOp(op); err == nil { - if idxVecAsScalar != -1 { - if s == "op2" || s == "op3" { - s += "VecAsScalar" - } else { - panic(fmt.Errorf("simdgen only supports op2 or op3 with TreatLikeAScalarOfSize")) - } - } - if i == 0 || op.Go != ops[i-1].Go { - fmt.Fprintf(buffer, "\n/* %s */\n", op.Go) - } - if err := t.ExecuteTemplate(buffer, s, op); err != nil { - panic(fmt.Errorf("failed to execute template %s for op %v: %w", s, op, err)) - } - } else { - panic(fmt.Errorf("failed to classify op %v: %w", op.Go, err)) - } - } - - vectorConversions := vConvertFromTypeMap(typeMap) - for _, conv := range vectorConversions { - if err := t.ExecuteTemplate(buffer, "vectorConversion", conv); err != nil { - panic(fmt.Errorf("failed to execute vectorConversion template: %w", err)) - } - } - - masks := masksFromTypeMap(typeMap) - for _, mask := range masks { - if err := t.ExecuteTemplate(buffer, "mask", mask); err != nil { - panic(fmt.Errorf("failed to execute mask template for mask %s: %w", mask.Name, err)) - } - } - - return buffer -} diff --git a/internal/simdgen/gen_simdrules.go b/internal/simdgen/gen_simdrules.go deleted file mode 100644 index b0fc7e62..00000000 --- a/internal/simdgen/gen_simdrules.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "slices" - "text/template" -) - -type tplRuleData struct { - tplName string // e.g. "sftimm" - GoOp string // e.g. 
"ShiftAllLeft" - GoType string // e.g. "Uint32x8" - Args string // e.g. "x y" - Asm string // e.g. "VPSLLD256" - ArgsOut string // e.g. "x y" - MaskInConvert string // e.g. "VPMOVVec32x8ToM" - MaskOutConvert string // e.g. "VPMOVMToVec32x8" -} - -var ( - ruleTemplates = template.Must(template.New("simdRules").Parse(` -{{define "pureVreg"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.Asm}} {{.ArgsOut}}) -{{end}} -{{define "maskIn"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask)) -{{end}} -{{define "maskOut"}}({{.GoOp}}{{.GoType}} {{.Args}}) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}})) -{{end}} -{{define "maskInMaskOut"}}({{.GoOp}}{{.GoType}} {{.Args}} mask) => ({{.MaskOutConvert}} ({{.Asm}} {{.ArgsOut}} ({{.MaskInConvert}} mask))) -{{end}} -{{define "sftimm"}}({{.Asm}} x (MOVQconst [c])) => ({{.Asm}}const [uint8(c)] x) -{{end}} -{{define "masksftimm"}}({{.Asm}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x mask) -{{end}} -`)) -) - -// SSA rewrite rules need to appear in a most-to-least-specific order. This works for that. -var tmplOrder = map[string]int{ - "masksftimm": 0, - "sftimm": 1, - "maskInMaskOut": 2, - "maskOut": 3, - "maskIn": 4, - "pureVreg": 5, -} - -func compareTplRuleData(x, y tplRuleData) int { - if c := compareNatural(x.GoOp, y.GoOp); c != 0 { - return c - } - if c := compareNatural(x.GoType, y.GoType); c != 0 { - return c - } - if c := compareNatural(x.Args, y.Args); c != 0 { - return c - } - if x.tplName == y.tplName { - return 0 - } - xo, xok := tmplOrder[x.tplName] - yo, yok := tmplOrder[y.tplName] - if !xok { - panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", x.tplName)) - } - if !yok { - panic(fmt.Errorf("Unexpected template name %s, please add to tmplOrder", y.tplName)) - } - return xo - yo -} - -// writeSIMDRules generates the lowering and rewrite rules for ssa and writes it to simdAMD64.rules -// within the specified directory. 
-func writeSIMDRules(ops []Operation) *bytes.Buffer { - buffer := new(bytes.Buffer) - buffer.WriteString(generatedHeader + "\n") - - var allData []tplRuleData - - for _, opr := range ops { - if opr.NoGenericOps != nil && *opr.NoGenericOps == "true" { - continue - } - opInShape, opOutShape, maskType, immType, gOp := opr.shape() - asm := machineOpName(maskType, gOp) - vregInCnt := len(gOp.In) - if maskType == OneMask { - vregInCnt-- - } - - data := tplRuleData{ - GoOp: gOp.Go, - Asm: asm, - } - - if vregInCnt == 1 { - data.Args = "x" - data.ArgsOut = data.Args - } else if vregInCnt == 2 { - data.Args = "x y" - data.ArgsOut = data.Args - } else if vregInCnt == 3 { - data.Args = "x y z" - data.ArgsOut = data.Args - } else { - panic(fmt.Errorf("simdgen does not support more than 3 vreg in inputs")) - } - if immType == ConstImm { - data.ArgsOut = fmt.Sprintf("[%s] %s", *opr.In[0].Const, data.ArgsOut) - } else if immType == VarImm { - data.Args = fmt.Sprintf("[a] %s", data.Args) - data.ArgsOut = fmt.Sprintf("[a] %s", data.ArgsOut) - } else if immType == ConstVarImm { - data.Args = fmt.Sprintf("[a] %s", data.Args) - data.ArgsOut = fmt.Sprintf("[a+%s] %s", *opr.In[0].Const, data.ArgsOut) - } - - goType := func(op Operation) string { - if op.OperandOrder != nil { - switch *op.OperandOrder { - case "21Type1", "231Type1": - // Permute uses operand[1] for method receiver. - return *op.In[1].Go - } - } - return *op.In[0].Go - } - var tplName string - // If class overwrite is happening, that's not really a mask but a vreg. - if opOutShape == OneVregOut || opOutShape == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { - switch opInShape { - case OneImmIn: - tplName = "pureVreg" - data.GoType = goType(gOp) - case PureVregIn: - tplName = "pureVreg" - data.GoType = goType(gOp) - case OneKmaskImmIn: - fallthrough - case OneKmaskIn: - tplName = "maskIn" - data.GoType = goType(gOp) - rearIdx := len(gOp.In) - 1 - // Mask is at the end. 
- data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) - case PureKmaskIn: - panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) - } - } else if opOutShape == OneGregOut { - tplName = "pureVreg" // TODO this will be wrong - data.GoType = goType(gOp) - } else { - // OneKmaskOut case - data.MaskOutConvert = fmt.Sprintf("VPMOVMToVec%dx%d", *gOp.Out[0].ElemBits, *gOp.In[0].Lanes) - switch opInShape { - case OneImmIn: - fallthrough - case PureVregIn: - tplName = "maskOut" - data.GoType = goType(gOp) - case OneKmaskImmIn: - fallthrough - case OneKmaskIn: - tplName = "maskInMaskOut" - data.GoType = goType(gOp) - rearIdx := len(gOp.In) - 1 - data.MaskInConvert = fmt.Sprintf("VPMOVVec%dx%dToM", *gOp.In[rearIdx].ElemBits, *gOp.In[rearIdx].Lanes) - case PureKmaskIn: - panic(fmt.Errorf("simdgen does not support pure k mask instructions, they should be generated by compiler optimizations")) - } - } - - if gOp.SpecialLower != nil { - if *gOp.SpecialLower == "sftimm" { - if data.GoType[0] == 'I' { - // only do these for signed types, it is a duplicate rewrite for unsigned - sftImmData := data - if tplName == "maskIn" { - sftImmData.tplName = "masksftimm" - } else { - sftImmData.tplName = "sftimm" - } - allData = append(allData, sftImmData) - } - } else { - panic("simdgen sees unknwon special lower " + *gOp.SpecialLower + ", maybe implement it?") - } - } - - if tplName == "pureVreg" && data.Args == data.ArgsOut { - data.Args = "..." - data.ArgsOut = "..." 
- } - data.tplName = tplName - allData = append(allData, data) - } - - slices.SortFunc(allData, compareTplRuleData) - - for _, data := range allData { - if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil { - panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.GoOp+data.GoType, err)) - } - } - - return buffer -} diff --git a/internal/simdgen/gen_simdssa.go b/internal/simdgen/gen_simdssa.go deleted file mode 100644 index 5a5421a8..00000000 --- a/internal/simdgen/gen_simdssa.go +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "bytes" - "fmt" - "strings" - "text/template" -) - -var ( - ssaTemplates = template.Must(template.New("simdSSA").Parse(` -{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. - -package amd64 - -import ( - "cmd/compile/internal/ssa" - "cmd/compile/internal/ssagen" - "cmd/internal/obj" - "cmd/internal/obj/x86" -) - -func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { - var p *obj.Prog - switch v.Op {{"{"}}{{end}} -{{define "case"}} - case {{.Cases}}: - p = {{.Helper}}(s, v) -{{end}} -{{define "footer"}} - default: - // Unknown reg shape - return false - } -{{end}} -{{define "zeroing"}} - // Masked operation are always compiled with zeroing. - switch v.Op { - case {{.}}: - x86.ParseSuffix(p, "Z") - } -{{end}} -{{define "ending"}} - return true -} -{{end}}`)) -) - -type tplSSAData struct { - Cases string - Helper string -} - -// writeSIMDSSA generates the ssa to prog lowering codes and writes it to simdssa.go -// within the specified directory. 
-func writeSIMDSSA(ops []Operation) *bytes.Buffer { - var ZeroingMask []string - regInfoKeys := []string{ - "v11", - "v21", - "v2k", - "v2kv", - "v2kk", - "vkv", - "v31", - "v3kv", - "v11Imm8", - "vkvImm8", - "v21Imm8", - "v2kImm8", - "v2kkImm8", - "v31ResultInArg0", - "v3kvResultInArg0", - "vfpv", - "vfpkv", - "vgpvImm8", - "vgpImm8", - "v2kvImm8", - } - regInfoSet := map[string][]string{} - for _, key := range regInfoKeys { - regInfoSet[key] = []string{} - } - - seen := map[string]struct{}{} - allUnseen := make(map[string][]Operation) - for _, op := range ops { - shapeIn, shapeOut, maskType, _, gOp := op.shape() - asm := machineOpName(maskType, gOp) - - if _, ok := seen[asm]; ok { - continue - } - seen[asm] = struct{}{} - caseStr := fmt.Sprintf("ssa.OpAMD64%s", asm) - if shapeIn == OneKmaskIn || shapeIn == OneKmaskImmIn { - if gOp.Zeroing == nil { - ZeroingMask = append(ZeroingMask, caseStr) - } - } - regShape, err := op.regShape() - if err != nil { - panic(err) - } - if shapeOut == OneVregOutAtIn { - regShape += "ResultInArg0" - } - if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { - regShape += "Imm8" - } - idx, err := checkVecAsScalar(op) - if err != nil { - panic(err) - } - if idx != -1 { - if regShape == "v21" { - regShape = "vfpv" - } else if regShape == "v2kv" { - regShape = "vfpkv" - } else { - panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regShape, op)) - } - } - if _, ok := regInfoSet[regShape]; !ok { - allUnseen[regShape] = append(allUnseen[regShape], op) - } - regInfoSet[regShape] = append(regInfoSet[regShape], caseStr) - } - if len(allUnseen) != 0 { - panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen)) - } - - buffer := new(bytes.Buffer) - - if err := ssaTemplates.ExecuteTemplate(buffer, "header", nil); err != nil { - panic(fmt.Errorf("failed to execute header template: %w", err)) - } - - for _, regShape := range 
regInfoKeys { - // Stable traversal of regInfoSet - cases := regInfoSet[regShape] - if len(cases) == 0 { - continue - } - data := tplSSAData{ - Cases: strings.Join(cases, ",\n\t\t"), - Helper: "simd" + capitalizeFirst(regShape), - } - if err := ssaTemplates.ExecuteTemplate(buffer, "case", data); err != nil { - panic(fmt.Errorf("failed to execute case template for %s: %w", regShape, err)) - } - } - - if err := ssaTemplates.ExecuteTemplate(buffer, "footer", nil); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - - if len(ZeroingMask) != 0 { - if err := ssaTemplates.ExecuteTemplate(buffer, "zeroing", strings.Join(ZeroingMask, ",\n\t\t")); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - } - - if err := ssaTemplates.ExecuteTemplate(buffer, "ending", nil); err != nil { - panic(fmt.Errorf("failed to execute footer template: %w", err)) - } - - return buffer -} diff --git a/internal/simdgen/gen_utility.go b/internal/simdgen/gen_utility.go deleted file mode 100644 index 20ce3c13..00000000 --- a/internal/simdgen/gen_utility.go +++ /dev/null @@ -1,729 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import ( - "bufio" - "bytes" - "fmt" - "go/format" - "log" - "os" - "path/filepath" - "reflect" - "slices" - "sort" - "strings" - "text/template" - "unicode" -) - -func templateOf(temp, name string) *template.Template { - t, err := template.New(name).Parse(temp) - if err != nil { - panic(fmt.Errorf("failed to parse template %s: %w", name, err)) - } - return t -} - -func createPath(goroot string, file string) (*os.File, error) { - fp := filepath.Join(goroot, file) - dir := filepath.Dir(fp) - err := os.MkdirAll(dir, 0755) - if err != nil { - return nil, fmt.Errorf("failed to create directory %s: %w", dir, err) - } - f, err := os.Create(fp) - if err != nil { - return nil, fmt.Errorf("failed to create file %s: %w", fp, err) - } - return f, nil -} - -func formatWriteAndClose(out *bytes.Buffer, goroot string, file string) { - b, err := format.Source(out.Bytes()) - if err != nil { - fmt.Fprintf(os.Stderr, "%v\n", err) - fmt.Fprintf(os.Stderr, "%s\n", numberLines(out.Bytes())) - fmt.Fprintf(os.Stderr, "%v\n", err) - panic(err) - } else { - writeAndClose(b, goroot, file) - } -} - -func writeAndClose(b []byte, goroot string, file string) { - ofile, err := createPath(goroot, file) - if err != nil { - panic(err) - } - ofile.Write(b) - ofile.Close() -} - -// numberLines takes a slice of bytes, and returns a string where each line -// is numbered, starting from 1. -func numberLines(data []byte) string { - var buf bytes.Buffer - r := bytes.NewReader(data) - s := bufio.NewScanner(r) - for i := 1; s.Scan(); i++ { - fmt.Fprintf(&buf, "%d: %s\n", i, s.Text()) - } - return buf.String() -} - -type inShape uint8 -type outShape uint8 -type maskShape uint8 -type immShape uint8 - -const ( - InvalidIn inShape = iota - PureVregIn // vector register input only - OneKmaskIn // vector and kmask input - OneImmIn // vector and immediate input - OneKmaskImmIn // vector, kmask, and immediate inputs - PureKmaskIn // only mask inputs. 
-) - -const ( - InvalidOut outShape = iota - NoOut // no output - OneVregOut // (one) vector register output - OneGregOut // (one) general register output - OneKmaskOut // mask output - OneVregOutAtIn // the first input is also the output -) - -const ( - InvalidMask maskShape = iota - NoMask // no mask - OneMask // with mask (K1 to K7) - AllMasks // a K mask instruction (K0-K7) -) - -const ( - InvalidImm immShape = iota - NoImm // no immediate - ConstImm // const only immediate - VarImm // pure imm argument provided by the users - ConstVarImm // a combination of user arg and const -) - -// opShape returns the several integers describing the shape of the operation, -// and modified versions of the op: -// -// opNoImm is op with its inputs excluding the const imm. -// -// This function does not modify op. -func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskShape, immType immShape, - opNoImm Operation) { - if len(op.Out) > 1 { - panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) - } - var outputReg int - if len(op.Out) == 1 { - outputReg = op.Out[0].AsmPos - if op.Out[0].Class == "vreg" { - shapeOut = OneVregOut - } else if op.Out[0].Class == "greg" { - shapeOut = OneGregOut - } else if op.Out[0].Class == "mask" { - shapeOut = OneKmaskOut - } else { - panic(fmt.Errorf("simdgen only supports output of class vreg or mask: %s", op)) - } - } else { - shapeOut = NoOut - // TODO: are these only Load/Stores? - // We manually supported two Load and Store, are those enough? 
- panic(fmt.Errorf("simdgen only supports 1 output: %s", op)) - } - hasImm := false - maskCount := 0 - hasVreg := false - for _, in := range op.In { - if in.AsmPos == outputReg { - if shapeOut != OneVregOutAtIn && in.AsmPos == 0 && in.Class == "vreg" { - shapeOut = OneVregOutAtIn - } else { - panic(fmt.Errorf("simdgen only support output and input sharing the same position case of \"the first input is vreg and the only output\": %s", op)) - } - } - if in.Class == "immediate" { - // A manual check on XED data found that AMD64 SIMD instructions at most - // have 1 immediates. So we don't need to check this here. - if *in.Bits != 8 { - panic(fmt.Errorf("simdgen only supports immediates of 8 bits: %s", op)) - } - hasImm = true - } else if in.Class == "mask" { - maskCount++ - } else { - hasVreg = true - } - } - opNoImm = *op - - removeImm := func(o *Operation) { - o.In = o.In[1:] - } - if hasImm { - removeImm(&opNoImm) - if op.In[0].Const != nil { - if op.In[0].ImmOffset != nil { - immType = ConstVarImm - } else { - immType = ConstImm - } - } else if op.In[0].ImmOffset != nil { - immType = VarImm - } else { - panic(fmt.Errorf("simdgen requires imm to have at least one of ImmOffset or Const set: %s", op)) - } - } else { - immType = NoImm - } - if maskCount == 0 { - maskType = NoMask - } else { - maskType = OneMask - } - checkPureMask := func() bool { - if hasImm { - panic(fmt.Errorf("simdgen does not support immediates in pure mask operations: %s", op)) - } - if hasVreg { - panic(fmt.Errorf("simdgen does not support more than 1 masks in non-pure mask operations: %s", op)) - } - return false - } - if !hasImm && maskCount == 0 { - shapeIn = PureVregIn - } else if !hasImm && maskCount > 0 { - if maskCount == 1 { - shapeIn = OneKmaskIn - } else { - if checkPureMask() { - return - } - shapeIn = PureKmaskIn - maskType = AllMasks - } - } else if hasImm && maskCount == 0 { - shapeIn = OneImmIn - } else { - if maskCount == 1 { - shapeIn = OneKmaskImmIn - } else { - 
checkPureMask() - return - } - } - return -} - -// regShape returns a string representation of the register shape. -func (op *Operation) regShape() (string, error) { - _, _, _, _, gOp := op.shape() - var regInfo string - var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int - for _, in := range gOp.In { - if in.Class == "vreg" { - vRegInCnt++ - } else if in.Class == "greg" { - gRegInCnt++ - } else if in.Class == "mask" { - kMaskInCnt++ - } - } - for _, out := range gOp.Out { - // If class overwrite is happening, that's not really a mask but a vreg. - if out.Class == "vreg" || out.OverwriteClass != nil { - vRegOutCnt++ - } else if out.Class == "greg" { - gRegOutCnt++ - } else if out.Class == "mask" { - kMaskOutCnt++ - } - } - var inRegs, inMasks, outRegs, outMasks string - - rmAbbrev := func(s string, i int) string { - if i == 0 { - return "" - } - if i == 1 { - return s - } - return fmt.Sprintf("%s%d", s, i) - - } - - inRegs = rmAbbrev("v", vRegInCnt) - inRegs += rmAbbrev("gp", gRegInCnt) - inMasks = rmAbbrev("k", kMaskInCnt) - - outRegs = rmAbbrev("v", vRegOutCnt) - outRegs += rmAbbrev("gp", gRegOutCnt) - outMasks = rmAbbrev("k", kMaskOutCnt) - - if kMaskInCnt == 0 && kMaskOutCnt == 0 && gRegInCnt == 0 && gRegOutCnt == 0 { - // For pure v we can abbreviate it as v%d%d. - regInfo = fmt.Sprintf("v%d%d", vRegInCnt, vRegOutCnt) - } else if kMaskInCnt == 0 && kMaskOutCnt == 0 { - regInfo = fmt.Sprintf("%s%s", inRegs, outRegs) - } else { - regInfo = fmt.Sprintf("%s%s%s%s", inRegs, inMasks, outRegs, outMasks) - } - return regInfo, nil -} - -// sortOperand sorts op.In by putting immediates first, then vreg, and mask the last. -// TODO: verify that this is a safe assumption of the prog structure. -// from my observation looks like in asm, imms are always the first, -// masks are always the last, with vreg in between. 
-func (op *Operation) sortOperand() { - priority := map[string]int{"immediate": 0, "vreg": 1, "greg": 1, "mask": 2} - sort.SliceStable(op.In, func(i, j int) bool { - pi := priority[op.In[i].Class] - pj := priority[op.In[j].Class] - if pi != pj { - return pi < pj - } - return op.In[i].AsmPos < op.In[j].AsmPos - }) -} - -// goNormalType returns the Go type name for the result of an Op that -// does not return a vector, i.e., that returns a result in a general -// register. Currently there's only one family of Ops in Go's simd library -// that does this (GetElem), and so this is specialized to work for that, -// but the problem (mismatch betwen hardware register width and Go type -// width) seems likely to recur if there are any other cases. -func (op Operation) goNormalType() string { - if op.Go == "GetElem" { - // GetElem returns an element of the vector into a general register - // but as far as the hardware is concerned, that result is either 32 - // or 64 bits wide, no matter what the vector element width is. - // This is not "wrong" but it is not the right answer for Go source code. - // To get the Go type right, combine the base type ("int", "uint", "float"), - // with the input vector element width in bits (8,16,32,64). - - at := 0 // proper value of at depends on whether immediate was stripped or not - if op.In[at].Class == "immediate" { - at++ - } - return fmt.Sprintf("%s%d", *op.Out[0].Base, *op.In[at].ElemBits) - } - panic(fmt.Errorf("Implement goNormalType for %v", op)) -} - -// SSAType returns the string for the type reference in SSA generation, -// for example in the intrinsics generating template. -func (op Operation) SSAType() string { - if op.Out[0].Class == "greg" { - return fmt.Sprintf("types.Types[types.T%s]", strings.ToUpper(op.goNormalType())) - } - return fmt.Sprintf("types.TypeVec%d", *op.Out[0].Bits) -} - -// GoType returns the Go type returned by this operation (relative to the simd package), -// for example "int32" or "Int8x16". 
This is used in a template. -func (op Operation) GoType() string { - if op.Out[0].Class == "greg" { - return op.goNormalType() - } - return *op.Out[0].Go -} - -// ImmName returns the name to use for an operation's immediate operand. -// This can be overriden in the yaml with "name" on an operand, -// otherwise, for now, "constant" -func (op Operation) ImmName() string { - return op.Op0Name("constant") -} - -func (o Operand) OpName(s string) string { - if n := o.Name; n != nil { - return *n - } - if o.Class == "mask" { - return "mask" - } - return s -} - -func (o Operand) OpNameAndType(s string) string { - return o.OpName(s) + " " + *o.Go -} - -// GoExported returns [Go] with first character capitalized. -func (op Operation) GoExported() string { - return capitalizeFirst(op.Go) -} - -// DocumentationExported returns [Documentation] with method name capitalized. -func (op Operation) DocumentationExported() string { - return strings.ReplaceAll(op.Documentation, op.Go, op.GoExported()) -} - -// Op0Name returns the name to use for the 0 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op0Name(s string) string { - return op.In[0].OpName(s) -} - -// Op1Name returns the name to use for the 1 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op1Name(s string) string { - return op.In[1].OpName(s) -} - -// Op2Name returns the name to use for the 2 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op2Name(s string) string { - return op.In[2].OpName(s) -} - -// Op3Name returns the name to use for the 3 operand, -// if any is present, otherwise the parameter is used. -func (op Operation) Op3Name(s string) string { - return op.In[3].OpName(s) -} - -// Op0NameAndType returns the name and type to use for -// the 0 operand, if a name is provided, otherwise -// the parameter value is used as the default. 
-func (op Operation) Op0NameAndType(s string) string { - return op.In[0].OpNameAndType(s) -} - -// Op1NameAndType returns the name and type to use for -// the 1 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op1NameAndType(s string) string { - return op.In[1].OpNameAndType(s) -} - -// Op2NameAndType returns the name and type to use for -// the 2 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op2NameAndType(s string) string { - return op.In[2].OpNameAndType(s) -} - -// Op3NameAndType returns the name and type to use for -// the 3 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op3NameAndType(s string) string { - return op.In[3].OpNameAndType(s) -} - -// Op4NameAndType returns the name and type to use for -// the 4 operand, if a name is provided, otherwise -// the parameter value is used as the default. -func (op Operation) Op4NameAndType(s string) string { - return op.In[4].OpNameAndType(s) -} - -var immClasses []string = []string{"BAD0Imm", "BAD1Imm", "op1Imm8", "op2Imm8", "op3Imm8", "op4Imm8"} -var classes []string = []string{"BAD0", "op1", "op2", "op3", "op4"} - -// classifyOp returns a classification string, modified operation, and perhaps error based -// on the stub and intrinsic shape for the operation. -// The classification string is in the regular expression set "op[1234](Imm8)?(_)?" -// where the "" suffix is optionally attached to the Operation in its input yaml. -// The classification string is used to select a template or a clause of a template -// for intrinsics declaration and the ssagen intrinisics glue code in the compiler. 
-func classifyOp(op Operation) (string, Operation, error) { - _, _, _, immType, gOp := op.shape() - - var class string - - if immType == VarImm || immType == ConstVarImm { - switch l := len(op.In); l { - case 1: - return "", op, fmt.Errorf("simdgen does not recognize this operation of only immediate input: %s", op) - case 2, 3, 4, 5: - class = immClasses[l] - default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) - } - if order := op.OperandOrder; order != nil { - class += "_" + *order - } - return class, op, nil - } else { - switch l := len(gOp.In); l { - case 1, 2, 3, 4: - class = classes[l] - default: - return "", op, fmt.Errorf("simdgen does not recognize this operation of input length %d: %s", len(op.In), op) - } - if order := op.OperandOrder; order != nil { - class += "_" + *order - } - return class, gOp, nil - } -} - -func checkVecAsScalar(op Operation) (idx int, err error) { - idx = -1 - sSize := 0 - for i, o := range op.In { - if o.TreatLikeAScalarOfSize != nil { - if idx == -1 { - idx = i - sSize = *o.TreatLikeAScalarOfSize - } else { - err = fmt.Errorf("simdgen only supports one TreatLikeAScalarOfSize in the arg list: %s", op) - return - } - } - } - if idx >= 0 { - if idx != 1 { - err = fmt.Errorf("simdgen only supports TreatLikeAScalarOfSize at the 2nd arg of the arg list: %s", op) - return - } - if sSize != 8 && sSize != 16 && sSize != 32 && sSize != 64 { - err = fmt.Errorf("simdgen does not recognize this uint size: %d, %s", sSize, op) - return - } - } - return -} - -// dedup is deduping operations in the full structure level. 
-func dedup(ops []Operation) (deduped []Operation) { - for _, op := range ops { - seen := false - for _, dop := range deduped { - if reflect.DeepEqual(op, dop) { - seen = true - break - } - } - if !seen { - deduped = append(deduped, op) - } - } - return -} - -func (op Operation) GenericName() string { - if op.OperandOrder != nil { - switch *op.OperandOrder { - case "21Type1", "231Type1": - // Permute uses operand[1] for method receiver. - return op.Go + *op.In[1].Go - } - } - if op.In[0].Class == "immediate" { - return op.Go + *op.In[1].Go - } - return op.Go + *op.In[0].Go -} - -// dedupGodef is deduping operations in [Op.Go]+[*Op.In[0].Go] level. -// By deduping, it means picking the least advanced architecture that satisfy the requirement: -// AVX512 will be least preferred. -// If FlagNoDedup is set, it will report the duplicates to the console. -func dedupGodef(ops []Operation) ([]Operation, error) { - seen := map[string][]Operation{} - for _, op := range ops { - _, _, _, _, gOp := op.shape() - - gN := gOp.GenericName() - seen[gN] = append(seen[gN], op) - } - if *FlagReportDup { - for gName, dup := range seen { - if len(dup) > 1 { - log.Printf("Duplicate for %s:\n", gName) - for _, op := range dup { - log.Printf("%s\n", op) - } - } - } - return ops, nil - } - isAVX512 := func(op Operation) bool { - return strings.Contains(op.CPUFeature, "AVX512") - } - deduped := []Operation{} - for _, dup := range seen { - if len(dup) > 1 { - slices.SortFunc(dup, func(i, j Operation) int { - // Put non-AVX512 candidates at the beginning - if !isAVX512(i) && isAVX512(j) { - return -1 - } - if isAVX512(i) && !isAVX512(j) { - return 1 - } - return strings.Compare(i.CPUFeature, j.CPUFeature) - }) - } - deduped = append(deduped, dup[0]) - } - slices.SortFunc(deduped, compareOperations) - return deduped, nil -} - -// Copy op.ConstImm to op.In[0].Const -// This is a hack to reduce the size of defs we need for const imm operations. 
-func copyConstImm(ops []Operation) error { - for _, op := range ops { - if op.ConstImm == nil { - continue - } - _, _, _, immType, _ := op.shape() - - if immType == ConstImm || immType == ConstVarImm { - op.In[0].Const = op.ConstImm - } - // Otherwise, just not port it - e.g. {VPCMP[BWDQ] imm=0} and {VPCMPEQ[BWDQ]} are - // the same operations "Equal", [dedupgodef] should be able to distinguish them. - } - return nil -} - -func capitalizeFirst(s string) string { - if s == "" { - return "" - } - // Convert the string to a slice of runes to handle multi-byte characters correctly. - r := []rune(s) - r[0] = unicode.ToUpper(r[0]) - return string(r) -} - -// overwrite corrects some errors due to: -// - The XED data is wrong -// - Go's SIMD API requirement, for example AVX2 compares should also produce masks. -// This rewrite has strict constraints, please see the error message. -// These constraints are also explointed in [writeSIMDRules], [writeSIMDMachineOps] -// and [writeSIMDSSA], please be careful when updating these constraints. 
-func overwrite(ops []Operation) error { - hasClassOverwrite := false - overwrite := func(op []Operand, idx int, o Operation) error { - if op[idx].OverwriteElementBits != nil { - if op[idx].ElemBits == nil { - panic(fmt.Errorf("ElemBits is nil at operand %d of %v", idx, o)) - } - *op[idx].ElemBits = *op[idx].OverwriteElementBits - *op[idx].Lanes = *op[idx].Bits / *op[idx].ElemBits - *op[idx].Go = fmt.Sprintf("%s%dx%d", capitalizeFirst(*op[idx].Base), *op[idx].ElemBits, *op[idx].Lanes) - } - if op[idx].OverwriteClass != nil { - if op[idx].OverwriteBase == nil { - panic(fmt.Errorf("simdgen: [OverwriteClass] must be set together with [OverwriteBase]: %s", op[idx])) - } - oBase := *op[idx].OverwriteBase - oClass := *op[idx].OverwriteClass - if oClass != "mask" { - panic(fmt.Errorf("simdgen: [Class] overwrite only supports overwritting to mask: %s", op[idx])) - } - if oBase != "int" { - panic(fmt.Errorf("simdgen: [Class] overwrite must set [OverwriteBase] to int: %s", op[idx])) - } - if op[idx].Class != "vreg" { - panic(fmt.Errorf("simdgen: [Class] overwrite must be overwriting [Class] from vreg: %s", op[idx])) - } - hasClassOverwrite = true - *op[idx].Base = oBase - op[idx].Class = oClass - *op[idx].Go = fmt.Sprintf("Mask%dx%d", *op[idx].ElemBits, *op[idx].Lanes) - } else if op[idx].OverwriteBase != nil { - oBase := *op[idx].OverwriteBase - *op[idx].Go = strings.ReplaceAll(*op[idx].Go, capitalizeFirst(*op[idx].Base), capitalizeFirst(oBase)) - if op[idx].Class == "greg" { - *op[idx].Go = strings.ReplaceAll(*op[idx].Go, *op[idx].Base, oBase) - } - *op[idx].Base = oBase - } - return nil - } - for i, o := range ops { - hasClassOverwrite = false - for j := range ops[i].In { - if err := overwrite(ops[i].In, j, o); err != nil { - return err - } - if hasClassOverwrite { - return fmt.Errorf("simdgen does not support [OverwriteClass] in inputs: %s", ops[i]) - } - } - for j := range ops[i].Out { - if err := overwrite(ops[i].Out, j, o); err != nil { - return err - } - } - if 
hasClassOverwrite { - for _, in := range ops[i].In { - if in.Class == "mask" { - return fmt.Errorf("simdgen only supports [OverwriteClass] for operations without mask inputs") - } - } - } - } - return nil -} - -// reportXEDInconsistency reports potential XED inconsistencies. -// We can add more fields to [Operation] to enable more checks and implement it here. -// Supported checks: -// [NameAndSizeCheck]: NAME[BWDQ] should set the elemBits accordingly. -// This check is useful to find inconsistencies, then we can add overwrite fields to -// those defs to correct them manually. -func reportXEDInconsistency(ops []Operation) error { - for _, o := range ops { - if o.NameAndSizeCheck != nil { - suffixSizeMap := map[byte]int{'B': 8, 'W': 16, 'D': 32, 'Q': 64} - checkOperand := func(opr Operand) error { - if opr.ElemBits == nil { - return fmt.Errorf("simdgen expects elemBits to be set when performing NameAndSizeCheck") - } - if v, ok := suffixSizeMap[o.Asm[len(o.Asm)-1]]; !ok { - return fmt.Errorf("simdgen expects asm to end with [BWDQ] when performing NameAndSizeCheck") - } else { - if v != *opr.ElemBits { - return fmt.Errorf("simdgen finds NameAndSizeCheck inconsistency in def: %s", o) - } - } - return nil - } - for _, in := range o.In { - if in.Class != "vreg" && in.Class != "mask" { - continue - } - if in.TreatLikeAScalarOfSize != nil { - // This is an irregular operand, don't check it. 
- continue - } - if err := checkOperand(in); err != nil { - return err - } - } - for _, out := range o.Out { - if err := checkOperand(out); err != nil { - return err - } - } - } - } - return nil -} - -func (o Operation) String() string { - return pprints(o) -} - -func (op Operand) String() string { - return pprints(op) -} diff --git a/internal/simdgen/go.yaml b/internal/simdgen/go.yaml deleted file mode 100644 index 4f077c81..00000000 --- a/internal/simdgen/go.yaml +++ /dev/null @@ -1 +0,0 @@ -!import ops/*/go.yaml diff --git a/internal/simdgen/godefs.go b/internal/simdgen/godefs.go deleted file mode 100644 index 3a830ead..00000000 --- a/internal/simdgen/godefs.go +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "fmt" - "log" - "regexp" - "slices" - "strconv" - "strings" - - "golang.org/x/arch/internal/unify" -) - -type Operation struct { - rawOperation - - // Go is the Go method name of this operation. - // - // It is derived from the raw Go method name by adding optional suffixes. - // Currently, "Masked" is the only suffix. - Go string - - // Documentation is the doc string for this API. - // - // It is computed from the raw documentation: - // - // - "NAME" is replaced by the Go method name. - // - // - For masked operation, a sentence about masking is added. - Documentation string - - // In is the sequence of parameters to the Go method. - // - // For masked operations, this will have the mask operand appended. - In []Operand -} - -// rawOperation is the unifier representation of an [Operation]. It is -// translated into a more parsed form after unifier decoding. 
-type rawOperation struct { - Go string // Base Go method name - - GoArch string // GOARCH for this definition - Asm string // Assembly mnemonic - OperandOrder *string // optional Operand order for better Go declarations - // Optional tag to indicate this operation is paired with special generic->machine ssa lowering rules. - // Should be paired with special templates in gen_simdrules.go - SpecialLower *string - - In []Operand // Parameters - InVariant []Operand // Optional parameters - Out []Operand // Results - Commutative bool // Commutativity - CPUFeature string // CPUID/Has* feature name - Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" - Documentation *string // Documentation will be appended to the stubs comments. - // ConstMask is a hack to reduce the size of defs the user writes for const-immediate - // If present, it will be copied to [In[0].Const]. - ConstImm *string - // NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits. - NameAndSizeCheck *bool - // If non-nil, all generation in gen_simdTypes.go and gen_intrinsics will be skipped. - NoTypes *string - // If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped. - NoGenericOps *string - // If non-nil, this string will be attached to the machine ssa op name. - SSAVariant *string -} - -func (o *Operation) DecodeUnified(v *unify.Value) error { - if err := v.Decode(&o.rawOperation); err != nil { - return err - } - - isMasked := false - if len(o.InVariant) == 0 { - // No variant - } else if len(o.InVariant) == 1 && o.InVariant[0].Class == "mask" { - isMasked = true - } else { - return fmt.Errorf("unknown inVariant") - } - - // Compute full Go method name. - o.Go = o.rawOperation.Go - if isMasked { - o.Go += "Masked" - } - - // Compute doc string. 
- if o.rawOperation.Documentation != nil { - o.Documentation = *o.rawOperation.Documentation - } else { - o.Documentation = "// UNDOCUMENTED" - } - o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go) - if isMasked { - o.Documentation += "\n//\n// This operation is applied selectively under a write mask." - } - - o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) - - return nil -} - -func (o *Operation) VectorWidth() int { - out := o.Out[0] - if out.Class == "vreg" { - return *out.Bits - } else if out.Class == "greg" || out.Class == "mask" { - for i := range o.In { - if o.In[i].Class == "vreg" { - return *o.In[i].Bits - } - } - } - panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o)) -} - -func machineOpName(maskType maskShape, gOp Operation) string { - asm := gOp.Asm - if maskType == 2 { - asm += "Masked" - } - asm = fmt.Sprintf("%s%d", asm, gOp.VectorWidth()) - if gOp.SSAVariant != nil { - asm += *gOp.SSAVariant - } - return asm -} - -func compareStringPointers(x, y *string) int { - if x != nil && y != nil { - return compareNatural(*x, *y) - } - if x == nil && y == nil { - return 0 - } - if x == nil { - return -1 - } - return 1 -} - -func compareIntPointers(x, y *int) int { - if x != nil && y != nil { - return *x - *y - } - if x == nil && y == nil { - return 0 - } - if x == nil { - return -1 - } - return 1 -} - -func compareOperations(x, y Operation) int { - if c := compareNatural(x.Go, y.Go); c != 0 { - return c - } - xIn, yIn := x.In, y.In - - if len(xIn) > len(yIn) && xIn[len(xIn)-1].Class == "mask" { - xIn = xIn[:len(xIn)-1] - } else if len(xIn) < len(yIn) && yIn[len(yIn)-1].Class == "mask" { - yIn = yIn[:len(yIn)-1] - } - - if len(xIn) < len(yIn) { - return -1 - } - if len(xIn) > len(yIn) { - return 1 - } - if len(x.Out) < len(y.Out) { - return -1 - } - if len(x.Out) > len(y.Out) { - return 1 - } - for i := range xIn { - ox, oy := &xIn[i], &yIn[i] - if c := compareOperands(ox, 
oy); c != 0 { - return c - } - } - return 0 -} - -func compareOperands(x, y *Operand) int { - if c := compareNatural(x.Class, y.Class); c != 0 { - return c - } - if x.Class == "immediate" { - return compareStringPointers(x.ImmOffset, y.ImmOffset) - } else { - if c := compareStringPointers(x.Base, y.Base); c != 0 { - return c - } - if c := compareIntPointers(x.ElemBits, y.ElemBits); c != 0 { - return c - } - if c := compareIntPointers(x.Bits, y.Bits); c != 0 { - return c - } - return 0 - } -} - -type Operand struct { - Class string // One of "mask", "immediate", "vreg", "greg", and "mem" - - Go *string // Go type of this operand - AsmPos int // Position of this operand in the assembly instruction - - Base *string // Base Go type ("int", "uint", "float") - ElemBits *int // Element bit width - Bits *int // Total vector bit width - - Const *string // Optional constant value for immediates. - // Optional immediate arg offsets. If this field is non-nil, - // This operand will be an immediate operand: - // The compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt - // field of the operation. - ImmOffset *string - Name *string // optional name in the Go intrinsic declaration - Lanes *int // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1 - // TreatLikeAScalarOfSize means only the lower $TreatLikeAScalarOfSize bits of the vector - // is used, so at the API level we can make it just a scalar value of this size; Then we - // can overwrite it to a vector of the right size during intrinsics stage. - TreatLikeAScalarOfSize *int - // If non-nil, it means the [Class] field is overwritten here, right now this is used to - // overwrite the results of AVX2 compares to masks. - OverwriteClass *string - // If non-nil, it means the [Base] field is overwritten here. This field exist solely - // because Intel's XED data is inconsistent. e.g. VANDNP[SD] marks its operand int. 
- OverwriteBase *string - // If non-nil, it means the [ElementBits] field is overwritten. This field exist solely - // because Intel's XED data is inconsistent. e.g. AVX512 VPMADDUBSW marks its operand - // elemBits 16, which should be 8. - OverwriteElementBits *int -} - -// isDigit returns true if the byte is an ASCII digit. -func isDigit(b byte) bool { - return b >= '0' && b <= '9' -} - -// compareNatural performs a "natural sort" comparison of two strings. -// It compares non-digit sections lexicographically and digit sections -// numerically. In the case of string-unequal "equal" strings like -// "a01b" and "a1b", strings.Compare breaks the tie. -// -// It returns: -// -// -1 if s1 < s2 -// 0 if s1 == s2 -// +1 if s1 > s2 -func compareNatural(s1, s2 string) int { - i, j := 0, 0 - len1, len2 := len(s1), len(s2) - - for i < len1 && j < len2 { - // Find a non-digit segment or a number segment in both strings. - if isDigit(s1[i]) && isDigit(s2[j]) { - // Number segment comparison. - numStart1 := i - for i < len1 && isDigit(s1[i]) { - i++ - } - num1, _ := strconv.Atoi(s1[numStart1:i]) - - numStart2 := j - for j < len2 && isDigit(s2[j]) { - j++ - } - num2, _ := strconv.Atoi(s2[numStart2:j]) - - if num1 < num2 { - return -1 - } - if num1 > num2 { - return 1 - } - // If numbers are equal, continue to the next segment. - } else { - // Non-digit comparison. - if s1[i] < s2[j] { - return -1 - } - if s1[i] > s2[j] { - return 1 - } - i++ - j++ - } - } - - // deal with a01b vs a1b; there needs to be an order. - return strings.Compare(s1, s2) -} - -const generatedHeader = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
-` - -func writeGoDefs(path string, cl unify.Closure) error { - // TODO: Merge operations with the same signature but multiple - // implementations (e.g., SSE vs AVX) - var ops []Operation - for def := range cl.All() { - var op Operation - if !def.Exact() { - continue - } - if err := def.Decode(&op); err != nil { - log.Println(err.Error()) - log.Println(def) - continue - } - // TODO: verify that this is safe. - op.sortOperand() - ops = append(ops, op) - } - slices.SortFunc(ops, compareOperations) - // The parsed XED data might contain duplicates, like - // 512 bits VPADDP. - deduped := dedup(ops) - slices.SortFunc(deduped, compareOperations) - - if *Verbose { - log.Printf("dedup len: %d\n", len(ops)) - } - var err error - if err = overwrite(deduped); err != nil { - return err - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - if !*FlagNoDedup { - // TODO: This can hide mistakes in the API definitions, especially when - // multiple patterns result in the same API unintentionally. Make it stricter. 
- if deduped, err = dedupGodef(deduped); err != nil { - return err - } - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - if !*FlagNoConstImmPorting { - if err = copyConstImm(deduped); err != nil { - return err - } - } - if *Verbose { - log.Printf("dedup len: %d\n", len(deduped)) - } - reportXEDInconsistency(deduped) - typeMap := parseSIMDTypes(deduped) - - formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go") - formatWriteAndClose(writeSIMDFeatures(deduped), path, "src/"+simdPackage+"/cpu.go") - formatWriteAndClose(writeSIMDStubs(deduped, typeMap), path, "src/"+simdPackage+"/ops_amd64.go") - formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go") - formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go") - formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go") - formatWriteAndClose(writeSIMDSSA(deduped), path, "src/cmd/compile/internal/amd64/simdssa.go") - writeAndClose(writeSIMDRules(deduped).Bytes(), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules") - - return nil -} diff --git a/internal/simdgen/main.go b/internal/simdgen/main.go deleted file mode 100644 index a7f0b0de..00000000 --- a/internal/simdgen/main.go +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// simdgen is an experiment in generating Go <-> asm SIMD mappings. -// -// Usage: simdgen [-xedPath=path] [-q=query] input.yaml... -// -// If -xedPath is provided, one of the inputs is a sum of op-code definitions -// generated from the Intel XED data at path. -// -// If input YAML files are provided, each file is read as an input value. 
See -// [unify.Closure.UnmarshalYAML] or "go doc unify.Closure.UnmarshalYAML" for the -// format of these files. -// -// TODO: Example definitions and values. -// -// The command unifies across all of the inputs and prints all possible results -// of this unification. -// -// If the -q flag is provided, its string value is parsed as a value and treated -// as another input to unification. This is intended as a way to "query" the -// result, typically by narrowing it down to a small subset of results. -// -// Typical usage: -// -// go run . -xedPath $XEDPATH *.yaml -// -// To see just the definitions generated from XED, run: -// -// go run . -xedPath $XEDPATH -// -// (This works because if there's only one input, there's nothing to unify it -// with, so the result is simply itself.) -// -// To see just the definitions for VPADDQ: -// -// go run . -xedPath $XEDPATH -q '{asm: VPADDQ}' -// -// simdgen can also generate Go definitions of SIMD mappings: -// To generate go files to the go root, run: -// -// go run . -xedPath $XEDPATH -o godefs -goroot $PATH/TO/go go.yaml categories.yaml types.yaml -// -// types.yaml is already written, it specifies the shapes of vectors. -// categories.yaml and go.yaml contains definitions that unifies with types.yaml and XED -// data, you can find an example in ops/AddSub/. -// -// When generating Go definitions, simdgen do 3 "magic"s: -// - It splits masked operations(with op's [Masked] field set) to const and non const: -// - One is a normal masked operation, the original -// - The other has its mask operand's [Const] fields set to "K0". -// - This way the user does not need to provide a separate "K0"-masked operation def. -// -// - It deduplicates intrinsic names that have duplicates: -// - If there are two operations that shares the same signature, one is AVX512 the other -// is before AVX512, the other will be selected. -// - This happens often when some operations are defined both before AVX512 and after. 
-// This way the user does not need to provide a separate "K0" operation for the -// AVX512 counterpart. -// -// - It copies the op's [ConstImm] field to its immediate operand's [Const] field. -// - This way the user does not need to provide verbose op definition while only -// the const immediate field is different. This is useful to reduce verbosity of -// compares with imm control predicates. -// -// These 3 magics could be disabled by enabling -nosplitmask, -nodedup or -// -noconstimmporting flags. -// -// simdgen right now only supports amd64, -arch=$OTHERARCH will trigger a fatal error. -package main - -// Big TODOs: -// -// - This can produce duplicates, which can also lead to less efficient -// environment merging. Add hashing and use it for deduplication. Be careful -// about how this shows up in debug traces, since it could make things -// confusing if we don't show it happening. -// -// - Do I need Closure, Value, and Domain? It feels like I should only need two -// types. - -import ( - "cmp" - "flag" - "fmt" - "log" - "maps" - "os" - "path/filepath" - "runtime/pprof" - "slices" - "strings" - - "golang.org/x/arch/internal/unify" - "gopkg.in/yaml.v3" -) - -var ( - xedPath = flag.String("xedPath", "", "load XED datafiles from `path`") - flagQ = flag.String("q", "", "query: read `def` as another input (skips final validation)") - flagO = flag.String("o", "yaml", "output type: yaml, godefs (generate definitions into a Go source tree") - flagGoDefRoot = flag.String("goroot", ".", "the path to the Go dev directory that will receive the generated files") - FlagNoDedup = flag.Bool("nodedup", false, "disable deduplicating godefs of 2 qualifying operations from different extensions") - FlagNoConstImmPorting = flag.Bool("noconstimmporting", false, "disable const immediate porting from op to imm operand") - FlagArch = flag.String("arch", "amd64", "the target architecture") - - Verbose = flag.Bool("v", false, "verbose") - - flagDebugXED = flag.Bool("debug-xed", 
false, "show XED instructions") - flagDebugUnify = flag.Bool("debug-unify", false, "print unification trace") - flagDebugHTML = flag.String("debug-html", "", "write unification trace to `file.html`") - FlagReportDup = flag.Bool("reportdup", false, "report the duplicate godefs") - - flagCPUProfile = flag.String("cpuprofile", "", "write CPU profile to `file`") - flagMemProfile = flag.String("memprofile", "", "write memory profile to `file`") -) - -const simdPackage = "simd" - -func main() { - flag.Parse() - - if *flagCPUProfile != "" { - f, err := os.Create(*flagCPUProfile) - if err != nil { - log.Fatalf("-cpuprofile: %s", err) - } - defer f.Close() - pprof.StartCPUProfile(f) - defer pprof.StopCPUProfile() - } - if *flagMemProfile != "" { - f, err := os.Create(*flagMemProfile) - if err != nil { - log.Fatalf("-memprofile: %s", err) - } - defer func() { - pprof.WriteHeapProfile(f) - f.Close() - }() - } - - var inputs []unify.Closure - - if *FlagArch != "amd64" { - log.Fatalf("simdgen only supports amd64") - } - - // Load XED into a defs set. - if *xedPath != "" { - xedDefs := loadXED(*xedPath) - inputs = append(inputs, unify.NewSum(xedDefs...)) - } - - // Load query. - if *flagQ != "" { - r := strings.NewReader(*flagQ) - def, err := unify.Read(r, "", unify.ReadOpts{}) - if err != nil { - log.Fatalf("parsing -q: %s", err) - } - inputs = append(inputs, def) - } - - // Load defs files. 
- must := make(map[*unify.Value]struct{}) - for _, path := range flag.Args() { - defs, err := unify.ReadFile(path, unify.ReadOpts{}) - if err != nil { - log.Fatal(err) - } - inputs = append(inputs, defs) - - if filepath.Base(path) == "go.yaml" { - // These must all be used in the final result - for def := range defs.Summands() { - must[def] = struct{}{} - } - } - } - - // Prepare for unification - if *flagDebugUnify { - unify.Debug.UnifyLog = os.Stderr - } - if *flagDebugHTML != "" { - f, err := os.Create(*flagDebugHTML) - if err != nil { - log.Fatal(err) - } - unify.Debug.HTML = f - defer f.Close() - } - - // Unify! - unified, err := unify.Unify(inputs...) - if err != nil { - log.Fatal(err) - } - - // Print results. - switch *flagO { - case "yaml": - // Produce a result that looks like encoding a slice, but stream it. - fmt.Println("!sum") - var val1 [1]*unify.Value - for val := range unified.All() { - val1[0] = val - // We have to make a new encoder each time or it'll print a document - // separator between each object. - enc := yaml.NewEncoder(os.Stdout) - if err := enc.Encode(val1); err != nil { - log.Fatal(err) - } - enc.Close() - } - case "godefs": - if err := writeGoDefs(*flagGoDefRoot, unified); err != nil { - log.Fatalf("Failed writing godefs: %+v", err) - } - } - - if !*Verbose && *xedPath != "" { - if operandRemarks == 0 { - fmt.Fprintf(os.Stderr, "XED decoding generated no errors, which is unusual.\n") - } else { - fmt.Fprintf(os.Stderr, "XED decoding generated %d \"errors\" which is not cause for alarm, use -v for details.\n", operandRemarks) - } - } - - // Validate results. - // - // Don't validate if this is a command-line query because that tends to - // eliminate lots of required defs and is used in cases where maybe defs - // aren't enumerable anyway. - if *flagQ == "" && len(must) > 0 { - validate(unified, must) - } -} - -func validate(cl unify.Closure, required map[*unify.Value]struct{}) { - // Validate that: - // 1. 
All final defs are exact - // 2. All required defs are used - for def := range cl.All() { - if _, ok := def.Domain.(unify.Def); !ok { - fmt.Fprintf(os.Stderr, "%s: expected Def, got %T\n", def.PosString(), def.Domain) - continue - } - - if !def.Exact() { - fmt.Fprintf(os.Stderr, "%s: def not reduced to an exact value, why is %s:\n", def.PosString(), def.WhyNotExact()) - fmt.Fprintf(os.Stderr, "\t%s\n", strings.ReplaceAll(def.String(), "\n", "\n\t")) - } - - for root := range def.Provenance() { - delete(required, root) - } - } - // Report unused defs - unused := slices.SortedFunc(maps.Keys(required), - func(a, b *unify.Value) int { - return cmp.Or( - cmp.Compare(a.Pos().Path, b.Pos().Path), - cmp.Compare(a.Pos().Line, b.Pos().Line), - ) - }) - for _, def := range unused { - // TODO: Can we say anything more actionable? This is always a problem - // with unification: if it fails, it's very hard to point a finger at - // any particular reason. We could go back and try unifying this again - // with each subset of the inputs (starting with individual inputs) to - // at least say "it doesn't unify with anything in x.yaml". That's a lot - // of work, but if we have trouble debugging unification failure it may - // be worth it. - fmt.Fprintf(os.Stderr, "%s: def required, but did not unify (%v)\n", - def.PosString(), def) - } -} diff --git a/internal/simdgen/ops/AddSub/categories.yaml b/internal/simdgen/ops/AddSub/categories.yaml deleted file mode 100644 index 35e81042..00000000 --- a/internal/simdgen/ops/AddSub/categories.yaml +++ /dev/null @@ -1,37 +0,0 @@ -!sum -- go: Add - commutative: true - documentation: !string |- - // NAME adds corresponding elements of two vectors. -- go: AddSaturated - commutative: true - documentation: !string |- - // NAME adds corresponding elements of two vectors with saturation. -- go: Sub - commutative: false - documentation: !string |- - // NAME subtracts corresponding elements of two vectors. 
-- go: SubSaturated - commutative: false - documentation: !string |- - // NAME subtracts corresponding elements of two vectors with saturation. -- go: AddPairs - commutative: false - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairs - commutative: false - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. -- go: AddPairsSaturated - commutative: false - documentation: !string |- - // NAME horizontally adds adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. -- go: SubPairsSaturated - commutative: false - documentation: !string |- - // NAME horizontally subtracts adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
diff --git a/internal/simdgen/ops/AddSub/go.yaml b/internal/simdgen/ops/AddSub/go.yaml deleted file mode 100644 index 4423d8c7..00000000 --- a/internal/simdgen/ops/AddSub/go.yaml +++ /dev/null @@ -1,77 +0,0 @@ -!sum -# Add -- go: Add - asm: "VPADD[BWDQ]|VADDP[SD]" - in: - - &any - go: $t - - *any - out: - - *any -# Add Saturated -- go: AddSaturated - asm: "VPADDS[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *int -- go: AddSaturated - asm: "VPADDUS[BWDQ]" - in: - - &uint - go: $t - base: uint - - *uint - out: - - *uint - -# Sub -- go: Sub - asm: "VPSUB[BWDQ]|VSUBP[SD]" - in: &2any - - *any - - *any - out: &1any - - *any -# Sub Saturated -- go: SubSaturated - asm: "VPSUBS[BWDQ]" - in: &2int - - *int - - *int - out: &1int - - *int -- go: SubSaturated - asm: "VPSUBUS[BWDQ]" - in: - - *uint - - *uint - out: - - *uint -- go: AddPairs - asm: "VPHADD[DW]" - in: *2any - out: *1any -- go: SubPairs - asm: "VPHSUB[DW]" - in: *2any - out: *1any -- go: AddPairs - asm: "VHADDP[SD]" # floats - in: *2any - out: *1any -- go: SubPairs - asm: "VHSUBP[SD]" # floats - in: *2any - out: *1any -- go: AddPairsSaturated - asm: "VPHADDS[DW]" - in: *2int - out: *1int -- go: SubPairsSaturated - asm: "VPHSUBS[DW]" - in: *2int - out: *1int diff --git a/internal/simdgen/ops/BitwiseLogic/categories.yaml b/internal/simdgen/ops/BitwiseLogic/categories.yaml deleted file mode 100644 index 3142d191..00000000 --- a/internal/simdgen/ops/BitwiseLogic/categories.yaml +++ /dev/null @@ -1,20 +0,0 @@ -!sum -- go: And - commutative: true - documentation: !string |- - // NAME performs a bitwise AND operation between two vectors. -- go: Or - commutative: true - documentation: !string |- - // NAME performs a bitwise OR operation between two vectors. -- go: AndNot - commutative: false - documentation: !string |- - // NAME performs a bitwise x &^ y. -- go: Xor - commutative: true - documentation: !string |- - // NAME performs a bitwise XOR operation between two vectors. 
- -# We also have PTEST and VPTERNLOG, those should be hidden from the users -# and only appear in rewrite rules. diff --git a/internal/simdgen/ops/BitwiseLogic/go.yaml b/internal/simdgen/ops/BitwiseLogic/go.yaml deleted file mode 100644 index ab344438..00000000 --- a/internal/simdgen/ops/BitwiseLogic/go.yaml +++ /dev/null @@ -1,128 +0,0 @@ -!sum -# In the XED data, *all* floating point bitwise logic operation has their -# operand type marked as uint. We are not trying to understand why Intel -# decided that they want FP bit-wise logic operations, but this irregularity -# has to be dealed with in separate rules with some overwrites. - -# For many bit-wise operations, we have the following non-orthogonal -# choices: -# -# - Non-masked AVX operations have no element width (because it -# doesn't matter), but only cover 128 and 256 bit vectors. -# -# - Masked AVX-512 operations have an element width (because it needs -# to know how to interpret the mask), and cover 128, 256, and 512 bit -# vectors. These only cover 32- and 64-bit element widths. -# -# - Non-masked AVX-512 operations still have an element width (because -# they're just the masked operations with an implicit K0 mask) but it -# doesn't matter! This is the only option for non-masked 512 bit -# operations, and we can pick any of the element widths. -# -# We unify with ALL of these operations and the compiler generator -# picks when there are multiple options. - -# TODO: We don't currently generate unmasked bit-wise operations on 512 bit -# vectors of 8- or 16-bit elements. AVX-512 only has *masked* bit-wise -# operations for 32- and 64-bit elements; while the element width doesn't matter -# for unmasked operations, right now we don't realize that we can just use the -# 32- or 64-bit version for the unmasked form. Maybe in the XED decoder we -# should recognize bit-wise operations when generating unmasked versions and -# omit the element width. 
- -# For binary operations, we constrain their two inputs and one output to the -# same Go type using a variable. - -- go: And - asm: "VPAND[DQ]?" - in: - - &any - go: $t - - *any - out: - - *any - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint8x64 and Int8x64 - inVariant: [] - in: &twoI8x64 - - &i8x64 - go: $t - overwriteElementBits: 8 - - *i8x64 - out: &oneI8x64 - - *i8x64 - -- go: And - asm: "VPANDD" # Fill in the gap, And is missing for Uint16x32 and Int16x32 - inVariant: [] - in: &twoI16x32 - - &i16x32 - go: $t - overwriteElementBits: 16 - - *i16x32 - out: &oneI16x32 - - *i16x32 - -- go: AndNot - asm: "VPANDN[DQ]?" - operandOrder: "21" # switch the arg order - in: - - *any - - *any - out: - - *any - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint8x64 and Int8x64 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: AndNot - asm: "VPANDND" # Fill in the gap, AndNot is missing for Uint16x32 and Int16x32 - operandOrder: "21" # switch the arg order - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Or - asm: "VPOR[DQ]?" - in: - - *any - - *any - out: - - *any - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Or - asm: "VPORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 - -- go: Xor - asm: "VPXOR[DQ]?" 
- in: - - *any - - *any - out: - - *any - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint8x64 and Int8x64 - inVariant: [] - in: *twoI8x64 - out: *oneI8x64 - -- go: Xor - asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32 - inVariant: [] - in: *twoI16x32 - out: *oneI16x32 \ No newline at end of file diff --git a/internal/simdgen/ops/Compares/categories.yaml b/internal/simdgen/ops/Compares/categories.yaml deleted file mode 100644 index aa07ade2..00000000 --- a/internal/simdgen/ops/Compares/categories.yaml +++ /dev/null @@ -1,43 +0,0 @@ -!sum -# const imm predicate(holds for both float and int|uint): -# 0: Equal -# 1: Less -# 2: LessEqual -# 4: NotEqual -# 5: GreaterEqual -# 6: Greater -- go: Equal - constImm: 0 - commutative: true - documentation: !string |- - // NAME compares for equality. -- go: Less - constImm: 1 - commutative: false - documentation: !string |- - // NAME compares for less than. -- go: LessEqual - constImm: 2 - commutative: false - documentation: !string |- - // NAME compares for less than or equal. -- go: IsNan # For float only. - constImm: 3 - commutative: true - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). -- go: NotEqual - constImm: 4 - commutative: true - documentation: !string |- - // NAME compares for inequality. -- go: GreaterEqual - constImm: 13 - commutative: false - documentation: !string |- - // NAME compares for greater than or equal. -- go: Greater - constImm: 14 - commutative: false - documentation: !string |- - // NAME compares for greater than. 
diff --git a/internal/simdgen/ops/Compares/go.yaml b/internal/simdgen/ops/Compares/go.yaml deleted file mode 100644 index 0f916283..00000000 --- a/internal/simdgen/ops/Compares/go.yaml +++ /dev/null @@ -1,141 +0,0 @@ -!sum -# Ints -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - &any - go: $t - - *any - out: - - &anyvregToMask - go: $t - overwriteBase: int - overwriteClass: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - &int - go: $t - base: int - - *int - out: - - *anyvregToMask -# 256-bit VCMPGTQ's output elemBits is marked 32-bit in the XED data, we -# believe this is an error, so add this definition to overwrite. -- go: Greater - asm: "VPCMPGTQ" - in: - - &int64 - go: $t - base: int - elemBits: 64 - - *int64 - out: - - base: int - elemBits: 32 - overwriteElementBits: 64 - overwriteClass: mask - overwriteBase: int - -# TODO these are redundant with VPCMP operations. -# AVX-512 compares produce masks. -- go: Equal - asm: "V?PCMPEQ[BWDQ]" - in: - - *any - - *any - out: - - class: mask -- go: Greater - asm: "V?PCMPGT[BWDQ]" - in: - - *int - - *int - out: - - class: mask - -# MASKED signed comparisons for X/Y registers -# unmasked would clash with emulations on AVX2 -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMP[BWDQ]" - in: - - &int - bits: (128|256) - go: $t - base: int - - *int - - class: immediate - const: 0 # Just a placeholder, will be overwritten by const imm porting. 
- inVariant: - - class: mask - out: - - class: mask - -# MASKED unsigned comparisons for X/Y registers -# unmasked would clash with emulations on AVX2 -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMPU[BWDQ]" - in: - - &uint - bits: (128|256) - go: $t - base: uint - - *uint - - class: immediate - const: 0 - inVariant: - - class: mask - out: - - class: mask - -# masked/unmasked signed comparisons for Z registers -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMP[BWDQ]" - in: - - &int - bits: 512 - go: $t - base: int - - *int - - class: immediate - const: 0 # Just a placeholder, will be overwritten by const imm porting. - out: - - class: mask - -# masked/unmasked unsigned comparisons for Z registers -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) - asm: "VPCMPU[BWDQ]" - in: - - &uint - bits: 512 - go: $t - base: uint - - *uint - - class: immediate - const: 0 - out: - - class: mask - -# Floats -- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan - asm: "VCMPP[SD]" - in: - - &float - go: $t - base: float - - *float - - class: immediate - const: 0 - out: - - go: $t - overwriteBase: int - overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) - asm: "VCMPP[SD]" - in: - - *float - - *float - - class: immediate - const: 0 - out: - - class: mask \ No newline at end of file diff --git a/internal/simdgen/ops/Converts/categories.yaml b/internal/simdgen/ops/Converts/categories.yaml deleted file mode 100644 index cc6c419d..00000000 --- a/internal/simdgen/ops/Converts/categories.yaml +++ /dev/null @@ -1,10 +0,0 @@ -!sum -- go: ConvertToInt32 - commutative: false - documentation: !string |- - // ConvertToInt32 converts element values to int32. - -- go: ConvertToUint32 - commutative: false - documentation: !string |- - // ConvertToUint32Masked converts element values to uint32. 
diff --git a/internal/simdgen/ops/Converts/go.yaml b/internal/simdgen/ops/Converts/go.yaml deleted file mode 100644 index 4e251728..00000000 --- a/internal/simdgen/ops/Converts/go.yaml +++ /dev/null @@ -1,21 +0,0 @@ -!sum -- go: ConvertToInt32 - asm: "VCVTTPS2DQ" - in: - - &fp - go: $t - base: float - out: - - &i32 - go: $u - base: int - elemBits: 32 -- go: ConvertToUint32 - asm: "VCVTPS2UDQ" - in: - - *fp - out: - - &u32 - go: $u - base: uint - elemBits: 32 diff --git a/internal/simdgen/ops/FPonlyArith/categories.yaml b/internal/simdgen/ops/FPonlyArith/categories.yaml deleted file mode 100644 index f2d8af68..00000000 --- a/internal/simdgen/ops/FPonlyArith/categories.yaml +++ /dev/null @@ -1,85 +0,0 @@ -!sum -- go: Div - commutative: false - documentation: !string |- - // NAME divides elements of two vectors. -- go: Sqrt - commutative: false - documentation: !string |- - // NAME computes the square root of each element. -- go: Reciprocal - commutative: false - documentation: !string |- - // NAME computes an approximate reciprocal of each element. -- go: ReciprocalSqrt - commutative: false - documentation: !string |- - // NAME computes an approximate reciprocal of the square root of each element. -- go: Scale - commutative: false - documentation: !string |- - // NAME multiplies elements by a power of 2. -- go: RoundToEven - commutative: false - constImm: 0 - documentation: !string |- - // NAME rounds elements to the nearest integer. -- go: RoundToEvenScaled - commutative: false - constImm: 0 - documentation: !string |- - // NAME rounds elements with specified precision. -- go: RoundToEvenScaledResidue - commutative: false - constImm: 0 - documentation: !string |- - // NAME computes the difference after rounding with specified precision. -- go: Floor - commutative: false - constImm: 1 - documentation: !string |- - // NAME rounds elements down to the nearest integer. 
-- go: FloorScaled - commutative: false - constImm: 1 - documentation: !string |- - // NAME rounds elements down with specified precision. -- go: FloorScaledResidue - commutative: false - constImm: 1 - documentation: !string |- - // NAME computes the difference after flooring with specified precision. -- go: Ceil - commutative: false - constImm: 2 - documentation: !string |- - // NAME rounds elements up to the nearest integer. -- go: CeilScaled - commutative: false - constImm: 2 - documentation: !string |- - // NAME rounds elements up with specified precision. -- go: CeilScaledResidue - commutative: false - constImm: 2 - documentation: !string |- - // NAME computes the difference after ceiling with specified precision. -- go: Trunc - commutative: false - constImm: 3 - documentation: !string |- - // NAME truncates elements towards zero. -- go: TruncScaled - commutative: false - constImm: 3 - documentation: !string |- - // NAME truncates elements with specified precision. -- go: TruncScaledResidue - commutative: false - constImm: 3 - documentation: !string |- - // NAME computes the difference after truncating with specified precision. -- go: AddSub - commutative: false - documentation: !string |- - // NAME subtracts even elements and adds odd elements of two vectors. diff --git a/internal/simdgen/ops/FPonlyArith/go.yaml b/internal/simdgen/ops/FPonlyArith/go.yaml deleted file mode 100644 index e164f7b7..00000000 --- a/internal/simdgen/ops/FPonlyArith/go.yaml +++ /dev/null @@ -1,62 +0,0 @@ -!sum -- go: Div - asm: "V?DIVP[SD]" - in: &2fp - - &fp - go: $t - base: float - - *fp - out: &1fp - - *fp -- go: Sqrt - asm: "V?SQRTP[SD]" - in: *1fp - out: *1fp -# TODO: Provide separate methods for 12-bit precision and 14-bit precision? 
-- go: Reciprocal - asm: "VRCP(14)?P[SD]" - in: *1fp - out: *1fp -- go: ReciprocalSqrt - asm: "V?RSQRT(14)?P[SD]" - in: *1fp - out: *1fp -- go: Scale - asm: "VSCALEFP[SD]" - in: *2fp - out: *1fp - -- go: "RoundToEven|Ceil|Floor|Trunc" - asm: "VROUNDP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - out: *1fp - -- go: "(RoundToEven|Ceil|Floor|Trunc)Scaled" - asm: "VRNDSCALEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). - name: prec - out: *1fp -- go: "(RoundToEven|Ceil|Floor|Trunc)ScaledResidue" - asm: "VREDUCEP[SD]" - in: - - *fp - - class: immediate - const: 0 # place holder - immOffset: 4 # "M", round to numbers with M digits after dot(by means of binary number). - name: prec - out: *1fp - -- go: "AddSub" - asm: "VADDSUBP[SD]" - in: - - *fp - - *fp - out: - - *fp diff --git a/internal/simdgen/ops/GaloisField/categories.yaml b/internal/simdgen/ops/GaloisField/categories.yaml deleted file mode 100644 index 25824625..00000000 --- a/internal/simdgen/ops/GaloisField/categories.yaml +++ /dev/null @@ -1,21 +0,0 @@ -!sum -- go: GaloisFieldAffineTransform - commutative: false - documentation: !string |- - // NAME computes an affine transformation in GF(2^8): - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldAffineTransformInverse - commutative: false - documentation: !string |- - // NAME computes an affine transformation in GF(2^8), - // with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: - // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; - // b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y - // corresponding to a group of 8 elements in x. -- go: GaloisFieldMul - commutative: false - documentation: !string |- - // NAME computes element-wise GF(2^8) multiplication with - // reduction polynomial x^8 + x^4 + x^3 + x + 1. diff --git a/internal/simdgen/ops/GaloisField/go.yaml b/internal/simdgen/ops/GaloisField/go.yaml deleted file mode 100644 index e86211cb..00000000 --- a/internal/simdgen/ops/GaloisField/go.yaml +++ /dev/null @@ -1,32 +0,0 @@ -!sum -- go: GaloisFieldAffineTransform - asm: VGF2P8AFFINEQB - operandOrder: 2I # 2nd operand, then immediate - in: &AffineArgs - - &uint8 - go: $t - base: uint - - &uint8x8 - go: $t2 - base: uint - - &pureImmVar - class: immediate - immOffset: 0 - name: b - out: - - *uint8 - -- go: GaloisFieldAffineTransformInverse - asm: VGF2P8AFFINEINVQB - operandOrder: 2I # 2nd operand, then immediate - in: *AffineArgs - out: - - *uint8 - -- go: GaloisFieldMul - asm: VGF2P8MULB - in: - - *uint8 - - *uint8 - out: - - *uint8 diff --git a/internal/simdgen/ops/IntOnlyArith/categories.yaml b/internal/simdgen/ops/IntOnlyArith/categories.yaml deleted file mode 100644 index bf33642a..00000000 --- a/internal/simdgen/ops/IntOnlyArith/categories.yaml +++ /dev/null @@ -1,21 +0,0 @@ -!sum -- go: Average - commutative: true - documentation: !string |- - // NAME computes the rounded average of corresponding elements. -- go: Abs - commutative: false - # Unary operation, not commutative - documentation: !string |- - // NAME computes the absolute value of each element. -- go: CopySign - # Applies sign of second operand to first: sign(val, sign_src) - commutative: false - documentation: !string |- - // NAME returns the product of the first operand with -1, 0, or 1, - // whichever constant is nearest to the value of the second operand. 
- # Sign does not have masked version -- go: OnesCount - commutative: false - documentation: !string |- - // NAME counts the number of set bits in each element. diff --git a/internal/simdgen/ops/IntOnlyArith/go.yaml b/internal/simdgen/ops/IntOnlyArith/go.yaml deleted file mode 100644 index 54938b4f..00000000 --- a/internal/simdgen/ops/IntOnlyArith/go.yaml +++ /dev/null @@ -1,45 +0,0 @@ -!sum -# Average (unsigned byte, unsigned word) -# Instructions: VPAVGB, VPAVGW -- go: Average - asm: "VPAVG[BW]" # Matches VPAVGB (byte) and VPAVGW (word) - in: - - &uint_t # $t will be Uint8xN for VPAVGB, Uint16xN for VPAVGW - go: $t - base: uint - - *uint_t - out: - - *uint_t - -# Absolute Value (signed byte, word, dword, qword) -# Instructions: VPABSB, VPABSW, VPABSD, VPABSQ -- go: Abs - asm: "VPABS[BWDQ]" # Matches VPABSB, VPABSW, VPABSD, VPABSQ - in: - - &int_t # $t will be Int8xN, Int16xN, Int32xN, Int64xN - go: $t - base: int - out: - - *int_t # Output is magnitude, fits in the same signed type - -# Sign Operation (signed byte, word, dword) -# Applies sign of second operand to the first. -# Instructions: VPSIGNB, VPSIGNW, VPSIGND -- go: CopySign - asm: "VPSIGN[BWD]" # Matches VPSIGNB, VPSIGNW, VPSIGND - in: - - *int_t # value to apply sign to - - *int_t # value from which to take the sign - out: - - *int_t - -# Population Count (count set bits in each element) -# Instructions: VPOPCNTB, VPOPCNTW (AVX512_BITALG) -# VPOPCNTD, VPOPCNTQ (AVX512_VPOPCNTDQ) -- go: OnesCount - asm: "VPOPCNT[BWDQ]" - in: - - &any - go: $t - out: - - *any diff --git a/internal/simdgen/ops/MLOps/categories.yaml b/internal/simdgen/ops/MLOps/categories.yaml deleted file mode 100644 index 97381e1e..00000000 --- a/internal/simdgen/ops/MLOps/categories.yaml +++ /dev/null @@ -1,47 +0,0 @@ -!sum -- go: DotProdPairs - commutative: false - documentation: !string |- - // NAME multiplies the elements and add the pairs together, - // yielding a vector of half as many elements with twice the input element size. 
-# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. -- go: DotProdPairsSaturated - commutative: false - documentation: !string |- - // NAME multiplies the elements and add the pairs together with saturation, - // yielding a vector of half as many elements with twice the input element size. -# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. -# - go: DotProdBroadcast -# commutative: true -# # documentation: !string |- -# // NAME multiplies all elements and broadcasts the sum. -- go: AddDotProdQuadruple - commutative: false - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProdQuadrupleSaturated - commutative: false - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProdPairs - commutative: false - noTypes: "true" - noGenericOps: "true" - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: AddDotProdPairsSaturated - commutative: false - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. -- go: MulAdd - commutative: false - documentation: !string |- - // NAME performs a fused (x * y) + z. -- go: MulAddSub - commutative: false - documentation: !string |- - // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. -- go: MulSubAdd - commutative: false - documentation: !string |- - // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
diff --git a/internal/simdgen/ops/MLOps/go.yaml b/internal/simdgen/ops/MLOps/go.yaml deleted file mode 100644 index f6b6f135..00000000 --- a/internal/simdgen/ops/MLOps/go.yaml +++ /dev/null @@ -1,113 +0,0 @@ -!sum -- go: DotProdPairs - asm: VPMADDWD - in: - - &int - go: $t - base: int - - *int - out: - - &int2 # The elemBits are different - go: $t2 - base: int -- go: DotProdPairsSaturated - asm: VPMADDUBSW - in: - - &uint - go: $t - base: uint - overwriteElementBits: 8 - - &int3 - go: $t3 - base: int - overwriteElementBits: 8 - out: - - *int2 -# - go: DotProdBroadcast -# asm: VDPP[SD] -# in: -# - &dpb_src -# go: $t -# - *dpb_src -# - class: immediate -# const: 127 -# out: -# - *dpb_src -- go: AddDotProdQuadruple - asm: "VPDPBUSD" - operandOrder: "31" # switch operand 3 and 1 - in: - - &qdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &qdpa_src1 - go: $t_src1 - base: uint - overwriteElementBits: 8 - - &qdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 8 - out: - - *qdpa_acc -- go: AddDotProdQuadrupleSaturated - asm: "VPDPBUSDS" - operandOrder: "31" # switch operand 3 and 1 - in: - - *qdpa_acc - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc -- go: AddDotProdPairs - asm: "VPDPWSSD" - in: - - &pdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &pdpa_src1 - go: $t_src1 - base: int - overwriteElementBits: 16 - - &pdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 16 - out: - - *pdpa_acc -- go: AddDotProdPairsSaturated - asm: "VPDPWSSDS" - in: - - *pdpa_acc - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc -- go: MulAdd - asm: "VFMADD213PS|VFMADD213PD" - in: - - &fma_op - go: $t - base: float - - *fma_op - - *fma_op - out: - - *fma_op -- go: MulAddSub - asm: "VFMADDSUB213PS|VFMADDSUB213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op -- go: MulSubAdd - asm: "VFMSUBADD213PS|VFMSUBADD213PD" - in: - - *fma_op - - *fma_op - - *fma_op - out: - - *fma_op \ No newline at end of file diff --git 
a/internal/simdgen/ops/MinMax/categories.yaml b/internal/simdgen/ops/MinMax/categories.yaml deleted file mode 100644 index a7e30f46..00000000 --- a/internal/simdgen/ops/MinMax/categories.yaml +++ /dev/null @@ -1,9 +0,0 @@ -!sum -- go: Max - commutative: true - documentation: !string |- - // NAME computes the maximum of corresponding elements. -- go: Min - commutative: true - documentation: !string |- - // NAME computes the minimum of corresponding elements. diff --git a/internal/simdgen/ops/MinMax/go.yaml b/internal/simdgen/ops/MinMax/go.yaml deleted file mode 100644 index 55f1e18b..00000000 --- a/internal/simdgen/ops/MinMax/go.yaml +++ /dev/null @@ -1,42 +0,0 @@ -!sum -- go: Max - asm: "V?PMAXS[BWDQ]" - in: &2int - - &int - go: $t - base: int - - *int - out: &1int - - *int -- go: Max - asm: "V?PMAXU[BWDQ]" - in: &2uint - - &uint - go: $t - base: uint - - *uint - out: &1uint - - *uint - -- go: Min - asm: "V?PMINS[BWDQ]" - in: *2int - out: *1int -- go: Min - asm: "V?PMINU[BWDQ]" - in: *2uint - out: *1uint - -- go: Max - asm: "V?MAXP[SD]" - in: &2float - - &float - go: $t - base: float - - *float - out: &1float - - *float -- go: Min - asm: "V?MINP[SD]" - in: *2float - out: *1float diff --git a/internal/simdgen/ops/Moves/categories.yaml b/internal/simdgen/ops/Moves/categories.yaml deleted file mode 100644 index ef8e0360..00000000 --- a/internal/simdgen/ops/Moves/categories.yaml +++ /dev/null @@ -1,72 +0,0 @@ -!sum -- go: SetElem - commutative: false - documentation: !string |- - // NAME sets a single constant-indexed element's value. -- go: GetElem - commutative: false - documentation: !string |- - // NAME retrieves a single constant-indexed element's value. -- go: SetLo - commutative: false - constImm: 0 - documentation: !string |- - // NAME returns x with its lower half set to y. -- go: GetLo - commutative: false - constImm: 0 - documentation: !string |- - // NAME returns the lower half of x. 
-- go: SetHi - commutative: false - constImm: 1 - documentation: !string |- - // NAME returns x with its upper half set to y. -- go: GetHi - commutative: false - constImm: 1 - documentation: !string |- - // NAME returns the upper half of x. -- go: Permute - commutative: false - documentation: !string |- - // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} - // Only the needed bits to represent x's index are used in indices' elements. -- go: Permute2 # Permute2 is only available on or after AVX512 - commutative: false - documentation: !string |- - // NAME performs a full permutation of vector x, y using indices: - // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} - // where xy is x appending y. - // Only the needed bits to represent xy's index are used in indices' elements. -- go: Compress - commutative: false - documentation: !string |- - // NAME performs a compression on vector x using mask by - // selecting elements as indicated by mask, and pack them to lower indexed elements. -- go: blend - commutative: false - documentation: !string |- - // NAME blends two vectors based on mask values, choosing either - // the first or the second based on whether the third is false or true -- go: Expand - commutative: false - documentation: !string |- - // NAME performs an expansion on a vector x whose elements are packed to lower parts. - // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. -- go: Broadcast128 - commutative: false - documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 128-bit output vector. -- go: Broadcast256 - commutative: false - documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 256-bit output vector. 
-- go: Broadcast512 - commutative: false - documentation: !string |- - // NAME copies element zero of its (128-bit) input to all elements of - // the 512-bit output vector. diff --git a/internal/simdgen/ops/Moves/go.yaml b/internal/simdgen/ops/Moves/go.yaml deleted file mode 100644 index 71981c12..00000000 --- a/internal/simdgen/ops/Moves/go.yaml +++ /dev/null @@ -1,372 +0,0 @@ -!sum -- go: SetElem - asm: "VPINSR[BWDQ]" - in: - - &t - class: vreg - base: $b - - class: greg - base: $b - lanes: 1 # Scalar, darn it! - - &imm - class: immediate - immOffset: 0 - name: index - out: - - *t - -- go: SetElem - asm: "VPINSR[DQ]" - in: - - &t - class: vreg - base: int - OverwriteBase: float - - class: greg - base: int - OverwriteBase: float - lanes: 1 # Scalar, darn it! - - &imm - class: immediate - immOffset: 0 - name: index - out: - - *t - -- go: GetElem - asm: "VPEXTR[BWDQ]" - in: - - class: vreg - base: $b - elemBits: $e - - *imm - out: - - class: greg - base: $b - bits: $e - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i8x2N - class: vreg - base: $t - OverwriteElementBits: 8 - - &i8xN - class: vreg - base: $t - OverwriteElementBits: 8 - - &imm01 # This immediate should be only 0 or 1 - class: immediate - const: 0 # place holder - name: index - out: - - *i8x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i8x2N - - *imm01 - out: - - *i8xN - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i16x2N - class: vreg - base: $t - OverwriteElementBits: 16 - - &i16xN - class: vreg - base: $t - OverwriteElementBits: 16 - - *imm01 - out: - - *i16x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i16x2N - - *imm01 - out: - - *i16xN - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i32x2N - class: vreg - base: $t - OverwriteElementBits: 32 - - &i32xN - class: vreg - base: $t - OverwriteElementBits: 32 - - 
*imm01 - out: - - *i32x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i32x2N - - *imm01 - out: - - *i32xN - -- go: "SetHi|SetLo" - asm: "VINSERTI128|VINSERTI64X4" - inVariant: [] - in: - - &i64x2N - class: vreg - base: $t - OverwriteElementBits: 64 - - &i64xN - class: vreg - base: $t - OverwriteElementBits: 64 - - *imm01 - out: - - *i64x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTI128|VEXTRACTI64X4" - inVariant: [] - in: - - *i64x2N - - *imm01 - out: - - *i64xN - -- go: "SetHi|SetLo" - asm: "VINSERTF128|VINSERTF64X4" - inVariant: [] - in: - - &f32x2N - class: vreg - base: $t - OverwriteElementBits: 32 - - &f32xN - class: vreg - base: $t - OverwriteElementBits: 32 - - *imm01 - out: - - *f32x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTF128|VEXTRACTF64X4" - inVariant: [] - in: - - *f32x2N - - *imm01 - out: - - *f32xN - -- go: "SetHi|SetLo" - asm: "VINSERTF128|VINSERTF64X4" - inVariant: [] - in: - - &f64x2N - class: vreg - base: $t - OverwriteElementBits: 64 - - &f64xN - class: vreg - base: $t - OverwriteElementBits: 64 - - *imm01 - out: - - *f64x2N - -- go: "GetHi|GetLo" - asm: "VEXTRACTF128|VEXTRACTF64X4" - inVariant: [] - in: - - *f64x2N - - *imm01 - out: - - *f64xN - -- go: Permute - asm: "VPERM[BWDQ]|VPERMP[SD]" - operandOrder: "21Type1" - in: - - &anyindices - go: $t - name: indices - overwriteBase: uint - - &any - go: $t - out: - - *any - -- go: Permute2 - asm: "VPERMI2[BWDQ]|VPERMI2P[SD]" - # Because we are overwriting the receiver's type, we - # have to move the receiver to be a parameter so that - # we can have no duplication. - operandOrder: "231Type1" - in: - - *anyindices # result in arg 0 - - *any - - *any - out: - - *any - -- go: Compress - asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]" - in: - # The mask in Compress is a control mask rather than a write mask, so it's not optional. 
- - class: mask - - *any - out: - - *any - -# For now a non-public method because -# (1) [OverwriteClass] must be set together with [OverwriteBase] -# (2) "simdgen does not support [OverwriteClass] in inputs". -# That means the signature is wrong. -- go: blend - asm: VPBLENDVB - in: - - &v - go: $t - class: vreg - base: int - - *v - - - class: vreg - base: int - name: mask - out: - - *v - -# For AVX512 -- go: blend - asm: VPBLENDM[BWDQ] - in: - - &v - go: $t - bits: 512 - class: vreg - base: int - - *v - inVariant: - - - class: mask - out: - - *v - -- go: Expand - asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]" - in: - # The mask in Expand is a control mask rather than a write mask, so it's not optional. - - class: mask - - *any - out: - - *any - -- go: Broadcast128 - asm: VPBROADCAST[BWDQ] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 128 - elemBits: $e - base: $b - -# weirdly, this one case on AVX2 is memory-operand-only -- go: Broadcast128 - asm: VPBROADCASTQ - in: - - class: vreg - bits: 128 - elemBits: 64 - base: int - OverwriteBase: float - out: - - class: vreg - bits: 128 - elemBits: 64 - base: int - OverwriteBase: float - -- go: Broadcast256 - asm: VPBROADCAST[BWDQ] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 256 - elemBits: $e - base: $b - -- go: Broadcast512 - asm: VPBROADCAST[BWDQ] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 512 - elemBits: $e - base: $b - -- go: Broadcast128 - asm: VBROADCASTS[SD] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 128 - elemBits: $e - base: $b - -- go: Broadcast256 - asm: VBROADCASTS[SD] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 256 - elemBits: $e - base: $b - -- go: Broadcast512 - asm: VBROADCASTS[SD] - in: - - class: vreg - bits: 128 - elemBits: $e - base: $b - out: - - class: vreg - bits: 512 - elemBits: 
$e - base: $b diff --git a/internal/simdgen/ops/Mul/categories.yaml b/internal/simdgen/ops/Mul/categories.yaml deleted file mode 100644 index 92491b51..00000000 --- a/internal/simdgen/ops/Mul/categories.yaml +++ /dev/null @@ -1,14 +0,0 @@ -!sum -- go: Mul - commutative: true - documentation: !string |- - // NAME multiplies corresponding elements of two vectors. -- go: MulEvenWiden - commutative: true - documentation: !string |- - // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. -- go: MulHigh - commutative: true - documentation: !string |- - // NAME multiplies elements and stores the high part of the result. diff --git a/internal/simdgen/ops/Mul/go.yaml b/internal/simdgen/ops/Mul/go.yaml deleted file mode 100644 index c0205a68..00000000 --- a/internal/simdgen/ops/Mul/go.yaml +++ /dev/null @@ -1,73 +0,0 @@ -!sum -# "Normal" multiplication is only available for floats. -# This only covers the single and double precision. -- go: Mul - asm: "VMULP[SD]" - in: - - &fp - go: $t - base: float - - *fp - out: - - *fp - -# Integer multiplications. - -# MulEvenWiden -# Dword only. -- go: MulEvenWiden - asm: "VPMULDQ" - in: - - &intNot64 - go: $t - elemBits: 8|16|32 - base: int - - *intNot64 - out: - - &int2 - go: $t2 - base: int -- go: MulEvenWiden - asm: "VPMULUDQ" - in: - - &uintNot64 - go: $t - elemBits: 8|16|32 - base: uint - - *uintNot64 - out: - - &uint2 - go: $t2 - base: uint - -# MulHigh -# Word only. -- go: MulHigh - asm: "VPMULHW" - in: - - &int - go: $t - base: int - - *int - out: - - *int -- go: MulHigh - asm: "VPMULHUW" - in: - - &uint - go: $t - base: uint - - *uint - out: - - *uint - -# MulLow -# signed and unsigned are the same for lower bits. 
-- go: Mul - asm: "VPMULL[WDQ]" - in: - - &any - go: $t - - *any - out: - - *any diff --git a/internal/simdgen/ops/ShiftRotate/categories.yaml b/internal/simdgen/ops/ShiftRotate/categories.yaml deleted file mode 100644 index 0d0b006c..00000000 --- a/internal/simdgen/ops/ShiftRotate/categories.yaml +++ /dev/null @@ -1,103 +0,0 @@ -!sum -- go: ShiftAllLeft - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -- go: ShiftAllRight - signed: false - nameAndSizeCheck: true - specialLower: sftimm - commutative: false - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -- go: ShiftAllRight - signed: true - specialLower: sftimm - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -- go: shiftAllLeftConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" # to avoid its name colliding with reg version of this instruction, amend this to its ssa op name. - nameAndSizeCheck: true - commutative: false -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: false - nameAndSizeCheck: true - commutative: false -- go: shiftAllRightConst # no APIs, only ssa ops. - noTypes: "true" - noGenericOps: "true" - SSAVariant: "const" - signed: true - nameAndSizeCheck: true - commutative: false - -- go: ShiftLeft - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
-- go: ShiftRight - signed: false - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -- go: ShiftRight - signed: true - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -- go: RotateAllLeft - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element to the left by the number of bits specified by the immediate. -- go: RotateLeft - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element in x to the left by the number of bits specified by y's corresponding elements. -- go: RotateAllRight - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element to the right by the number of bits specified by the immediate. -- go: RotateRight - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME rotates each element in x to the right by the number of bits specified by y's corresponding elements. -- go: ShiftAllLeftConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -- go: ShiftAllRightConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
-- go: ShiftLeftConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -- go: ShiftRightConcat - nameAndSizeCheck: true - commutative: false - documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. diff --git a/internal/simdgen/ops/ShiftRotate/go.yaml b/internal/simdgen/ops/ShiftRotate/go.yaml deleted file mode 100644 index e7ccdeb0..00000000 --- a/internal/simdgen/ops/ShiftRotate/go.yaml +++ /dev/null @@ -1,172 +0,0 @@ -!sum -# Integers -# ShiftAll* -- go: ShiftAllLeft - asm: "VPSLL[WDQ]" - in: - - &any - go: $t - - &vecAsScalar64 - go: "Uint.*" - treatLikeAScalarOfSize: 64 - out: - - *any -- go: ShiftAllRight - signed: false - asm: "VPSRL[WDQ]" - in: - - &uint - go: $t - base: uint - - *vecAsScalar64 - out: - - *uint -- go: ShiftAllRight - signed: true - asm: "VPSRA[WDQ]" - in: - - &int - go: $t - base: int - - *vecAsScalar64 - out: - - *int - -- go: shiftAllLeftConst - asm: "VPSLL[WDQ]" - in: - - *any - - &imm - class: immediate - immOffset: 0 - out: - - *any -- go: shiftAllRightConst - asm: "VPSRL[WDQ]" - in: - - *int - - *imm - out: - - *int -- go: shiftAllRightConst - asm: "VPSRA[WDQ]" - in: - - *uint - - *imm - out: - - *uint - -# Shift* (variable) -- go: ShiftLeft - asm: "VPSLLV[WD]" - in: - - *any - - *any - out: - - *any -# XED data of VPSLLVQ marks the element bits 32 which is off to the actual semantic, we need to overwrite -# it to 64. 
-- go: ShiftLeft - asm: "VPSLLVQ" - in: - - &anyOverwriteElemBits - go: $t - overwriteElementBits: 64 - - *anyOverwriteElemBits - out: - - *anyOverwriteElemBits -- go: ShiftRight - signed: false - asm: "VPSRLV[WD]" - in: - - *uint - - *uint - out: - - *uint -# XED data of VPSRLVQ needs the same overwrite as VPSLLVQ. -- go: ShiftRight - signed: false - asm: "VPSRLVQ" - in: - - &uintOverwriteElemBits - go: $t - base: uint - overwriteElementBits: 64 - - *uintOverwriteElemBits - out: - - *uintOverwriteElemBits -- go: ShiftRight - signed: true - asm: "VPSRAV[WDQ]" - in: - - *int - - *int - out: - - *int - -# Rotate -- go: RotateAllLeft - asm: "VPROL[DQ]" - in: - - *any - - &pureImm - class: immediate - immOffset: 0 - name: shift - out: - - *any -- go: RotateAllRight - asm: "VPROR[DQ]" - in: - - *any - - *pureImm - out: - - *any -- go: RotateLeft - asm: "VPROLV[DQ]" - in: - - *any - - *any - out: - - *any -- go: RotateRight - asm: "VPRORV[DQ]" - in: - - *any - - *any - out: - - *any - -# Bizzare shifts. -- go: ShiftAllLeftConcat - asm: "VPSHLD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftAllRightConcat - asm: "VPSHRD[WDQ]" - in: - - *any - - *any - - *pureImm - out: - - *any -- go: ShiftLeftConcat - asm: "VPSHLDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any -- go: ShiftRightConcat - asm: "VPSHRDV[WDQ]" - in: - - *any - - *any - - *any - out: - - *any diff --git a/internal/simdgen/pprint.go b/internal/simdgen/pprint.go deleted file mode 100644 index 054b5176..00000000 --- a/internal/simdgen/pprint.go +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import ( - "fmt" - "reflect" - "strconv" -) - -func pprints(v any) string { - var pp pprinter - pp.val(reflect.ValueOf(v), 0) - return string(pp.buf) -} - -type pprinter struct { - buf []byte -} - -func (p *pprinter) indent(by int) { - for range by { - p.buf = append(p.buf, '\t') - } -} - -func (p *pprinter) val(v reflect.Value, indent int) { - switch v.Kind() { - default: - p.buf = fmt.Appendf(p.buf, "unsupported kind %v", v.Kind()) - - case reflect.Bool: - p.buf = strconv.AppendBool(p.buf, v.Bool()) - - case reflect.Int, reflect.Int16, reflect.Int32, reflect.Int64: - p.buf = strconv.AppendInt(p.buf, v.Int(), 10) - - case reflect.String: - p.buf = strconv.AppendQuote(p.buf, v.String()) - - case reflect.Pointer: - if v.IsNil() { - p.buf = append(p.buf, "nil"...) - } else { - p.buf = append(p.buf, "&"...) - p.val(v.Elem(), indent) - } - - case reflect.Slice, reflect.Array: - p.buf = append(p.buf, "[\n"...) - for i := range v.Len() { - p.indent(indent + 1) - p.val(v.Index(i), indent+1) - p.buf = append(p.buf, ",\n"...) - } - p.indent(indent) - p.buf = append(p.buf, ']') - - case reflect.Struct: - vt := v.Type() - p.buf = append(append(p.buf, vt.String()...), "{\n"...) - for f := range v.NumField() { - p.indent(indent + 1) - p.buf = append(append(p.buf, vt.Field(f).Name...), ": "...) - p.val(v.Field(f), indent+1) - p.buf = append(p.buf, ",\n"...) - } - p.indent(indent) - p.buf = append(p.buf, '}') - } -} diff --git a/internal/simdgen/sort_test.go b/internal/simdgen/sort_test.go deleted file mode 100644 index 399acf03..00000000 --- a/internal/simdgen/sort_test.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package main - -import "testing" - -func TestSort(t *testing.T) { - testCases := []struct { - s1, s2 string - want int - }{ - {"a1", "a2", -1}, - {"a11a", "a11b", -1}, - {"a01a1", "a1a01", -1}, - {"a2", "a1", 1}, - {"a10", "a2", 1}, - {"a1", "a10", -1}, - {"z11", "z2", 1}, - {"z2", "z11", -1}, - {"abc", "abd", -1}, - {"123", "45", 1}, - {"file1", "file1", 0}, - {"file", "file1", -1}, - {"file1", "file", 1}, - {"a01", "a1", -1}, - {"a1a", "a1b", -1}, - } - - for _, tc := range testCases { - got := compareNatural(tc.s1, tc.s2) - result := "✅" - if got != tc.want { - result = "❌" - t.Errorf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) - } else { - t.Logf("%s CompareNatural(\"%s\", \"%s\") -> got %2d, want %2d\n", result, tc.s1, tc.s2, got, tc.want) - } - } -} diff --git a/internal/simdgen/types.yaml b/internal/simdgen/types.yaml deleted file mode 100644 index f7a01cb3..00000000 --- a/internal/simdgen/types.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# This file defines the possible types of each operand and result. -# -# In general, we're able to narrow this down on some attributes directly from -# the machine instruction descriptions, but the Go mappings need to further -# constrain them and how they relate. For example, on x86 we can't distinguish -# int and uint, though we can distinguish these from float. 
- -in: !repeat -- !sum &types - - {class: vreg, go: Int8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 8, bits: 128, lanes: 16} - - {class: vreg, go: Int16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 16, bits: 128, lanes: 8} - - {class: vreg, go: Int32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Int64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Float32x4, base: "float", elemBits: 32, bits: 128, lanes: 4} - - {class: vreg, go: Float64x2, base: "float", elemBits: 64, bits: 128, lanes: 2} - - {class: vreg, go: Int8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 8, bits: 256, lanes: 32} - - {class: vreg, go: Int16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - - {class: vreg, go: Uint16x16, base: "uint", elemBits: 16, bits: 256, lanes: 16} - - {class: vreg, go: Int32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Int64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Float32x8, base: "float", elemBits: 32, bits: 256, lanes: 8} - - {class: vreg, go: Float64x4, base: "float", elemBits: 64, bits: 256, lanes: 4} - - {class: vreg, go: Int8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: vreg, go: Uint8x64, base: "uint", elemBits: 8, bits: 512, lanes: 64} - - {class: vreg, go: Int16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - - {class: vreg, go: Uint16x32, base: "uint", elemBits: 16, bits: 512, lanes: 32} - - {class: vreg, go: 
Int32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Uint32x16, base: "uint", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Int64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: Uint64x8, base: "uint", elemBits: 64, bits: 512, lanes: 8} - - {class: vreg, go: Float32x16, base: "float", elemBits: 32, bits: 512, lanes: 16} - - {class: vreg, go: Float64x8, base: "float", elemBits: 64, bits: 512, lanes: 8} - - - {class: mask, go: Mask8x16, base: "int", elemBits: 8, bits: 128, lanes: 16} - - {class: mask, go: Mask16x8, base: "int", elemBits: 16, bits: 128, lanes: 8} - - {class: mask, go: Mask32x4, base: "int", elemBits: 32, bits: 128, lanes: 4} - - {class: mask, go: Mask64x2, base: "int", elemBits: 64, bits: 128, lanes: 2} - - {class: mask, go: Mask8x32, base: "int", elemBits: 8, bits: 256, lanes: 32} - - {class: mask, go: Mask16x16, base: "int", elemBits: 16, bits: 256, lanes: 16} - - {class: mask, go: Mask32x8, base: "int", elemBits: 32, bits: 256, lanes: 8} - - {class: mask, go: Mask64x4, base: "int", elemBits: 64, bits: 256, lanes: 4} - - {class: mask, go: Mask8x64, base: "int", elemBits: 8, bits: 512, lanes: 64} - - {class: mask, go: Mask16x32, base: "int", elemBits: 16, bits: 512, lanes: 32} - - {class: mask, go: Mask32x16, base: "int", elemBits: 32, bits: 512, lanes: 16} - - {class: mask, go: Mask64x8, base: "int", elemBits: 64, bits: 512, lanes: 8} - - - - {class: greg, go: float64, base: "float", bits: 64, lanes: 1} - - {class: greg, go: float32, base: "float", bits: 32, lanes: 1} - - {class: greg, go: int64, base: "int", bits: 64, lanes: 1} - - {class: greg, go: int32, base: "int", bits: 32, lanes: 1} - - {class: greg, go: int16, base: "int", bits: 16, lanes: 1} - - {class: greg, go: int8, base: "int", bits: 8, lanes: 1} - - {class: greg, go: uint64, base: "uint", bits: 64, lanes: 1} - - {class: greg, go: uint32, base: "uint", bits: 32, lanes: 1} - - {class: greg, go: uint16, base: "uint", bits: 
16, lanes: 1} - - {class: greg, go: uint8, base: "uint", bits: 8, lanes: 1} - -# Special shapes just to make INSERT[IF]128 work. -# The elemBits field of these shapes are wrong, it would be overwritten by overwriteElemBits. - - {class: vreg, go: Int8x16, base: "int", elemBits: 128, bits: 128, lanes: 16} - - {class: vreg, go: Uint8x16, base: "uint", elemBits: 128, bits: 128, lanes: 16} - - {class: vreg, go: Int16x8, base: "int", elemBits: 128, bits: 128, lanes: 8} - - {class: vreg, go: Uint16x8, base: "uint", elemBits: 128, bits: 128, lanes: 8} - - {class: vreg, go: Int32x4, base: "int", elemBits: 128, bits: 128, lanes: 4} - - {class: vreg, go: Uint32x4, base: "uint", elemBits: 128, bits: 128, lanes: 4} - - {class: vreg, go: Int64x2, base: "int", elemBits: 128, bits: 128, lanes: 2} - - {class: vreg, go: Uint64x2, base: "uint", elemBits: 128, bits: 128, lanes: 2} - - - {class: vreg, go: Int8x32, base: "int", elemBits: 128, bits: 256, lanes: 32} - - {class: vreg, go: Uint8x32, base: "uint", elemBits: 128, bits: 256, lanes: 32} - - {class: vreg, go: Int16x16, base: "int", elemBits: 128, bits: 256, lanes: 16} - - {class: vreg, go: Uint16x16, base: "uint", elemBits: 128, bits: 256, lanes: 16} - - {class: vreg, go: Int32x8, base: "int", elemBits: 128, bits: 256, lanes: 8} - - {class: vreg, go: Uint32x8, base: "uint", elemBits: 128, bits: 256, lanes: 8} - - {class: vreg, go: Int64x4, base: "int", elemBits: 128, bits: 256, lanes: 4} - - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4} - - - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. -inVariant: !repeat -- *types -out: !repeat -- *types diff --git a/internal/simdgen/xed.go b/internal/simdgen/xed.go deleted file mode 100644 index 3bbf2cbc..00000000 --- a/internal/simdgen/xed.go +++ /dev/null @@ -1,780 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package main - -import ( - "cmp" - "fmt" - "log" - "maps" - "regexp" - "slices" - "strconv" - "strings" - - "golang.org/x/arch/internal/unify" - "golang.org/x/arch/x86/xeddata" - "gopkg.in/yaml.v3" -) - -const ( - NOT_REG_CLASS = 0 // not a register - VREG_CLASS = 1 // classify as a vector register; see - GREG_CLASS = 2 // classify as a general register -) - -// instVariant is a bitmap indicating a variant of an instruction that has -// optional parameters. -type instVariant uint8 - -const ( - instVariantNone instVariant = 0 - - // instVariantMasked indicates that this is the masked variant of an - // optionally-masked instruction. - instVariantMasked instVariant = 1 << iota -) - -var operandRemarks int - -// TODO: Doc. Returns Values with Def domains. -func loadXED(xedPath string) []*unify.Value { - // TODO: Obviously a bunch more to do here. - - db, err := xeddata.NewDatabase(xedPath) - if err != nil { - log.Fatalf("open database: %v", err) - } - - var defs []*unify.Value - err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) { - inst.Pattern = xeddata.ExpandStates(db, inst.Pattern) - - switch { - case inst.RealOpcode == "N": - return // Skip unstable instructions - case !strings.HasPrefix(inst.Extension, "AVX"): - // We're only interested in AVX instructions. - return - } - - if *flagDebugXED { - fmt.Printf("%s:\n%+v\n", inst.Pos, inst) - } - - ops, err := decodeOperands(db, strings.Fields(inst.Operands)) - if err != nil { - operandRemarks++ - if *Verbose { - log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err) - } - return - } - - applyQuirks(inst, ops) - - defsPos := len(defs) - defs = append(defs, instToUVal(inst, ops)...) 
- - if *flagDebugXED { - for i := defsPos; i < len(defs); i++ { - y, _ := yaml.Marshal(defs[i]) - fmt.Printf("==>\n%s\n", y) - } - } - }) - if err != nil { - log.Fatalf("walk insts: %v", err) - } - - if len(unknownFeatures) > 0 { - if !*Verbose { - nInst := 0 - for _, insts := range unknownFeatures { - nInst += len(insts) - } - log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst) - } else { - keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int { - return cmp.Or(cmp.Compare(a.Extension, b.Extension), - cmp.Compare(a.ISASet, b.ISASet)) - }) - for _, key := range keys { - if key.ISASet == "" || key.ISASet == key.Extension { - log.Printf("unhandled Extension %s", key.Extension) - } else { - log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet) - } - log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key]))) - } - } - } - - return defs -} - -var ( - maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]|VPEXPAND[BWDQ]|VEXPANDP[SD]`) - maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`) -) - -func applyQuirks(inst *xeddata.Inst, ops []operand) { - opc := inst.Opcode() - switch { - case maskRequiredRe.MatchString(opc): - // The mask on these instructions is marked optional, but the - // instruction is pointless without the mask. - for i, op := range ops { - if op, ok := op.(operandMask); ok { - op.optional = false - ops[i] = op - } - } - - case maskOptionalRe.MatchString(opc): - // Conversely, these masks should be marked optional and aren't. - for i, op := range ops { - if op, ok := op.(operandMask); ok && op.action.r { - op.optional = true - ops[i] = op - } - } - } -} - -type operandCommon struct { - action operandAction -} - -// operandAction defines whether this operand is read and/or written. -// -// TODO: Should this live in [xeddata.Operand]? 
-type operandAction struct { - r bool // Read - w bool // Written - cr bool // Read is conditional (implies r==true) - cw bool // Write is conditional (implies w==true) -} - -type operandMem struct { - operandCommon - // TODO -} - -type vecShape struct { - elemBits int // Element size in bits - bits int // Register width in bits (total vector bits) -} - -type operandVReg struct { // Vector register - operandCommon - vecShape - elemBaseType scalarBaseType -} - -type operandGReg struct { // Vector register - operandCommon - vecShape - elemBaseType scalarBaseType -} - -// operandMask is a vector mask. -// -// Regardless of the actual mask representation, the [vecShape] of this operand -// corresponds to the "bit for bit" type of mask. That is, elemBits gives the -// element width covered by each mask element, and bits/elemBits gives the total -// number of mask elements. (bits gives the total number of bits as if this were -// a bit-for-bit mask, which may be meaningless on its own.) -type operandMask struct { - operandCommon - vecShape - // Bits in the mask is w/bits. - - allMasks bool // If set, size cannot be inferred because all operands are masks. 
- - // Mask can be omitted, in which case it defaults to K0/"no mask" - optional bool -} - -type operandImm struct { - operandCommon - bits int // Immediate size in bits -} - -type operand interface { - common() operandCommon - addToDef(b *unify.DefBuilder) -} - -func strVal(s any) *unify.Value { - return unify.NewValue(unify.NewStringExact(fmt.Sprint(s))) -} - -func (o operandCommon) common() operandCommon { - return o -} - -func (o operandMem) addToDef(b *unify.DefBuilder) { - // TODO: w, base - b.Add("class", strVal("memory")) -} - -func (o operandVReg) addToDef(b *unify.DefBuilder) { - baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) - if err != nil { - panic("parsing baseRe: " + err.Error()) - } - b.Add("class", strVal("vreg")) - b.Add("bits", strVal(o.bits)) - b.Add("base", unify.NewValue(baseDomain)) - // If elemBits == bits, then the vector can be ANY shape. This happens with, - // for example, logical ops. - if o.elemBits != o.bits { - b.Add("elemBits", strVal(o.elemBits)) - } -} - -func (o operandGReg) addToDef(b *unify.DefBuilder) { - baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex()) - if err != nil { - panic("parsing baseRe: " + err.Error()) - } - b.Add("class", strVal("greg")) - b.Add("bits", strVal(o.bits)) - b.Add("base", unify.NewValue(baseDomain)) - if o.elemBits != o.bits { - b.Add("elemBits", strVal(o.elemBits)) - } -} - -func (o operandMask) addToDef(b *unify.DefBuilder) { - b.Add("class", strVal("mask")) - if o.allMasks { - // If all operands are masks, omit sizes and let unification determine mask sizes. 
- return - } - b.Add("elemBits", strVal(o.elemBits)) - b.Add("bits", strVal(o.bits)) -} - -func (o operandImm) addToDef(b *unify.DefBuilder) { - b.Add("class", strVal("immediate")) - b.Add("bits", strVal(o.bits)) -} - -var actionEncoding = map[string]operandAction{ - "r": {r: true}, - "cr": {r: true, cr: true}, - "w": {w: true}, - "cw": {w: true, cw: true}, - "rw": {r: true, w: true}, - "crw": {r: true, w: true, cr: true}, - "rcw": {r: true, w: true, cw: true}, -} - -func decodeOperand(db *xeddata.Database, operand string) (operand, error) { - op, err := xeddata.NewOperand(db, operand) - if err != nil { - log.Fatalf("parsing operand %q: %v", operand, err) - } - if *flagDebugXED { - fmt.Printf(" %+v\n", op) - } - - if strings.HasPrefix(op.Name, "EMX_BROADCAST") { - // This refers to a set of macros defined in all-state.txt that set a - // BCAST operand to various fixed values. But the BCAST operand is - // itself suppressed and "internal", so I think we can just ignore this - // operand. - return nil, nil - } - - // TODO: See xed_decoded_inst_operand_action. This might need to be more - // complicated. - action, ok := actionEncoding[op.Action] - if !ok { - return nil, fmt.Errorf("unknown action %q", op.Action) - } - common := operandCommon{action: action} - - lhs := op.NameLHS() - if strings.HasPrefix(lhs, "MEM") { - // TODO: Width, base type - return operandMem{ - operandCommon: common, - }, nil - } else if strings.HasPrefix(lhs, "REG") { - if op.Width == "mskw" { - // The mask operand doesn't specify a width. We have to infer it. - // - // XED uses the marker ZEROSTR to indicate that a mask operand is - // optional and, if omitted, implies K0, aka "no mask". 
- return operandMask{ - operandCommon: common, - optional: op.Attributes["TXT=ZEROSTR"], - }, nil - } else { - class, regBits := decodeReg(op) - if class == NOT_REG_CLASS { - return nil, fmt.Errorf("failed to decode register %q", operand) - } - baseType, elemBits, ok := decodeType(op) - if !ok { - return nil, fmt.Errorf("failed to decode register width %q", operand) - } - shape := vecShape{elemBits: elemBits, bits: regBits} - if class == VREG_CLASS { - return operandVReg{ - operandCommon: common, - vecShape: shape, - elemBaseType: baseType, - }, nil - } - // general register - m := min(shape.bits, shape.elemBits) - shape.bits, shape.elemBits = m, m - return operandGReg{ - operandCommon: common, - vecShape: shape, - elemBaseType: baseType, - }, nil - - } - } else if strings.HasPrefix(lhs, "IMM") { - _, bits, ok := decodeType(op) - if !ok { - return nil, fmt.Errorf("failed to decode register width %q", operand) - } - return operandImm{ - operandCommon: common, - bits: bits, - }, nil - } - - // TODO: BASE and SEG - return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand) -} - -func decodeOperands(db *xeddata.Database, operands []string) (ops []operand, err error) { - // Decode the XED operand descriptions. - for _, o := range operands { - op, err := decodeOperand(db, o) - if err != nil { - return nil, err - } - if op != nil { - ops = append(ops, op) - } - } - - // XED doesn't encode the size of mask operands. If there are mask operands, - // try to infer their sizes from other operands. - if err := inferMaskSizes(ops); err != nil { - return nil, fmt.Errorf("%w in operands %+v", err, operands) - } - - return ops, nil -} - -func inferMaskSizes(ops []operand) error { - // This is a heuristic and it falls apart in some cases: - // - // - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate - // mask size. 
- // - // - VINSERT*, VPSLL*, VPSRA*, and VPSRL* and some others naturally have - // mixed input sizes and the XED doesn't indicate which operands the mask - // applies to. - // - // - VPDP* and VP4DP* have really complex mixed operand patterns. - // - // I think for these we may just have to hand-write a table of which - // operands each mask applies to. - inferMask := func(r, w bool) error { - var masks []int - var rSizes, wSizes, sizes []vecShape - allMasks := true - hasWMask := false - for i, op := range ops { - action := op.common().action - if _, ok := op.(operandMask); ok { - if action.r && action.w { - return fmt.Errorf("unexpected rw mask") - } - if action.r == r || action.w == w { - masks = append(masks, i) - } - if action.w { - hasWMask = true - } - } else { - allMasks = false - if reg, ok := op.(operandVReg); ok { - if action.r { - rSizes = append(rSizes, reg.vecShape) - } - if action.w { - wSizes = append(wSizes, reg.vecShape) - } - } - } - } - if len(masks) == 0 { - return nil - } - - if r { - sizes = rSizes - if len(sizes) == 0 { - sizes = wSizes - } - } - if w { - sizes = wSizes - if len(sizes) == 0 { - sizes = rSizes - } - } - - if len(sizes) == 0 { - // If all operands are masks, leave the mask inferrence to the users. - if allMasks { - for _, i := range masks { - m := ops[i].(operandMask) - m.allMasks = true - ops[i] = m - } - return nil - } - return fmt.Errorf("cannot infer mask size: no register operands") - } - shape, ok := singular(sizes) - if !ok { - if !hasWMask && len(wSizes) == 1 && len(masks) == 1 { - // This pattern looks like predicate mask, so its shape should align with the - // output. TODO: verify this is a safe assumption. 
- shape = wSizes[0] - } else { - return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes) - } - } - for _, i := range masks { - m := ops[i].(operandMask) - m.vecShape = shape - ops[i] = m - } - return nil - } - if err := inferMask(true, false); err != nil { - return err - } - if err := inferMask(false, true); err != nil { - return err - } - return nil -} - -// addOperandstoDef adds "in", "inVariant", and "out" to an instruction Def. -// -// Optional mask input operands are added to the inVariant field if -// variant&instVariantMasked, and omitted otherwise. -func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVariant) { - var inVals, inVar, outVals []*unify.Value - asmPos := 0 - for _, op := range ops { - var db unify.DefBuilder - op.addToDef(&db) - db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) - - action := op.common().action - asmCount := 1 // # of assembly operands; 0 or 1 - if action.r { - inVal := unify.NewValue(db.Build()) - // If this is an optional mask, put it in the input variant tuple. - if mask, ok := op.(operandMask); ok && mask.optional { - if variant&instVariantMasked != 0 { - inVar = append(inVar, inVal) - } else { - // This operand doesn't appear in the assembly at all. - asmCount = 0 - } - } else { - // Just a regular input operand. 
- inVals = append(inVals, inVal) - } - } - if action.w { - outVal := unify.NewValue(db.Build()) - outVals = append(outVals, outVal) - } - - asmPos += asmCount - } - - instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...))) - instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...))) - instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...))) -} - -func instToUVal(inst *xeddata.Inst, ops []operand) []*unify.Value { - feature, ok := decodeCPUFeature(inst) - if !ok { - return nil - } - - var vals []*unify.Value - vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone)) - if hasOptionalMask(ops) { - vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked)) - } - return vals -} - -func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant) *unify.Value { - var db unify.DefBuilder - db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64"))) - db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode()))) - addOperandsToDef(ops, &db, variant) - db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature))) - - if strings.Contains(inst.Pattern, "ZEROING=0") { - // This is an EVEX instruction, but the ".Z" (zero-merging) - // instruction flag is NOT valid. EVEX.z must be zero. - // - // This can mean a few things: - // - // - The output of an instruction is a mask, so merging modes don't - // make any sense. E.g., VCMPPS. - // - // - There are no masks involved anywhere. (Maybe MASK=0 is also set - // in this case?) E.g., VINSERTPS. - // - // - The operation inherently performs merging. E.g., VCOMPRESSPS - // with a mem operand. - // - // There may be other reasons. - db.Add("zeroing", unify.NewValue(unify.NewStringExact("false"))) - } - pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} - return unify.NewValuePos(db.Build(), pos) -} - -// decodeCPUFeature returns the CPU feature name required by inst. These match -// the names of the "Has*" feature checks in the simd package. 
-func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { - key := cpuFeatureKey{ - Extension: inst.Extension, - ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""), - } - feat, ok := cpuFeatureMap[key] - if !ok { - imap := unknownFeatures[key] - if imap == nil { - imap = make(map[string]struct{}) - unknownFeatures[key] = imap - } - imap[inst.Opcode()] = struct{}{} - return "", false - } - if feat == "ignore" { - return "", false - } - return feat, true -} - -var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$") - -type cpuFeatureKey struct { - Extension, ISASet string -} - -// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name -// that can be used in the SIMD API. -var cpuFeatureMap = map[cpuFeatureKey]string{ - {"AVX", ""}: "AVX", - {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", - {"AVX2", ""}: "AVX2", - - // AVX-512 foundational features. We combine all of these into one "AVX512" feature. - {"AVX512EVEX", "AVX512F"}: "AVX512", - {"AVX512EVEX", "AVX512CD"}: "AVX512", - {"AVX512EVEX", "AVX512BW"}: "AVX512", - {"AVX512EVEX", "AVX512DQ"}: "AVX512", - // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by - // the vector length suffix. - - // AVX-512 extension features - {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG", - {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI", - {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2", - {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI", - {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI", - {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ", - - // AVX 10.2 (not yet supported) - {"AVX512EVEX", "AVX10_2_RC"}: "ignore", -} - -var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{} - -// hasOptionalMask returns whether there is an optional mask operand in ops. 
-func hasOptionalMask(ops []operand) bool { - for _, op := range ops { - if op, ok := op.(operandMask); ok && op.optional { - return true - } - } - return false -} - -func singular[T comparable](xs []T) (T, bool) { - if len(xs) == 0 { - return *new(T), false - } - for _, x := range xs[1:] { - if x != xs[0] { - return *new(T), false - } - } - return xs[0], true -} - -// decodeReg returns class (NOT_REG_CLASS, VREG_CLASS, GREG_CLASS), -// and width in bits. If the operand cannot be decided as a register, -// then the clas is NOT_REG_CLASS. -func decodeReg(op *xeddata.Operand) (class, width int) { - // op.Width tells us the total width, e.g.,: - // - // dq => 128 bits (XMM) - // qq => 256 bits (YMM) - // mskw => K - // z[iuf?](8|16|32|...) => 512 bits (ZMM) - // - // But the encoding is really weird and it's not clear if these *always* - // mean XMM/YMM/ZMM or if other irregular things can use these large widths. - // Hence, we dig into the register sets themselves. - - if !strings.HasPrefix(op.NameLHS(), "REG") { - return NOT_REG_CLASS, 0 - } - // TODO: We shouldn't be relying on the macro naming conventions. We should - // use all-dec-patterns.txt, but xeddata doesn't support that table right now. - rhs := op.NameRHS() - if !strings.HasSuffix(rhs, "()") { - return NOT_REG_CLASS, 0 - } - switch { - case strings.HasPrefix(rhs, "XMM_"): - return VREG_CLASS, 128 - case strings.HasPrefix(rhs, "YMM_"): - return VREG_CLASS, 256 - case strings.HasPrefix(rhs, "ZMM_"): - return VREG_CLASS, 512 - case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"): - return GREG_CLASS, 64 - case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"): - return GREG_CLASS, 32 - } - return NOT_REG_CLASS, 0 -} - -var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) - -// scalarBaseType describes the base type of a scalar element. This is a Go -// type, but without the bit width suffix (with the exception of -// scalarBaseIntOrUint). 
-type scalarBaseType int - -const ( - scalarBaseInt scalarBaseType = iota - scalarBaseUint - scalarBaseIntOrUint // Signed or unsigned is unspecified - scalarBaseFloat - scalarBaseComplex - scalarBaseBFloat - scalarBaseHFloat -) - -func (s scalarBaseType) regex() string { - switch s { - case scalarBaseInt: - return "int" - case scalarBaseUint: - return "uint" - case scalarBaseIntOrUint: - return "int|uint" - case scalarBaseFloat: - return "float" - case scalarBaseComplex: - return "complex" - case scalarBaseBFloat: - return "BFloat" - case scalarBaseHFloat: - return "HFloat" - } - panic(fmt.Sprintf("unknown scalar base type %d", s)) -} - -func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) { - // The xtype tells you the element type. i8, i16, i32, i64, f32, etc. - // - // TODO: Things like AVX2 VPAND have an xtype of u256 because they're - // element-width agnostic. Do I map that to all widths, or just omit the - // element width and let unification flesh it out? There's no u512 - // (presumably those are all masked, so elem width matters). These are all - // Category: LOGICAL, so maybe we could use that info? - - // Handle some weird ones. - switch op.Xtype { - // 8-bit float formats as defined by Open Compute Project "OCP 8-bit - // Floating Point Specification (OFP8)". - case "bf8": // E5M2 float - return scalarBaseBFloat, 8, true - case "hf8": // E4M3 float - return scalarBaseHFloat, 8, true - case "bf16": // bfloat16 float - return scalarBaseBFloat, 16, true - case "2f16": - // Complex consisting of 2 float16s. Doesn't exist in Go, but we can say - // what it would be. - return scalarBaseComplex, 32, true - case "2i8", "2I8": - // These just use the lower INT8 in each 16 bit field. - // As far as I can tell, "2I8" is a typo. - return scalarBaseInt, 8, true - case "2u16", "2U16": - // some VPDP* has it - // TODO: does "z" means it has zeroing? 
- return scalarBaseUint, 16, true - case "2i16", "2I16": - // some VPDP* has it - return scalarBaseInt, 16, true - case "4u8", "4U8": - // some VPDP* has it - return scalarBaseUint, 8, true - case "4i8", "4I8": - // some VPDP* has it - return scalarBaseInt, 8, true - } - - // The rest follow a simple pattern. - m := xtypeRe.FindStringSubmatch(op.Xtype) - if m == nil { - // TODO: Report unrecognized xtype - return 0, 0, false - } - bits, _ = strconv.Atoi(m[2]) - switch m[1] { - case "i", "u": - // XED is rather inconsistent about what's signed, unsigned, or doesn't - // matter, so merge them together and let the Go definitions narrow as - // appropriate. Maybe there's a better way to do this. - return scalarBaseIntOrUint, bits, true - case "f": - return scalarBaseFloat, bits, true - default: - panic("unreachable") - } -} diff --git a/internal/unify/closure.go b/internal/unify/closure.go deleted file mode 100644 index e8e76e21..00000000 --- a/internal/unify/closure.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "iter" - "maps" - "slices" -) - -type Closure struct { - val *Value - env envSet -} - -func NewSum(vs ...*Value) Closure { - id := &ident{name: "sum"} - return Closure{NewValue(Var{id}), topEnv.bind(id, vs...)} -} - -// IsBottom returns whether c consists of no values. -func (c Closure) IsBottom() bool { - return c.val.Domain == nil -} - -// Summands returns the top-level Values of c. This assumes the top-level of c -// was constructed as a sum, and is mostly useful for debugging. 
-func (c Closure) Summands() iter.Seq[*Value] { - return func(yield func(*Value) bool) { - var rec func(v *Value, env envSet) bool - rec = func(v *Value, env envSet) bool { - switch d := v.Domain.(type) { - case Var: - parts := env.partitionBy(d.id) - for _, part := range parts { - // It may be a sum of sums. Walk into this value. - if !rec(part.value, part.env) { - return false - } - } - return true - default: - return yield(v) - } - } - rec(c.val, c.env) - } -} - -// All enumerates all possible concrete values of c by substituting variables -// from the environment. -// -// E.g., enumerating this Value -// -// a: !sum [1, 2] -// b: !sum [3, 4] -// -// results in -// -// - {a: 1, b: 3} -// - {a: 1, b: 4} -// - {a: 2, b: 3} -// - {a: 2, b: 4} -func (c Closure) All() iter.Seq[*Value] { - // In order to enumerate all concrete values under all possible variable - // bindings, we use a "non-deterministic continuation passing style" to - // implement this. We use CPS to traverse the Value tree, threading the - // (possibly narrowing) environment through that CPS following an Euler - // tour. Where the environment permits multiple choices, we invoke the same - // continuation for each choice. Similar to a yield function, the - // continuation can return false to stop the non-deterministic walk. - return func(yield func(*Value) bool) { - c.val.all1(c.env, func(v *Value, e envSet) bool { - return yield(v) - }) - } -} - -func (v *Value) all1(e envSet, cont func(*Value, envSet) bool) bool { - switch d := v.Domain.(type) { - default: - panic(fmt.Sprintf("unknown domain type %T", d)) - - case nil: - return true - - case Top, String: - return cont(v, e) - - case Def: - fields := d.keys() - // We can reuse this parts slice because we're doing a DFS through the - // state space. (Otherwise, we'd have to do some messy threading of an - // immutable slice-like value through allElt.) 
- parts := make(map[string]*Value, len(fields)) - - // TODO: If there are no Vars or Sums under this Def, then nothing can - // change the Value or env, so we could just cont(v, e). - var allElt func(elt int, e envSet) bool - allElt = func(elt int, e envSet) bool { - if elt == len(fields) { - // Build a new Def from the concrete parts. Clone parts because - // we may reuse it on other non-deterministic branches. - nVal := newValueFrom(Def{maps.Clone(parts)}, v) - return cont(nVal, e) - } - - return d.fields[fields[elt]].all1(e, func(v *Value, e envSet) bool { - parts[fields[elt]] = v - return allElt(elt+1, e) - }) - } - return allElt(0, e) - - case Tuple: - // Essentially the same as Def. - if d.repeat != nil { - // There's nothing we can do with this. - return cont(v, e) - } - parts := make([]*Value, len(d.vs)) - var allElt func(elt int, e envSet) bool - allElt = func(elt int, e envSet) bool { - if elt == len(d.vs) { - // Build a new tuple from the concrete parts. Clone parts because - // we may reuse it on other non-deterministic branches. - nVal := newValueFrom(Tuple{vs: slices.Clone(parts)}, v) - return cont(nVal, e) - } - - return d.vs[elt].all1(e, func(v *Value, e envSet) bool { - parts[elt] = v - return allElt(elt+1, e) - }) - } - return allElt(0, e) - - case Var: - // Go each way this variable can be bound. - for _, ePart := range e.partitionBy(d.id) { - // d.id is no longer bound in this environment partition. We'll may - // need it later in the Euler tour, so bind it back to this single - // value. - env := ePart.env.bind(d.id, ePart.value) - if !ePart.value.all1(env, cont) { - return false - } - } - return true - } -} diff --git a/internal/unify/domain.go b/internal/unify/domain.go deleted file mode 100644 index 1e0f2be6..00000000 --- a/internal/unify/domain.go +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "fmt" - "iter" - "maps" - "reflect" - "regexp" - "slices" - "strconv" - "strings" -) - -// A Domain is a non-empty set of values, all of the same kind. -// -// Domain may be a scalar: -// -// - [String] - Represents string-typed values. -// -// Or a composite: -// -// - [Def] - A mapping from fixed keys to [Domain]s. -// -// - [Tuple] - A fixed-length sequence of [Domain]s or -// all possible lengths repeating a [Domain]. -// -// Or top or bottom: -// -// - [Top] - Represents all possible values of all kinds. -// -// - nil - Represents no values. -// -// Or a variable: -// -// - [Var] - A value captured in the environment. -type Domain interface { - Exact() bool - WhyNotExact() string - - // decode stores this value in a Go value. If this value is not exact, this - // returns a potentially wrapped *inexactError. - decode(reflect.Value) error -} - -type inexactError struct { - valueType string - goType string -} - -func (e *inexactError) Error() string { - return fmt.Sprintf("cannot store inexact %s value in %s", e.valueType, e.goType) -} - -type decodeError struct { - path string - err error -} - -func newDecodeError(path string, err error) *decodeError { - if err, ok := err.(*decodeError); ok { - return &decodeError{path: path + "." + err.path, err: err.err} - } - return &decodeError{path: path, err: err} -} - -func (e *decodeError) Unwrap() error { - return e.err -} - -func (e *decodeError) Error() string { - return fmt.Sprintf("%s: %s", e.path, e.err) -} - -// Top represents all possible values of all possible types. -type Top struct{} - -func (t Top) Exact() bool { return false } -func (t Top) WhyNotExact() string { return "is top" } - -func (t Top) decode(rv reflect.Value) error { - // We can decode Top into a pointer-typed value as nil. - if rv.Kind() != reflect.Pointer { - return &inexactError{"top", rv.Type().String()} - } - rv.SetZero() - return nil -} - -// A Def is a mapping from field names to [Value]s. 
Any fields not explicitly -// listed have [Value] [Top]. -type Def struct { - fields map[string]*Value -} - -// A DefBuilder builds a [Def] one field at a time. The zero value is an empty -// [Def]. -type DefBuilder struct { - fields map[string]*Value -} - -func (b *DefBuilder) Add(name string, v *Value) { - if b.fields == nil { - b.fields = make(map[string]*Value) - } - if _, ok := b.fields[name]; ok { - panic(fmt.Sprintf("duplicate field %q", name)) - } - b.fields[name] = v -} - -// Build constructs a [Def] from the fields added to this builder. -func (b *DefBuilder) Build() Def { - return Def{maps.Clone(b.fields)} -} - -// Exact returns true if all field Values are exact. -func (d Def) Exact() bool { - for _, v := range d.fields { - if !v.Exact() { - return false - } - } - return true -} - -// WhyNotExact returns why the value is not exact -func (d Def) WhyNotExact() string { - for s, v := range d.fields { - if !v.Exact() { - w := v.WhyNotExact() - return "field " + s + ": " + w - } - } - return "" -} - -func (d Def) decode(rv reflect.Value) error { - if rv.Kind() != reflect.Struct { - return fmt.Errorf("cannot decode Def into %s", rv.Type()) - } - - var lowered map[string]string // Lower case -> canonical for d.fields. 
- rt := rv.Type() - for fi := range rv.NumField() { - fType := rt.Field(fi) - if fType.PkgPath != "" { - continue - } - v := d.fields[fType.Name] - if v == nil { - v = topValue - - // Try a case-insensitive match - canon, ok := d.fields[strings.ToLower(fType.Name)] - if ok { - v = canon - } else { - if lowered == nil { - lowered = make(map[string]string, len(d.fields)) - for k := range d.fields { - l := strings.ToLower(k) - if k != l { - lowered[l] = k - } - } - } - canon, ok := lowered[strings.ToLower(fType.Name)] - if ok { - v = d.fields[canon] - } - } - } - if err := decodeReflect(v, rv.Field(fi)); err != nil { - return newDecodeError(fType.Name, err) - } - } - return nil -} - -func (d Def) keys() []string { - return slices.Sorted(maps.Keys(d.fields)) -} - -func (d Def) All() iter.Seq2[string, *Value] { - // TODO: We call All fairly often. It's probably bad to sort this every - // time. - keys := slices.Sorted(maps.Keys(d.fields)) - return func(yield func(string, *Value) bool) { - for _, k := range keys { - if !yield(k, d.fields[k]) { - return - } - } - } -} - -// A Tuple is a sequence of Values in one of two forms: 1. a fixed-length tuple, -// where each Value can be different or 2. a "repeated tuple", which is a Value -// repeated 0 or more times. -type Tuple struct { - vs []*Value - - // repeat, if non-nil, means this Tuple consists of an element repeated 0 or - // more times. If repeat is non-nil, vs must be nil. This is a generator - // function because we don't necessarily want *exactly* the same Value - // repeated. For example, in YAML encoding, a !sum in a repeated tuple needs - // a fresh variable in each instance. 
- repeat []func(envSet) (*Value, envSet) -} - -func NewTuple(vs ...*Value) Tuple { - return Tuple{vs: vs} -} - -func NewRepeat(gens ...func(envSet) (*Value, envSet)) Tuple { - return Tuple{repeat: gens} -} - -func (d Tuple) Exact() bool { - if d.repeat != nil { - return false - } - for _, v := range d.vs { - if !v.Exact() { - return false - } - } - return true -} - -func (d Tuple) WhyNotExact() string { - if d.repeat != nil { - return "d.repeat is not nil" - } - for i, v := range d.vs { - if !v.Exact() { - w := v.WhyNotExact() - return "index " + strconv.FormatInt(int64(i), 10) + ": " + w - } - } - return "" -} - -func (d Tuple) decode(rv reflect.Value) error { - if d.repeat != nil { - return &inexactError{"repeated tuple", rv.Type().String()} - } - // TODO: We could also do arrays. - if rv.Kind() != reflect.Slice { - return fmt.Errorf("cannot decode Tuple into %s", rv.Type()) - } - if rv.IsNil() || rv.Cap() < len(d.vs) { - rv.Set(reflect.MakeSlice(rv.Type(), len(d.vs), len(d.vs))) - } else { - rv.SetLen(len(d.vs)) - } - for i, v := range d.vs { - if err := decodeReflect(v, rv.Index(i)); err != nil { - return newDecodeError(fmt.Sprintf("%d", i), err) - } - } - return nil -} - -// A String represents a set of strings. It can represent the intersection of a -// set of regexps, or a single exact string. In general, the domain of a String -// is non-empty, but we do not attempt to prove emptiness of a regexp value. -type String struct { - kind stringKind - re []*regexp.Regexp // Intersection of regexps - exact string -} - -type stringKind int - -const ( - stringRegex stringKind = iota - stringExact -) - -func NewStringRegex(exprs ...string) (String, error) { - if len(exprs) == 0 { - exprs = []string{""} - } - v := String{kind: -1} - for _, expr := range exprs { - if expr == "" { - // Skip constructing the regexp. It won't have a "literal prefix" - // and so we wind up thinking this is a regexp instead of an exact - // (empty) string. 
- v = String{kind: stringExact, exact: ""} - continue - } - - re, err := regexp.Compile(`\A(?:` + expr + `)\z`) - if err != nil { - return String{}, fmt.Errorf("parsing value: %s", err) - } - - // An exact value narrows the whole domain to exact, so we're done, but - // should keep parsing. - if v.kind == stringExact { - continue - } - - if exact, complete := re.LiteralPrefix(); complete { - v = String{kind: stringExact, exact: exact} - } else { - v.kind = stringRegex - v.re = append(v.re, re) - } - } - return v, nil -} - -func NewStringExact(s string) String { - return String{kind: stringExact, exact: s} -} - -// Exact returns whether this Value is known to consist of a single string. -func (d String) Exact() bool { - return d.kind == stringExact -} - -func (d String) WhyNotExact() string { - if d.kind == stringExact { - return "" - } - return "string is not exact" -} - -func (d String) decode(rv reflect.Value) error { - if d.kind != stringExact { - return &inexactError{"regex", rv.Type().String()} - } - switch rv.Kind() { - default: - return fmt.Errorf("cannot decode String into %s", rv.Type()) - case reflect.String: - rv.SetString(d.exact) - case reflect.Int: - i, err := strconv.Atoi(d.exact) - if err != nil { - return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) - } - rv.SetInt(int64(i)) - case reflect.Bool: - b, err := strconv.ParseBool(d.exact) - if err != nil { - return fmt.Errorf("cannot decode String into %s: %s", rv.Type(), err) - } - rv.SetBool(b) - } - return nil -} diff --git a/internal/unify/dot.go b/internal/unify/dot.go deleted file mode 100644 index 6fafa252..00000000 --- a/internal/unify/dot.go +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "bytes" - "fmt" - "html" - "io" - "os" - "os/exec" - "strings" -) - -const maxNodes = 30 - -type dotEncoder struct { - w *bytes.Buffer - - idGen int // Node name generation - valLimit int // Limit the number of Values in a subgraph - - idp identPrinter -} - -func newDotEncoder() *dotEncoder { - return &dotEncoder{ - w: new(bytes.Buffer), - } -} - -func (enc *dotEncoder) clear() { - enc.w.Reset() - enc.idGen = 0 -} - -func (enc *dotEncoder) writeTo(w io.Writer) { - fmt.Fprintln(w, "digraph {") - // Use the "new" ranking algorithm, which lets us put nodes from different - // clusters in the same rank. - fmt.Fprintln(w, "newrank=true;") - fmt.Fprintln(w, "node [shape=box, ordering=out];") - - w.Write(enc.w.Bytes()) - fmt.Fprintln(w, "}") -} - -func (enc *dotEncoder) writeSvg(w io.Writer) error { - cmd := exec.Command("dot", "-Tsvg") - in, err := cmd.StdinPipe() - if err != nil { - return err - } - var out bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = os.Stderr - if err := cmd.Start(); err != nil { - return err - } - enc.writeTo(in) - in.Close() - if err := cmd.Wait(); err != nil { - return err - } - // Trim SVG header so the result can be embedded - // - // TODO: In Graphviz 10.0.1, we could use -Tsvg_inline. - svg := out.Bytes() - if i := bytes.Index(svg, []byte("= 0 { - svg = svg[i:] - } - _, err = w.Write(svg) - return err -} - -func (enc *dotEncoder) newID(f string) string { - id := fmt.Sprintf(f, enc.idGen) - enc.idGen++ - return id -} - -func (enc *dotEncoder) node(label, sublabel string) string { - id := enc.newID("n%d") - l := html.EscapeString(label) - if sublabel != "" { - l += fmt.Sprintf("
%s", html.EscapeString(sublabel)) - } - fmt.Fprintf(enc.w, "%s [label=<%s>];\n", id, l) - return id -} - -func (enc *dotEncoder) edge(from, to string, label string, args ...any) { - l := fmt.Sprintf(label, args...) - fmt.Fprintf(enc.w, "%s -> %s [label=%q];\n", from, to, l) -} - -func (enc *dotEncoder) valueSubgraph(v *Value) { - enc.valLimit = maxNodes - cID := enc.newID("cluster_%d") - fmt.Fprintf(enc.w, "subgraph %s {\n", cID) - fmt.Fprintf(enc.w, "style=invis;") - vID := enc.value(v) - fmt.Fprintf(enc.w, "}\n") - // We don't need the IDs right now. - _, _ = cID, vID -} - -func (enc *dotEncoder) value(v *Value) string { - if enc.valLimit <= 0 { - id := enc.newID("n%d") - fmt.Fprintf(enc.w, "%s [label=\"...\", shape=triangle];\n", id) - return id - } - enc.valLimit-- - - switch vd := v.Domain.(type) { - default: - panic(fmt.Sprintf("unknown domain type %T", vd)) - - case nil: - return enc.node("_|_", "") - - case Top: - return enc.node("_", "") - - // TODO: Like in YAML, figure out if this is just a sum. In dot, we - // could say any unentangled variable is a sum, and if it has more than - // one reference just share the node. 
- - // case Sum: - // node := enc.node("Sum", "") - // for i, elt := range vd.vs { - // enc.edge(node, enc.value(elt), "%d", i) - // if enc.valLimit <= 0 { - // break - // } - // } - // return node - - case Def: - node := enc.node("Def", "") - for k, v := range vd.All() { - enc.edge(node, enc.value(v), "%s", k) - if enc.valLimit <= 0 { - break - } - } - return node - - case Tuple: - if vd.repeat == nil { - label := "Tuple" - node := enc.node(label, "") - for i, elt := range vd.vs { - enc.edge(node, enc.value(elt), "%d", i) - if enc.valLimit <= 0 { - break - } - } - return node - } else { - // TODO - return enc.node("TODO: Repeat", "") - } - - case String: - switch vd.kind { - case stringExact: - return enc.node(fmt.Sprintf("%q", vd.exact), "") - case stringRegex: - var parts []string - for _, re := range vd.re { - parts = append(parts, fmt.Sprintf("%q", re)) - } - return enc.node(strings.Join(parts, "&"), "") - } - panic("bad String kind") - - case Var: - return enc.node(fmt.Sprintf("Var %s", enc.idp.unique(vd.id)), "") - } -} - -func (enc *dotEncoder) envSubgraph(e envSet) { - enc.valLimit = maxNodes - cID := enc.newID("cluster_%d") - fmt.Fprintf(enc.w, "subgraph %s {\n", cID) - fmt.Fprintf(enc.w, "style=invis;") - vID := enc.env(e.root) - fmt.Fprintf(enc.w, "}\n") - _, _ = cID, vID -} - -func (enc *dotEncoder) env(e *envExpr) string { - switch e.kind { - default: - panic("bad kind") - case envZero: - return enc.node("0", "") - case envUnit: - return enc.node("1", "") - case envBinding: - node := enc.node(fmt.Sprintf("%q :", enc.idp.unique(e.id)), "") - enc.edge(node, enc.value(e.val), "") - return node - case envProduct: - node := enc.node("⨯", "") - for _, op := range e.operands { - enc.edge(node, enc.env(op), "") - } - return node - case envSum: - node := enc.node("+", "") - for _, op := range e.operands { - enc.edge(node, enc.env(op), "") - } - return node - } -} diff --git a/internal/unify/env.go b/internal/unify/env.go deleted file mode 100644 index 
3331ff79..00000000 --- a/internal/unify/env.go +++ /dev/null @@ -1,480 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "iter" - "reflect" - "strings" -) - -// An envSet is an immutable set of environments, where each environment is a -// mapping from [ident]s to [Value]s. -// -// To keep this compact, we use an algebraic representation similar to -// relational algebra. The atoms are zero, unit, or a singular binding: -// -// - A singular binding is an environment set consisting of a single environment -// that binds a single ident to a single value. -// -// - Zero is the empty set. -// -// - Unit is an environment set consisting of a single, empty environment (no -// bindings). -// -// From these, we build up more complex sets of environments using sums and -// cross products: -// -// - A sum is simply the union of the two environment sets. -// -// - A cross product is the Cartesian product of the two environment sets, -// followed by combining each pair of environments. Combining simply merges the -// two mappings, but fails if the mappings overlap. 
-// -// For example, to represent {{x: 1, y: 1}, {x: 2, y: 2}}, we build the two -// environments and sum them: -// -// ({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2}) -// -// If we add a third variable z that can be 1 or 2, independent of x and y, we -// get four logical environments: -// -// {x: 1, y: 1, z: 1} -// {x: 2, y: 2, z: 1} -// {x: 1, y: 1, z: 2} -// {x: 2, y: 2, z: 2} -// -// This could be represented as a sum of all four environments, but because z is -// independent, we can use a more compact representation: -// -// (({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})) ⨯ ({z: 1} + {z: 2}) -// -// Environment sets obey commutative algebra rules: -// -// e + 0 = e -// e ⨯ 0 = 0 -// e ⨯ 1 = e -// e + f = f + e -// e ⨯ f = f ⨯ e -type envSet struct { - root *envExpr -} - -type envExpr struct { - // TODO: A tree-based data structure for this may not be ideal, since it - // involves a lot of walking to find things and we often have to do deep - // rewrites anyway for partitioning. Would some flattened array-style - // representation be better, possibly combined with an index of ident uses? - // We could even combine that with an immutable array abstraction (ala - // Clojure) that could enable more efficient construction operations. - - kind envExprKind - - // For envBinding - id *ident - val *Value - - // For sum or product. Len must be >= 2 and none of the elements can have - // the same kind as this node. - operands []*envExpr -} - -type envExprKind byte - -const ( - envZero envExprKind = iota - envUnit - envProduct - envSum - envBinding -) - -var ( - // topEnv is the unit value (multiplicative identity) of a [envSet]. - topEnv = envSet{envExprUnit} - // bottomEnv is the zero value (additive identity) of a [envSet]. - bottomEnv = envSet{envExprZero} - - envExprZero = &envExpr{kind: envZero} - envExprUnit = &envExpr{kind: envUnit} -) - -// bind binds id to each of vals in e. -// -// Its panics if id is already bound in e. 
-// -// Environments are typically initially constructed by starting with [topEnv] -// and calling bind one or more times. -func (e envSet) bind(id *ident, vals ...*Value) envSet { - if e.isEmpty() { - return bottomEnv - } - - // TODO: If any of vals are _, should we just drop that val? We're kind of - // inconsistent about whether an id missing from e means id is invalid or - // means id is _. - - // Check that id isn't present in e. - for range e.root.bindings(id) { - panic("id " + id.name + " already present in environment") - } - - // Create a sum of all the values. - bindings := make([]*envExpr, 0, 1) - for _, val := range vals { - bindings = append(bindings, &envExpr{kind: envBinding, id: id, val: val}) - } - - // Multiply it in. - return envSet{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} -} - -func (e envSet) isEmpty() bool { - return e.root.kind == envZero -} - -// bindings yields all [envBinding] nodes in e with the given id. If id is nil, -// it yields all binding nodes. -func (e *envExpr) bindings(id *ident) iter.Seq[*envExpr] { - // This is just a pre-order walk and it happens this is the only thing we - // need a pre-order walk for. - return func(yield func(*envExpr) bool) { - var rec func(e *envExpr) bool - rec = func(e *envExpr) bool { - if e.kind == envBinding && (id == nil || e.id == id) { - if !yield(e) { - return false - } - } - for _, o := range e.operands { - if !rec(o) { - return false - } - } - return true - } - rec(e) - } -} - -// newEnvExprProduct constructs a product node from exprs, performing -// simplifications. It does NOT check that bindings are disjoint. -func newEnvExprProduct(exprs ...*envExpr) *envExpr { - factors := make([]*envExpr, 0, 2) - for _, expr := range exprs { - switch expr.kind { - case envZero: - return envExprZero - case envUnit: - // No effect on product - case envProduct: - factors = append(factors, expr.operands...) 
- default: - factors = append(factors, expr) - } - } - - if len(factors) == 0 { - return envExprUnit - } else if len(factors) == 1 { - return factors[0] - } - return &envExpr{kind: envProduct, operands: factors} -} - -// newEnvExprSum constructs a sum node from exprs, performing simplifications. -func newEnvExprSum(exprs ...*envExpr) *envExpr { - // TODO: If all of envs are products (or bindings), factor any common terms. - // E.g., x * y + x * z ==> x * (y + z). This is easy to do for binding - // terms, but harder to do for more general terms. - - var have smallSet[*envExpr] - terms := make([]*envExpr, 0, 2) - for _, expr := range exprs { - switch expr.kind { - case envZero: - // No effect on sum - case envSum: - for _, expr1 := range expr.operands { - if have.Add(expr1) { - terms = append(terms, expr1) - } - } - default: - if have.Add(expr) { - terms = append(terms, expr) - } - } - } - - if len(terms) == 0 { - return envExprZero - } else if len(terms) == 1 { - return terms[0] - } - return &envExpr{kind: envSum, operands: terms} -} - -func crossEnvs(env1, env2 envSet) envSet { - // Confirm that envs have disjoint idents. - var ids1 smallSet[*ident] - for e := range env1.root.bindings(nil) { - ids1.Add(e.id) - } - for e := range env2.root.bindings(nil) { - if ids1.Has(e.id) { - panic(fmt.Sprintf("%s bound on both sides of cross-product", e.id.name)) - } - } - - return envSet{newEnvExprProduct(env1.root, env2.root)} -} - -func unionEnvs(envs ...envSet) envSet { - exprs := make([]*envExpr, len(envs)) - for i := range envs { - exprs[i] = envs[i].root - } - return envSet{newEnvExprSum(exprs...)} -} - -// envPartition is a subset of an env where id is bound to value in all -// deterministic environments. -type envPartition struct { - id *ident - value *Value - env envSet -} - -// partitionBy splits e by distinct bindings of id and removes id from each -// partition. 
-// -// If there are environments in e where id is not bound, they will not be -// reflected in any partition. -// -// It panics if e is bottom, since attempting to partition an empty environment -// set almost certainly indicates a bug. -func (e envSet) partitionBy(id *ident) []envPartition { - if e.isEmpty() { - // We could return zero partitions, but getting here at all almost - // certainly indicates a bug. - panic("cannot partition empty environment set") - } - - // Emit a partition for each value of id. - var seen smallSet[*Value] - var parts []envPartition - for n := range e.root.bindings(id) { - if !seen.Add(n.val) { - // Already emitted a partition for this value. - continue - } - - parts = append(parts, envPartition{ - id: id, - value: n.val, - env: envSet{e.root.substitute(id, n.val)}, - }) - } - - return parts -} - -// substitute replaces bindings of id to val with 1 and bindings of id to any -// other value with 0 and simplifies the result. -func (e *envExpr) substitute(id *ident, val *Value) *envExpr { - switch e.kind { - default: - panic("bad kind") - - case envZero, envUnit: - return e - - case envBinding: - if e.id != id { - return e - } else if e.val != val { - return envExprZero - } else { - return envExprUnit - } - - case envProduct, envSum: - // Substitute each operand. Sometimes, this won't change anything, so we - // build the new operands list lazily. - var nOperands []*envExpr - for i, op := range e.operands { - nOp := op.substitute(id, val) - if nOperands == nil && op != nOp { - // Operand diverged; initialize nOperands. - nOperands = make([]*envExpr, 0, len(e.operands)) - nOperands = append(nOperands, e.operands[:i]...) - } - if nOperands != nil { - nOperands = append(nOperands, nOp) - } - } - if nOperands == nil { - // Nothing changed. - return e - } - if e.kind == envProduct { - return newEnvExprProduct(nOperands...) - } else { - return newEnvExprSum(nOperands...) 
- } - } -} - -// A smallSet is a set optimized for stack allocation when small. -type smallSet[T comparable] struct { - array [32]T - n int - - m map[T]struct{} -} - -// Has returns whether val is in set. -func (s *smallSet[T]) Has(val T) bool { - arr := s.array[:s.n] - for i := range arr { - if arr[i] == val { - return true - } - } - _, ok := s.m[val] - return ok -} - -// Add adds val to the set and returns true if it was added (not already -// present). -func (s *smallSet[T]) Add(val T) bool { - // Test for presence. - if s.Has(val) { - return false - } - - // Add it - if s.n < len(s.array) { - s.array[s.n] = val - s.n++ - } else { - if s.m == nil { - s.m = make(map[T]struct{}) - } - s.m[val] = struct{}{} - } - return true -} - -type ident struct { - _ [0]func() // Not comparable (only compare *ident) - name string -} - -type Var struct { - id *ident -} - -func (d Var) Exact() bool { - // These can't appear in concrete Values. - panic("Exact called on non-concrete Value") -} - -func (d Var) WhyNotExact() string { - // These can't appear in concrete Values. - return "WhyNotExact called on non-concrete Value" -} - -func (d Var) decode(rv reflect.Value) error { - return &inexactError{"var", rv.Type().String()} -} - -func (d Var) unify(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - // TODO: Vars from !sums in the input can have a huge number of values. - // Unifying these could be way more efficient with some indexes over any - // exact values we can pull out, like Def fields that are exact Strings. - // Maybe we try to produce an array of yes/no/maybe matches and then we only - // have to do deeper evaluation of the maybes. We could probably cache this - // on an envTerm. It may also help to special-case Var/Var unification to - // pick which one to index versus enumerate. - - if vd, ok := w.Domain.(Var); ok && d.id == vd.id { - // Unifying $x with $x results in $x. 
If we descend into this we'll have - // problems because we strip $x out of the environment to keep ourselves - // honest and then can't find it on the other side. - // - // TODO: I'm not positive this is the right fix. - return vd, e, nil - } - - // We need to unify w with the value of d in each possible environment. We - // can save some work by grouping environments by the value of d, since - // there will be a lot of redundancy here. - var nEnvs []envSet - envParts := e.partitionBy(d.id) - for i, envPart := range envParts { - exit := uf.enterVar(d.id, i) - // Each branch logically gets its own copy of the initial environment - // (narrowed down to just this binding of the variable), and each branch - // may result in different changes to that starting environment. - res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) - exit.exit() - if err != nil { - return nil, envSet{}, err - } - if res.Domain == nil { - // This branch entirely failed to unify, so it's gone. - continue - } - nEnv := e2.bind(d.id, res) - nEnvs = append(nEnvs, nEnv) - } - - if len(nEnvs) == 0 { - // All branches failed - return nil, bottomEnv, nil - } - - // The effect of this is entirely captured in the environment. We can return - // back the same Bind node. - return d, unionEnvs(nEnvs...), nil -} - -// An identPrinter maps [ident]s to unique string names. 
-type identPrinter struct { - ids map[*ident]string - idGen map[string]int -} - -func (p *identPrinter) unique(id *ident) string { - if p.ids == nil { - p.ids = make(map[*ident]string) - p.idGen = make(map[string]int) - } - - name, ok := p.ids[id] - if !ok { - gen := p.idGen[id.name] - p.idGen[id.name]++ - if gen == 0 { - name = id.name - } else { - name = fmt.Sprintf("%s#%d", id.name, gen) - } - p.ids[id] = name - } - - return name -} - -func (p *identPrinter) slice(ids []*ident) string { - var strs []string - for _, id := range ids { - strs = append(strs, p.unique(id)) - } - return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) -} diff --git a/internal/unify/html.go b/internal/unify/html.go deleted file mode 100644 index 036b80e2..00000000 --- a/internal/unify/html.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "html" - "io" - "strings" -) - -func (t *tracer) writeHTML(w io.Writer) { - if !t.saveTree { - panic("writeHTML called without tracer.saveTree") - } - - fmt.Fprintf(w, "", htmlCSS) - for _, root := range t.trees { - dot := newDotEncoder() - html := htmlTracer{w: w, dot: dot} - html.writeTree(root) - } - fmt.Fprintf(w, "\n") -} - -const htmlCSS = ` -.unify { - display: grid; - grid-auto-columns: min-content; - text-align: center; -} - -.header { - grid-row: 1; - font-weight: bold; - padding: 0.25em; - position: sticky; - top: 0; - background: white; -} - -.envFactor { - display: grid; - grid-auto-rows: min-content; - grid-template-columns: subgrid; - text-align: center; -} -` - -type htmlTracer struct { - w io.Writer - dot *dotEncoder - svgs map[any]string -} - -func (t *htmlTracer) writeTree(node *traceTree) { - // TODO: This could be really nice. 
- // - // - Put nodes that were unified on the same rank with {rank=same; a; b} - // - // - On hover, highlight nodes that node was unified with and the result. If - // it's a variable, highlight it in the environment, too. - // - // - On click, show the details of unifying that node. - // - // This could be the only way to navigate, without necessarily needing the - // whole nest of nodes. - - // TODO: It might be possible to write this out on the fly. - - t.emit([]*Value{node.v, node.w}, []string{"v", "w"}, node.envIn) - - // Render children. - for i, child := range node.children { - if i >= 10 { - fmt.Fprintf(t.w, `
...
`) - break - } - fmt.Fprintf(t.w, `
%s`, html.EscapeString(child.label)) - t.writeTree(child) - fmt.Fprintf(t.w, "
\n") - } - - // Render result. - if node.err != nil { - fmt.Fprintf(t.w, "Error: %s\n", html.EscapeString(node.err.Error())) - } else { - t.emit([]*Value{node.res}, []string{"res"}, node.env) - } -} - -func htmlSVG[Key comparable](t *htmlTracer, f func(Key), arg Key) string { - if s, ok := t.svgs[arg]; ok { - return s - } - var buf strings.Builder - f(arg) - t.dot.writeSvg(&buf) - t.dot.clear() - svg := buf.String() - if t.svgs == nil { - t.svgs = make(map[any]string) - } - t.svgs[arg] = svg - buf.Reset() - return svg -} - -func (t *htmlTracer) emit(vs []*Value, labels []string, env envSet) { - fmt.Fprintf(t.w, `
`) - for i, v := range vs { - fmt.Fprintf(t.w, `
%s
`, i+1, html.EscapeString(labels[i])) - fmt.Fprintf(t.w, `
%s
`, i+1, htmlSVG(t, t.dot.valueSubgraph, v)) - } - col := len(vs) - - fmt.Fprintf(t.w, `
in
`, col+1) - fmt.Fprintf(t.w, `
%s
`, col+1, htmlSVG(t, t.dot.envSubgraph, env)) - - fmt.Fprintf(t.w, `
`) -} diff --git a/internal/unify/pos.go b/internal/unify/pos.go deleted file mode 100644 index 4f7046a4..00000000 --- a/internal/unify/pos.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" -) - -type Pos struct { - Path string - Line int -} - -func (p Pos) String() string { - var b []byte - b, _ = p.AppendText(b) - return string(b) -} - -func (p Pos) AppendText(b []byte) ([]byte, error) { - if p.Line == 0 { - if p.Path == "" { - return append(b, "?:?"...), nil - } else { - return append(b, p.Path...), nil - } - } else if p.Path == "" { - return fmt.Appendf(b, "?:%d", p.Line), nil - } - return fmt.Appendf(b, "%s:%d", p.Path, p.Line), nil -} diff --git a/internal/unify/testdata/stress.yaml b/internal/unify/testdata/stress.yaml deleted file mode 100644 index e4478536..00000000 --- a/internal/unify/testdata/stress.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# In the original representation of environments, this caused an exponential -# blowup in time and allocation. With that representation, this took about 20 -# seconds on my laptop and had a max RSS of ~12 GB. Big enough to be really -# noticeable, but not so big it's likely to crash a developer machine. With the -# better environment representation, it runs almost instantly and has an RSS of -# ~90 MB. 
-unify: -- !sum - - !sum [1, 2] - - !sum [3, 4] - - !sum [5, 6] - - !sum [7, 8] - - !sum [9, 10] - - !sum [11, 12] - - !sum [13, 14] - - !sum [15, 16] - - !sum [17, 18] - - !sum [19, 20] - - !sum [21, 22] -- !sum - - !sum [1, 2] - - !sum [3, 4] - - !sum [5, 6] - - !sum [7, 8] - - !sum [9, 10] - - !sum [11, 12] - - !sum [13, 14] - - !sum [15, 16] - - !sum [17, 18] - - !sum [19, 20] - - !sum [21, 22] -all: - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] diff --git a/internal/unify/testdata/unify.yaml b/internal/unify/testdata/unify.yaml deleted file mode 100644 index 131e527c..00000000 --- a/internal/unify/testdata/unify.yaml +++ /dev/null @@ -1,174 +0,0 @@ -# Basic tests of unification - -# -# Terminals -# - -unify: -- _ -- _ -want: - _ ---- -unify: -- _ -- test -want: - test ---- -unify: -- test -- t?est -want: - test ---- -unify: -- 1 -- 1 -want: - 1 ---- -unify: -- test -- foo -want: - _|_ - -# -# Tuple -# - ---- -unify: -- [a, b] -- [a, b] -want: - [a, b] ---- -unify: -- [a, _] -- [_, b] -want: - [a, b] ---- -unify: -- ["ab?c", "de?f"] -- [ac, def] -want: - [ac, def] - -# -# Repeats -# - ---- -unify: -- !repeat [a] -- [_] -want: - [a] ---- -unify: -- !repeat [a] -- [_, _] -want: - [a, a] ---- -unify: -- !repeat [a] -- [b] -want: - _|_ ---- -unify: -- !repeat [xy*] -- [x, xy, xyy] -want: - [x, xy, xyy] ---- -unify: -- !repeat [xy*] -- !repeat ["xz?y*"] -- [x, xy, xyy] -want: - [x, xy, xyy] ---- -unify: -- !repeat [!sum [a, b]] -- [a, b, a] -all: -- [a, b, a] ---- -unify: -- !repeat [!sum [a, b]] -- !repeat [!sum [b, c]] -- [b, b, b] -all: -- [b, b, b] ---- -unify: -- !repeat [!sum [a, b]] -- !repeat [!sum [b, c]] -- [a] -all: [] - -# -# Def -# - ---- -unify: -- {a: a, b: b} -- {a: a, b: b} -want: - {a: a, b: b} ---- -unify: -- {a: a} -- {b: b} -want: - {a: a, b: b} - -# -# Sum -# - ---- -unify: -- !sum [1, 2] -- !sum [2, 3] -all: -- 2 ---- -unify: -- !sum [{label: a, value: abc}, {label: b, value: def}] -- !sum [{value: "ab?c", 
extra: d}, {value: "def?", extra: g}] -all: -- {extra: d, label: a, value: abc} -- {extra: g, label: b, value: def} ---- -# A sum of repeats must deal with different dynamically-created variables in -# each branch. -unify: -- !sum [!repeat [a], !repeat [b]] -- [a, a, a] -all: -- [a, a, a] ---- -unify: -- !sum [!repeat [a], !repeat [b]] -- [a, a, b] -all: [] ---- -# Exercise sumEnvs with more than one result -unify: -- !sum - - [a|b, c|d] - - [e, g] -- [!sum [a, b, e, f], !sum [c, d, g, h]] -all: -- [a, c] -- [a, d] -- [b, c] -- [b, d] -- [e, g] diff --git a/internal/unify/testdata/vars.yaml b/internal/unify/testdata/vars.yaml deleted file mode 100644 index fe8a57e4..00000000 --- a/internal/unify/testdata/vars.yaml +++ /dev/null @@ -1,175 +0,0 @@ -# -# Basic tests -# - -name: "basic string" -unify: -- $x -- test -all: -- test ---- -name: "basic tuple" -unify: -- [$x, $x] -- [test, test] -all: -- [test, test] ---- -name: "three tuples" -unify: -- [$x, $x] -- [test, _] -- [_, test] -all: -- [test, test] ---- -name: "basic def" -unify: -- {a: $x, b: $x} -- {a: test, b: test} -all: -- {a: test, b: test} ---- -name: "three defs" -unify: -- {a: $x, b: $x} -- {a: test} -- {b: test} -all: -- {a: test, b: test} - -# -# Bottom tests -# - ---- -name: "basic bottom" -unify: -- [$x, $x] -- [test, foo] -all: [] ---- -name: "three-way bottom" -unify: -- [$x, $x] -- [test, _] -- [_, foo] -all: [] - -# -# Basic sum tests -# - ---- -name: "basic sum" -unify: -- $x -- !sum [a, b] -all: -- a -- b ---- -name: "sum of tuples" -unify: -- [$x] -- !sum [[a], [b]] -all: -- [a] -- [b] ---- -name: "acausal sum" -unify: -- [_, !sum [a, b]] -- [$x, $x] -all: -- [a, a] -- [b, b] - -# -# Transitivity tests -# - ---- -name: "transitivity" -unify: -- [_, _, _, test] -- [$x, $x, _, _] -- [ _, $x, $x, _] -- [ _, _, $x, $x] -all: -- [test, test, test, test] - -# -# Multiple vars -# - ---- -name: "basic uncorrelated vars" -unify: -- - !sum [1, 2] - - !sum [3, 4] -- - $a - - $b -all: -- [1, 3] -- [1, 4] 
-- [2, 3] -- [2, 4] ---- -name: "uncorrelated vars" -unify: -- - !sum [1, 2] - - !sum [3, 4] - - !sum [1, 2] -- - $a - - $b - - $a -all: -- [1, 3, 1] -- [1, 4, 1] -- [2, 3, 2] -- [2, 4, 2] ---- -name: "entangled vars" -unify: -- - !sum [[1,2],[3,4]] - - !sum [[2,1],[3,4],[4,3]] -- - [$a, $b] - - [$b, $a] -all: -- - [1, 2] - - [2, 1] -- - [3, 4] - - [4, 3] - -# -# End-to-end examples -# - ---- -name: "end-to-end" -unify: -- go: Add - in: - - go: $t - - go: $t -- in: !repeat - - !sum - - go: Int32x4 - base: int - - go: Uint32x4 - base: uint -all: -- go: Add - in: - - base: int - go: Int32x4 - - base: int - go: Int32x4 -- go: Add - in: - - base: uint - go: Uint32x4 - - base: uint - go: Uint32x4 diff --git a/internal/unify/trace.go b/internal/unify/trace.go deleted file mode 100644 index b0aa3525..00000000 --- a/internal/unify/trace.go +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "io" - "strings" - - "gopkg.in/yaml.v3" -) - -// debugDotInHTML, if true, includes dot code for all graphs in the HTML. Useful -// for debugging the dot output itself. -const debugDotInHTML = false - -var Debug struct { - // UnifyLog, if non-nil, receives a streaming text trace of unification. - UnifyLog io.Writer - - // HTML, if non-nil, writes an HTML trace of unification to HTML. 
- HTML io.Writer -} - -type tracer struct { - logw io.Writer - - enc yamlEncoder // Print consistent idents throughout - - saveTree bool // if set, record tree; required for HTML output - - path []string - - node *traceTree - trees []*traceTree -} - -type traceTree struct { - label string // Identifies this node as a child of parent - v, w *Value // Unification inputs - envIn envSet - res *Value // Unification result - env envSet - err error // or error - - parent *traceTree - children []*traceTree -} - -type tracerExit struct { - t *tracer - len int - node *traceTree -} - -func (t *tracer) enter(pat string, vals ...any) tracerExit { - if t == nil { - return tracerExit{} - } - - label := fmt.Sprintf(pat, vals...) - - var p *traceTree - if t.saveTree { - p = t.node - if p != nil { - t.node = &traceTree{label: label, parent: p} - p.children = append(p.children, t.node) - } - } - - t.path = append(t.path, label) - return tracerExit{t, len(t.path) - 1, p} -} - -func (t *tracer) enterVar(id *ident, branch int) tracerExit { - if t == nil { - return tracerExit{} - } - - // Use the tracer's ident printer - return t.enter("Var %s br %d", t.enc.idp.unique(id), branch) -} - -func (te tracerExit) exit() { - if te.t == nil { - return - } - te.t.path = te.t.path[:te.len] - te.t.node = te.node -} - -func indentf(prefix string, pat string, vals ...any) string { - s := fmt.Sprintf(pat, vals...) - if len(prefix) == 0 { - return s - } - if !strings.Contains(s, "\n") { - return prefix + s - } - - indent := prefix - if strings.TrimLeft(prefix, " ") != "" { - // Prefix has non-space characters in it. Construct an all space-indent. 
- indent = strings.Repeat(" ", len(prefix)) - } - return prefix + strings.ReplaceAll(s, "\n", "\n"+indent) -} - -func yamlf(prefix string, node *yaml.Node) string { - b, err := yaml.Marshal(node) - if err != nil { - return fmt.Sprintf("", err) - } - return strings.TrimRight(indentf(prefix, "%s", b), " \n") -} - -func (t *tracer) logf(pat string, vals ...any) { - if t == nil || t.logw == nil { - return - } - prefix := fmt.Sprintf("[%s] ", strings.Join(t.path, "/")) - s := indentf(prefix, pat, vals...) - s = strings.TrimRight(s, " \n") - fmt.Fprintf(t.logw, "%s\n", s) -} - -func (t *tracer) traceUnify(v, w *Value, e envSet) { - if t == nil { - return - } - - t.logf("Unify\n%s\nwith\n%s\nin\n%s", - yamlf(" ", t.enc.value(v)), - yamlf(" ", t.enc.value(w)), - yamlf(" ", t.enc.env(e))) - - if t.saveTree { - if t.node == nil { - t.node = &traceTree{} - t.trees = append(t.trees, t.node) - } - t.node.v, t.node.w, t.node.envIn = v, w, e - } -} - -func (t *tracer) traceDone(res *Value, e envSet, err error) { - if t == nil { - return - } - - if err != nil { - t.logf("==> %s", err) - } else { - t.logf("==>\n%s", yamlf(" ", t.enc.closure(Closure{res, e}))) - } - - if t.saveTree { - node := t.node - if node == nil { - panic("popped top of trace stack") - } - node.res, node.err = res, err - node.env = e - } -} diff --git a/internal/unify/unify.go b/internal/unify/unify.go deleted file mode 100644 index 9d22bf19..00000000 --- a/internal/unify/unify.go +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package unify implements unification of structured values. -// -// A [Value] represents a possibly infinite set of concrete values, where a -// value is either a string ([String]), a tuple of values ([Tuple]), or a -// string-keyed map of values called a "def" ([Def]). These sets can be further -// constrained by variables ([Var]). 
A [Value] combined with bindings of -// variables is a [Closure]. -// -// [Unify] finds a [Closure] that satisfies two or more other [Closure]s. This -// can be thought of as intersecting the sets represented by these Closures' -// values, or as the greatest lower bound/infimum of these Closures. If no such -// Closure exists, the result of unification is "bottom", or the empty set. -// -// # Examples -// -// The regular expression "a*" is the infinite set of strings of zero or more -// "a"s. "a*" can be unified with "a" or "aa" or "aaa", and the result is just -// "a", "aa", or "aaa", respectively. However, unifying "a*" with "b" fails -// because there are no values that satisfy both. -// -// Sums express sets directly. For example, !sum [a, b] is the set consisting of -// "a" and "b". Unifying this with !sum [b, c] results in just "b". This also -// makes it easy to demonstrate that unification isn't necessarily a single -// concrete value. For example, unifying !sum [a, b, c] with !sum [b, c, d] -// results in two concrete values: "b" and "c". -// -// The special value _ or "top" represents all possible values. Unifying _ with -// any value x results in x. -// -// Unifying composite values—tuples and defs—unifies their elements. -// -// The value [a*, aa] is an infinite set of tuples. If we unify that with the -// value [aaa, a*], the only possible value that satisfies both is [aaa, aa]. -// Likewise, this is the intersection of the sets described by these two values. -// -// Defs are similar to tuples, but they are indexed by strings and don't have a -// fixed length. For example, {x: a, y: b} is a def with two fields. Any field -// not mentioned in a def is implicitly top. Thus, unifying this with {y: b, z: -// c} results in {x: a, y: b, z: c}. -// -// Variables constrain values. For example, the value [$x, $x] represents all -// tuples whose first and second values are the same, but doesn't otherwise -// constrain that value. 
Thus, this set includes [a, a] as well as [[b, c, d], -// [b, c, d]], but it doesn't include [a, b]. -// -// Sums are internally implemented as fresh variables that are simultaneously -// bound to all values of the sum. That is !sum [a, b] is actually $var (where -// var is some fresh name), closed under the environment $var=a | $var=b. -package unify - -import ( - "errors" - "fmt" - "slices" -) - -// Unify computes a Closure that satisfies each input Closure. If no such -// Closure exists, it returns bottom. -func Unify(closures ...Closure) (Closure, error) { - if len(closures) == 0 { - return Closure{topValue, topEnv}, nil - } - - var trace *tracer - if Debug.UnifyLog != nil || Debug.HTML != nil { - trace = &tracer{ - logw: Debug.UnifyLog, - saveTree: Debug.HTML != nil, - } - } - - unified := closures[0] - for _, c := range closures[1:] { - var err error - uf := newUnifier() - uf.tracer = trace - e := crossEnvs(unified.env, c.env) - unified.val, unified.env, err = unified.val.unify(c.val, e, false, uf) - if Debug.HTML != nil { - uf.writeHTML(Debug.HTML) - } - if err != nil { - return Closure{}, err - } - } - - return unified, nil -} - -type unifier struct { - *tracer -} - -func newUnifier() *unifier { - return &unifier{} -} - -// errDomains is a sentinel error used between unify and unify1 to indicate that -// unify1 could not unify the domains of the two values. -var errDomains = errors.New("cannot unify domains") - -func (v *Value) unify(w *Value, e envSet, swap bool, uf *unifier) (*Value, envSet, error) { - if swap { - // Put the values in order. This just happens to be a handy choke-point - // to do this at. - v, w = w, v - } - - uf.traceUnify(v, w, e) - - d, e2, err := v.unify1(w, e, false, uf) - if err == errDomains { - // Try the other order. - d, e2, err = w.unify1(v, e, true, uf) - if err == errDomains { - // Okay, we really can't unify these. 
- err = fmt.Errorf("cannot unify %T (%s) and %T (%s): kind mismatch", v.Domain, v.PosString(), w.Domain, w.PosString()) - } - } - if err != nil { - uf.traceDone(nil, envSet{}, err) - return nil, envSet{}, err - } - res := unified(d, v, w) - uf.traceDone(res, e2, nil) - if d == nil { - // Double check that a bottom Value also has a bottom env. - if !e2.isEmpty() { - panic("bottom Value has non-bottom environment") - } - } - - return res, e2, nil -} - -func (v *Value) unify1(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - // TODO: If there's an error, attach position information to it. - - vd, wd := v.Domain, w.Domain - - // Bottom returns bottom, and eliminates all possible environments. - if vd == nil || wd == nil { - return nil, bottomEnv, nil - } - - // Top always returns the other. - if _, ok := vd.(Top); ok { - return wd, e, nil - } - - // Variables - if vd, ok := vd.(Var); ok { - return vd.unify(w, e, swap, uf) - } - - // Composite values - if vd, ok := vd.(Def); ok { - if wd, ok := wd.(Def); ok { - return vd.unify(wd, e, swap, uf) - } - } - if vd, ok := vd.(Tuple); ok { - if wd, ok := wd.(Tuple); ok { - return vd.unify(wd, e, swap, uf) - } - } - - // Scalar values - if vd, ok := vd.(String); ok { - if wd, ok := wd.(String); ok { - res := vd.unify(wd) - if res == nil { - e = bottomEnv - } - return res, e, nil - } - } - - return nil, envSet{}, errDomains -} - -func (d Def) unify(o Def, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - out := Def{fields: make(map[string]*Value)} - - // Check keys of d against o. - for key, dv := range d.All() { - ov, ok := o.fields[key] - if !ok { - // ov is implicitly Top. Bypass unification. - out.fields[key] = dv - continue - } - exit := uf.enter("%s", key) - res, e2, err := dv.unify(ov, e, swap, uf) - exit.exit() - if err != nil { - return nil, envSet{}, err - } else if res.Domain == nil { - // No match. 
- return nil, bottomEnv, nil - } - out.fields[key] = res - e = e2 - } - // Check keys of o that we didn't already check. These all implicitly match - // because we know the corresponding fields in d are all Top. - for key, dv := range o.All() { - if _, ok := d.fields[key]; !ok { - out.fields[key] = dv - } - } - return out, e, nil -} - -func (v Tuple) unify(w Tuple, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { - if v.repeat != nil && w.repeat != nil { - // Since we generate the content of these lazily, there's not much we - // can do but just stick them on a list to unify later. - return Tuple{repeat: concat(v.repeat, w.repeat)}, e, nil - } - - // Expand any repeated tuples. - tuples := make([]Tuple, 0, 2) - if v.repeat == nil { - tuples = append(tuples, v) - } else { - v2, e2 := v.doRepeat(e, len(w.vs)) - tuples = append(tuples, v2...) - e = e2 - } - if w.repeat == nil { - tuples = append(tuples, w) - } else { - w2, e2 := w.doRepeat(e, len(v.vs)) - tuples = append(tuples, w2...) - e = e2 - } - - // Now unify all of the tuples (usually this will be just 2 tuples) - out := tuples[0] - for _, t := range tuples[1:] { - if len(out.vs) != len(t.vs) { - uf.logf("tuple length mismatch") - return nil, bottomEnv, nil - } - zs := make([]*Value, len(out.vs)) - for i, v1 := range out.vs { - exit := uf.enter("%d", i) - z, e2, err := v1.unify(t.vs[i], e, swap, uf) - exit.exit() - if err != nil { - return nil, envSet{}, err - } else if z.Domain == nil { - return nil, bottomEnv, nil - } - zs[i] = z - e = e2 - } - out = Tuple{vs: zs} - } - - return out, e, nil -} - -// doRepeat creates a fixed-length tuple from a repeated tuple. The caller is -// expected to unify the returned tuples. 
-func (v Tuple) doRepeat(e envSet, n int) ([]Tuple, envSet) { - res := make([]Tuple, len(v.repeat)) - for i, gen := range v.repeat { - res[i].vs = make([]*Value, n) - for j := range n { - res[i].vs[j], e = gen(e) - } - } - return res, e -} - -// unify intersects the domains of two [String]s. If it can prove that this -// domain is empty, it returns nil (bottom). -// -// TODO: Consider splitting literals and regexps into two domains. -func (v String) unify(w String) Domain { - // Unification is symmetric, so put them in order of string kind so we only - // have to deal with half the cases. - if v.kind > w.kind { - v, w = w, v - } - - switch v.kind { - case stringRegex: - switch w.kind { - case stringRegex: - // Construct a match against all of the regexps - return String{kind: stringRegex, re: slices.Concat(v.re, w.re)} - case stringExact: - for _, re := range v.re { - if !re.MatchString(w.exact) { - return nil - } - } - return w - } - case stringExact: - if v.exact != w.exact { - return nil - } - return v - } - panic("bad string kind") -} - -func concat[T any](s1, s2 []T) []T { - // Reuse s1 or s2 if possible. - if len(s1) == 0 { - return s2 - } - return append(s1[:len(s1):len(s1)], s2...) -} diff --git a/internal/unify/unify_test.go b/internal/unify/unify_test.go deleted file mode 100644 index 8071e0c9..00000000 --- a/internal/unify/unify_test.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "bytes" - "fmt" - "io" - "os" - "path/filepath" - "slices" - "strings" - "testing" - - "gopkg.in/yaml.v3" -) - -func TestUnify(t *testing.T) { - paths, err := filepath.Glob("testdata/*") - if err != nil { - t.Fatal(err) - } - if len(paths) == 0 { - t.Fatal("no testdata found") - } - for _, path := range paths { - // Skip paths starting with _ so experimental files can be added. 
- base := filepath.Base(path) - if base[0] == '_' { - continue - } - if !strings.HasSuffix(base, ".yaml") { - t.Errorf("non-.yaml file in testdata: %s", base) - continue - } - base = strings.TrimSuffix(base, ".yaml") - - t.Run(base, func(t *testing.T) { - testUnify(t, path) - }) - } -} - -func testUnify(t *testing.T, path string) { - f, err := os.Open(path) - if err != nil { - t.Fatal(err) - } - defer f.Close() - - type testCase struct { - Skip bool - Name string - Unify []Closure - Want yaml.Node - All yaml.Node - } - dec := yaml.NewDecoder(f) - - for i := 0; ; i++ { - var tc testCase - err := dec.Decode(&tc) - if err == io.EOF { - break - } - if err != nil { - t.Fatal(err) - } - - name := tc.Name - if name == "" { - name = fmt.Sprint(i) - } - - t.Run(name, func(t *testing.T) { - if tc.Skip { - t.Skip("skip: true set in test case") - } - - defer func() { - p := recover() - if p != nil || t.Failed() { - // Redo with a trace - // - // TODO: Use t.Output() in Go 1.25. - var buf bytes.Buffer - Debug.UnifyLog = &buf - func() { - defer func() { - // If the original unify panicked, the second one - // probably will, too. Ignore it and let the first panic - // bubble. - recover() - }() - Unify(tc.Unify...) - }() - Debug.UnifyLog = nil - t.Logf("Trace:\n%s", buf.String()) - } - if p != nil { - panic(p) - } - }() - - // Unify the test cases - // - // TODO: Try reordering the inputs also - c, err := Unify(tc.Unify...) - if err != nil { - // TODO: Tests of errors - t.Fatal(err) - } - - // Encode the result back to YAML so we can check if it's structurally - // equal. - clean := func(val any) *yaml.Node { - var node yaml.Node - node.Encode(val) - for n := range allYamlNodes(&node) { - // Canonicalize the style. There may be other style flags we need to - // muck with. 
- n.Style &^= yaml.FlowStyle - n.HeadComment = "" - n.LineComment = "" - n.FootComment = "" - } - return &node - } - check := func(gotVal any, wantNode *yaml.Node) { - got, err := yaml.Marshal(clean(gotVal)) - if err != nil { - t.Fatalf("Encoding Value back to yaml failed: %s", err) - } - want, err := yaml.Marshal(clean(wantNode)) - if err != nil { - t.Fatalf("Encoding Want back to yaml failed: %s", err) - } - - if !bytes.Equal(got, want) { - t.Errorf("%s:%d:\nwant:\n%sgot\n%s", f.Name(), wantNode.Line, want, got) - } - } - if tc.Want.Kind != 0 { - check(c.val, &tc.Want) - } - if tc.All.Kind != 0 { - fVal := slices.Collect(c.All()) - check(fVal, &tc.All) - } - }) - } -} diff --git a/internal/unify/value.go b/internal/unify/value.go deleted file mode 100644 index ffc25b87..00000000 --- a/internal/unify/value.go +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "fmt" - "iter" - "reflect" -) - -// A Value represents a structured, non-deterministic value consisting of -// strings, tuples of Values, and string-keyed maps of Values. A -// non-deterministic Value will also contain variables, which are resolved via -// an environment as part of a [Closure]. -// -// For debugging, a Value can also track the source position it was read from in -// an input file, and its provenance from other Values. -type Value struct { - Domain Domain - - // A Value has either a pos or parents (or neither). - pos *Pos - parents *[2]*Value -} - -var ( - topValue = &Value{Domain: Top{}} - bottomValue = &Value{Domain: nil} -) - -// NewValue returns a new [Value] with the given domain and no position -// information. -func NewValue(d Domain) *Value { - return &Value{Domain: d} -} - -// NewValuePos returns a new [Value] with the given domain at position p. 
-func NewValuePos(d Domain, p Pos) *Value { - return &Value{Domain: d, pos: &p} -} - -// newValueFrom returns a new [Value] with the given domain that copies the -// position information of p. -func newValueFrom(d Domain, p *Value) *Value { - return &Value{Domain: d, pos: p.pos, parents: p.parents} -} - -func unified(d Domain, p1, p2 *Value) *Value { - return &Value{Domain: d, parents: &[2]*Value{p1, p2}} -} - -func (v *Value) Pos() Pos { - if v.pos == nil { - return Pos{} - } - return *v.pos -} - -func (v *Value) PosString() string { - var b []byte - for root := range v.Provenance() { - if len(b) > 0 { - b = append(b, ' ') - } - b, _ = root.pos.AppendText(b) - } - return string(b) -} - -func (v *Value) WhyNotExact() string { - if v.Domain == nil { - return "v.Domain is nil" - } - return v.Domain.WhyNotExact() -} - -func (v *Value) Exact() bool { - if v.Domain == nil { - return false - } - return v.Domain.Exact() -} - -// Decode decodes v into a Go value. -// -// v must be exact, except that it can include Top. into must be a pointer. -// [Def]s are decoded into structs. [Tuple]s are decoded into slices. [String]s -// are decoded into strings or ints. Any field can itself be a pointer to one of -// these types. Top can be decoded into a pointer-typed field and will set the -// field to nil. Anything else will allocate a value if necessary. -// -// Any type may implement [Decoder], in which case its DecodeUnified method will -// be called instead of using the default decoding scheme. -func (v *Value) Decode(into any) error { - rv := reflect.ValueOf(into) - if rv.Kind() != reflect.Pointer { - return fmt.Errorf("cannot decode into non-pointer %T", into) - } - return decodeReflect(v, rv.Elem()) -} - -func decodeReflect(v *Value, rv reflect.Value) error { - var ptr reflect.Value - if rv.Kind() == reflect.Pointer { - if rv.IsNil() { - // Transparently allocate through pointers, *except* for Top, which - // wants to set the pointer to nil. 
- // - // TODO: Drop this condition if I switch to an explicit Optional[T] - // or move the Top logic into Def. - if _, ok := v.Domain.(Top); !ok { - // Allocate the value to fill in, but don't actually store it in - // the pointer until we successfully decode. - ptr = rv - rv = reflect.New(rv.Type().Elem()).Elem() - } - } else { - rv = rv.Elem() - } - } - - var err error - if reflect.PointerTo(rv.Type()).Implements(decoderType) { - // Use the custom decoder. - err = rv.Addr().Interface().(Decoder).DecodeUnified(v) - } else { - err = v.Domain.decode(rv) - } - if err == nil && ptr.IsValid() { - ptr.Set(rv.Addr()) - } - return err -} - -// Decoder can be implemented by types as a custom implementation of [Decode] -// for that type. -type Decoder interface { - DecodeUnified(v *Value) error -} - -var decoderType = reflect.TypeOf((*Decoder)(nil)).Elem() - -// Provenance iterates over all of the source Values that have contributed to -// this Value. -func (v *Value) Provenance() iter.Seq[*Value] { - return func(yield func(*Value) bool) { - var rec func(d *Value) bool - rec = func(d *Value) bool { - if d.pos != nil { - if !yield(d) { - return false - } - } - if d.parents != nil { - for _, p := range d.parents { - if !rec(p) { - return false - } - } - } - return true - } - rec(v) - } -} diff --git a/internal/unify/value_test.go b/internal/unify/value_test.go deleted file mode 100644 index 54937c68..00000000 --- a/internal/unify/value_test.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "reflect" - "slices" - "testing" -) - -func ExampleClosure_All_tuple() { - v := mustParse(` -- !sum [1, 2] -- !sum [3, 4] -`) - printYaml(slices.Collect(v.All())) - - // Output: - // - [1, 3] - // - [1, 4] - // - [2, 3] - // - [2, 4] -} - -func ExampleClosure_All_def() { - v := mustParse(` -a: !sum [1, 2] -b: !sum [3, 4] -c: 5 -`) - printYaml(slices.Collect(v.All())) - - // Output: - // - {a: 1, b: 3, c: 5} - // - {a: 1, b: 4, c: 5} - // - {a: 2, b: 3, c: 5} - // - {a: 2, b: 4, c: 5} -} - -func checkDecode[T any](t *testing.T, got *Value, want T) { - var gotT T - if err := got.Decode(&gotT); err != nil { - t.Fatalf("Decode failed: %v", err) - } - if !reflect.DeepEqual(&gotT, &want) { - t.Fatalf("got:\n%s\nwant:\n%s", prettyYaml(gotT), prettyYaml(want)) - } -} diff --git a/internal/unify/yaml.go b/internal/unify/yaml.go deleted file mode 100644 index dadcd71d..00000000 --- a/internal/unify/yaml.go +++ /dev/null @@ -1,619 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package unify - -import ( - "errors" - "fmt" - "io" - "io/fs" - "os" - "path/filepath" - "regexp" - "strings" - - "gopkg.in/yaml.v3" -) - -// ReadOpts provides options to [Read] and related functions. The zero value is -// the default options. -type ReadOpts struct { - // FS, if non-nil, is the file system from which to resolve !import file - // names. - FS fs.FS -} - -// Read reads a [Closure] in YAML format from r, using path for error messages. -// -// It maps YAML nodes into terminal Values as follows: -// -// - "_" or !top _ is the top value ([Top]). -// -// - "_|_" or !bottom _ is the bottom value. This is an error during -// unmarshaling, but can appear in marshaled values. -// -// - "$" or !var is a variable ([Var]). Everywhere the same name -// appears within a single unmarshal operation, it is mapped to the same -// variable. 
Different unmarshal operations get different variables, even if -// they have the same string name. -// -// - !regex "x" is a regular expression ([String]), as is any string that -// doesn't match "_", "_|_", or "$...". Regular expressions are implicitly -// anchored at the beginning and end. If the string doesn't contain any -// meta-characters (that is, it's a "literal" regular expression), then it's -// treated as an exact string. -// -// - !string "x", or any int, float, bool, or binary value is an exact string -// ([String]). -// -// - !regex [x, y, ...] is an intersection of regular expressions ([String]). -// -// It maps YAML nodes into non-terminal Values as follows: -// -// - Sequence nodes like [x, y, z] are tuples ([Tuple]). -// -// - !repeat [x] is a repeated tuple ([Tuple]), which is 0 or more instances of -// x. There must be exactly one element in the list. -// -// - Mapping nodes like {a: x, b: y} are defs ([Def]). Any fields not listed are -// implicitly top. -// -// - !sum [x, y, z] is a sum of its children. This can be thought of as a union -// of the values x, y, and z, or as a non-deterministic choice between x, y, and -// z. If a variable appears both inside the sum and outside of it, only the -// non-deterministic choice view really works. The unifier does not directly -// implement sums; instead, this is decoded as a fresh variable that's -// simultaneously bound to x, y, and z. -// -// - !import glob is like a !sum, but its children are read from all files -// matching the given glob pattern, which is interpreted relative to the current -// file path. Each file gets its own variable scope. -func Read(r io.Reader, path string, opts ReadOpts) (Closure, error) { - dec := yamlDecoder{opts: opts, path: path, env: topEnv} - v, err := dec.read(r) - if err != nil { - return Closure{}, err - } - return dec.close(v), nil -} - -// ReadFile reads a [Closure] in YAML format from a file. -// -// The file must consist of a single YAML document. 
-// -// If opts.FS is not set, this sets it to a FS rooted at path's directory. -// -// See [Read] for details. -func ReadFile(path string, opts ReadOpts) (Closure, error) { - f, err := os.Open(path) - if err != nil { - return Closure{}, err - } - defer f.Close() - - if opts.FS == nil { - opts.FS = os.DirFS(filepath.Dir(path)) - } - - return Read(f, path, opts) -} - -// UnmarshalYAML implements [yaml.Unmarshaler]. -// -// Since there is no way to pass [ReadOpts] to this function, it assumes default -// options. -func (c *Closure) UnmarshalYAML(node *yaml.Node) error { - dec := yamlDecoder{path: "", env: topEnv} - v, err := dec.root(node) - if err != nil { - return err - } - *c = dec.close(v) - return nil -} - -type yamlDecoder struct { - opts ReadOpts - path string - - vars map[string]*ident - nSums int - - env envSet -} - -func (dec *yamlDecoder) read(r io.Reader) (*Value, error) { - n, err := readOneNode(r) - if err != nil { - return nil, fmt.Errorf("%s: %w", dec.path, err) - } - - // Decode YAML node to a Value - v, err := dec.root(n) - if err != nil { - return nil, fmt.Errorf("%s: %w", dec.path, err) - } - - return v, nil -} - -// readOneNode reads a single YAML document from r and returns an error if there -// are more documents in r. -func readOneNode(r io.Reader) (*yaml.Node, error) { - yd := yaml.NewDecoder(r) - - // Decode as a YAML node - var node yaml.Node - if err := yd.Decode(&node); err != nil { - return nil, err - } - np := &node - if np.Kind == yaml.DocumentNode { - np = node.Content[0] - } - - // Ensure there are no more YAML docs in this file - if err := yd.Decode(nil); err == nil { - return nil, fmt.Errorf("must not contain multiple documents") - } else if err != io.EOF { - return nil, err - } - - return np, nil -} - -// root parses the root of a file. -func (dec *yamlDecoder) root(node *yaml.Node) (*Value, error) { - // Prepare for variable name resolution in this file. 
This may be a nested - // root, so restore the current values when we're done. - oldVars, oldNSums := dec.vars, dec.nSums - defer func() { - dec.vars, dec.nSums = oldVars, oldNSums - }() - dec.vars = make(map[string]*ident, 0) - dec.nSums = 0 - - return dec.value(node) -} - -// close wraps a decoded [Value] into a [Closure]. -func (dec *yamlDecoder) close(v *Value) Closure { - return Closure{v, dec.env} -} - -func (dec *yamlDecoder) value(node *yaml.Node) (vOut *Value, errOut error) { - pos := &Pos{Path: dec.path, Line: node.Line} - - // Resolve alias nodes. - if node.Kind == yaml.AliasNode { - node = node.Alias - } - - mk := func(d Domain) (*Value, error) { - v := &Value{Domain: d, pos: pos} - return v, nil - } - mk2 := func(d Domain, err error) (*Value, error) { - if err != nil { - return nil, err - } - return mk(d) - } - - // is tests the kind and long tag of node. - is := func(kind yaml.Kind, tag string) bool { - return node.Kind == kind && node.LongTag() == tag - } - isExact := func() bool { - if node.Kind != yaml.ScalarNode { - return false - } - // We treat any string-ish YAML node as a string. - switch node.LongTag() { - case "!string", "tag:yaml.org,2002:int", "tag:yaml.org,2002:float", "tag:yaml.org,2002:bool", "tag:yaml.org,2002:binary": - return true - } - return false - } - - // !!str nodes provide a short-hand syntax for several leaf domains that are - // also available under explicit tags. To simplify checking below, we set - // strVal to non-"" only for !!str nodes. - strVal := "" - isStr := is(yaml.ScalarNode, "tag:yaml.org,2002:str") - if isStr { - strVal = node.Value - } - - switch { - case is(yaml.ScalarNode, "!var"): - strVal = "$" + node.Value - fallthrough - case strings.HasPrefix(strVal, "$"): - id, ok := dec.vars[strVal] - if !ok { - // We encode different idents with the same string name by adding a - // #N suffix. Strip that off so it doesn't accumulate. This isn't - // meant to be used in user-written input, though nothing stops that. 
- name, _, _ := strings.Cut(strVal, "#") - id = &ident{name: name} - dec.vars[strVal] = id - dec.env = dec.env.bind(id, topValue) - } - return mk(Var{id: id}) - - case strVal == "_" || is(yaml.ScalarNode, "!top"): - return mk(Top{}) - - case strVal == "_|_" || is(yaml.ScalarNode, "!bottom"): - return nil, errors.New("found bottom") - - case isExact(): - val := node.Value - return mk(NewStringExact(val)) - - case isStr || is(yaml.ScalarNode, "!regex"): - // Any other string we treat as a regex. This will produce an exact - // string anyway if the regex is literal. - val := node.Value - return mk2(NewStringRegex(val)) - - case is(yaml.SequenceNode, "!regex"): - var vals []string - if err := node.Decode(&vals); err != nil { - return nil, err - } - return mk2(NewStringRegex(vals...)) - - case is(yaml.MappingNode, "tag:yaml.org,2002:map"): - var db DefBuilder - for i := 0; i < len(node.Content); i += 2 { - key := node.Content[i] - if key.Kind != yaml.ScalarNode { - return nil, fmt.Errorf("non-scalar key %q", key.Value) - } - val, err := dec.value(node.Content[i+1]) - if err != nil { - return nil, err - } - db.Add(key.Value, val) - } - return mk(db.Build()) - - case is(yaml.SequenceNode, "tag:yaml.org,2002:seq"): - elts := node.Content - vs := make([]*Value, 0, len(elts)) - for _, elt := range elts { - v, err := dec.value(elt) - if err != nil { - return nil, err - } - vs = append(vs, v) - } - return mk(NewTuple(vs...)) - - case is(yaml.SequenceNode, "!repeat") || is(yaml.SequenceNode, "!repeat-unify"): - // !repeat must have one child. !repeat-unify is used internally for - // delayed unification, and is the same, it's just allowed to have more - // than one child. - if node.LongTag() == "!repeat" && len(node.Content) != 1 { - return nil, fmt.Errorf("!repeat must have exactly one child") - } - - // Decode the children to make sure they're well-formed, but otherwise - // discard that decoding and do it again every time we need a new - // element. 
- var gen []func(e envSet) (*Value, envSet) - origEnv := dec.env - elts := node.Content - for i, elt := range elts { - _, err := dec.value(elt) - if err != nil { - return nil, err - } - // Undo any effects on the environment. We *do* keep any named - // variables that were added to the vars map in case they were - // introduced within the element. - dec.env = origEnv - // Add a generator function - gen = append(gen, func(e envSet) (*Value, envSet) { - dec.env = e - // TODO: If this is in a sum, this tends to generate a ton of - // fresh variables that are different on each branch of the - // parent sum. Does it make sense to hold on to the i'th value - // of the tuple after we've generated it? - v, err := dec.value(elts[i]) - if err != nil { - // It worked the first time, so this really shouldn't hapen. - panic("decoding repeat element failed") - } - return v, dec.env - }) - } - return mk(NewRepeat(gen...)) - - case is(yaml.SequenceNode, "!sum"): - vs := make([]*Value, 0, len(node.Content)) - for _, elt := range node.Content { - v, err := dec.value(elt) - if err != nil { - return nil, err - } - vs = append(vs, v) - } - if len(vs) == 1 { - return vs[0], nil - } - - // A sum is implemented as a fresh variable that's simultaneously bound - // to each of the descendants. - id := &ident{name: fmt.Sprintf("sum%d", dec.nSums)} - dec.nSums++ - dec.env = dec.env.bind(id, vs...) - return mk(Var{id: id}) - - case is(yaml.ScalarNode, "!import"): - if dec.opts.FS == nil { - return nil, fmt.Errorf("!import not allowed (ReadOpts.FS not set)") - } - pat := node.Value - - if !fs.ValidPath(pat) { - // This will result in Glob returning no results. Give a more useful - // error message for this case. - return nil, fmt.Errorf("!import path must not contain '.' 
or '..'") - } - - ms, err := fs.Glob(dec.opts.FS, pat) - if err != nil { - return nil, fmt.Errorf("resolving !import: %w", err) - } - if len(ms) == 0 { - return nil, fmt.Errorf("!import did not match any files") - } - - // Parse each file - vs := make([]*Value, 0, len(ms)) - for _, m := range ms { - v, err := dec.import1(m) - if err != nil { - return nil, err - } - vs = append(vs, v) - } - - // Create a sum. - if len(vs) == 1 { - return vs[0], nil - } - id := &ident{name: "import"} - dec.env = dec.env.bind(id, vs...) - return mk(Var{id: id}) - } - - return nil, fmt.Errorf("unknown node kind %d %v", node.Kind, node.Tag) -} - -func (dec *yamlDecoder) import1(path string) (*Value, error) { - // Make sure we can open the path first. - f, err := dec.opts.FS.Open(path) - if err != nil { - return nil, fmt.Errorf("!import failed: %w", err) - } - defer f.Close() - - // Prepare the enter path. - oldFS, oldPath := dec.opts.FS, dec.path - defer func() { - dec.opts.FS, dec.path = oldFS, oldPath - }() - - // Enter path, which is relative to the current path's directory. - newPath := filepath.Join(filepath.Dir(dec.path), path) - subFS, err := fs.Sub(dec.opts.FS, filepath.Dir(path)) - if err != nil { - return nil, err - } - dec.opts.FS, dec.path = subFS, newPath - - // Parse the file. - return dec.read(f) -} - -type yamlEncoder struct { - idp identPrinter - e envSet // We track the environment for !repeat nodes. -} - -// TODO: Switch some Value marshaling to Closure? - -func (c Closure) MarshalYAML() (any, error) { - // TODO: If the environment is trivial, just marshal the value. 
- enc := &yamlEncoder{} - return enc.closure(c), nil -} - -func (c Closure) String() string { - b, err := yaml.Marshal(c) - if err != nil { - return fmt.Sprintf("marshal failed: %s", err) - } - return string(b) -} - -func (v *Value) MarshalYAML() (any, error) { - enc := &yamlEncoder{} - return enc.value(v), nil -} - -func (v *Value) String() string { - b, err := yaml.Marshal(v) - if err != nil { - return fmt.Sprintf("marshal failed: %s", err) - } - return string(b) -} - -func (enc *yamlEncoder) closure(c Closure) *yaml.Node { - enc.e = c.env - var n yaml.Node - n.Kind = yaml.MappingNode - n.Tag = "!closure" - n.Content = make([]*yaml.Node, 4) - n.Content[0] = new(yaml.Node) - n.Content[0].SetString("env") - n.Content[2] = new(yaml.Node) - n.Content[2].SetString("in") - n.Content[3] = enc.value(c.val) - // Fill in the env after we've written the value in case value encoding - // affects the env. - n.Content[1] = enc.env(enc.e) - enc.e = envSet{} // Allow GC'ing the env - return &n -} - -func (enc *yamlEncoder) env(e envSet) *yaml.Node { - var encode func(e *envExpr) *yaml.Node - encode = func(e *envExpr) *yaml.Node { - var n yaml.Node - switch e.kind { - default: - panic("bad kind") - case envZero: - n.SetString("0") - case envUnit: - n.SetString("1") - case envBinding: - var id yaml.Node - id.SetString(enc.idp.unique(e.id)) - n.Kind = yaml.MappingNode - n.Content = []*yaml.Node{&id, enc.value(e.val)} - case envProduct, envSum: - n.Kind = yaml.SequenceNode - if e.kind == envProduct { - n.Tag = "!product" - } else { - n.Tag = "!sum" - } - for _, e2 := range e.operands { - n.Content = append(n.Content, encode(e2)) - } - } - return &n - } - return encode(e.root) -} - -var yamlIntRe = regexp.MustCompile(`^-?[0-9]+$`) - -func (enc *yamlEncoder) value(v *Value) *yaml.Node { - var n yaml.Node - switch d := v.Domain.(type) { - case nil: - // Not allowed by unmarshaler, but useful for understanding when - // something goes horribly wrong. 
- // - // TODO: We might be able to track useful provenance for this, which - // would really help with debugging unexpected bottoms. - n.SetString("_|_") - return &n - - case Top: - n.SetString("_") - return &n - - case Def: - n.Kind = yaml.MappingNode - for k, elt := range d.All() { - var kn yaml.Node - kn.SetString(k) - n.Content = append(n.Content, &kn, enc.value(elt)) - } - n.HeadComment = v.PosString() - return &n - - case Tuple: - n.Kind = yaml.SequenceNode - if d.repeat == nil { - for _, elt := range d.vs { - n.Content = append(n.Content, enc.value(elt)) - } - } else { - if len(d.repeat) == 1 { - n.Tag = "!repeat" - } else { - n.Tag = "!repeat-unify" - } - // TODO: I'm not positive this will round-trip everything correctly. - for _, gen := range d.repeat { - v, e := gen(enc.e) - enc.e = e - n.Content = append(n.Content, enc.value(v)) - } - } - return &n - - case String: - switch d.kind { - case stringExact: - n.SetString(d.exact) - switch { - // Make this into a "nice" !!int node if I can. - case yamlIntRe.MatchString(d.exact): - n.Tag = "tag:yaml.org,2002:int" - - // Or a "nice" !!bool node. - case d.exact == "false" || d.exact == "true": - n.Tag = "tag:yaml.org,2002:bool" - - // If this doesn't require escaping, leave it as a str node to avoid - // the annoying YAML tags. Otherwise, mark it as an exact string. - // Alternatively, we could always emit a str node with regexp - // quoting. 
- case d.exact != regexp.QuoteMeta(d.exact): - n.Tag = "!string" - } - return &n - case stringRegex: - o := make([]string, 0, 1) - for _, re := range d.re { - s := re.String() - s = strings.TrimSuffix(strings.TrimPrefix(s, `\A(?:`), `)\z`) - o = append(o, s) - } - if len(o) == 1 { - n.SetString(o[0]) - return &n - } - n.Encode(o) - n.Tag = "!regex" - return &n - } - panic("bad String kind") - - case Var: - // TODO: If Var only appears once in the whole Value and is independent - // in the environment (part of a term that is only over Var), then emit - // this as a !sum instead. - if false { - var vs []*Value // TODO: Get values of this var. - if len(vs) == 1 { - return enc.value(vs[0]) - } - n.Kind = yaml.SequenceNode - n.Tag = "!sum" - for _, elt := range vs { - n.Content = append(n.Content, enc.value(elt)) - } - return &n - } - n.SetString(enc.idp.unique(d.id)) - if !strings.HasPrefix(d.id.name, "$") { - n.Tag = "!var" - } - return &n - } - panic(fmt.Sprintf("unknown domain type %T", v.Domain)) -} diff --git a/internal/unify/yaml_test.go b/internal/unify/yaml_test.go deleted file mode 100644 index 4f0aef43..00000000 --- a/internal/unify/yaml_test.go +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2025 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package unify - -import ( - "bytes" - "fmt" - "iter" - "log" - "strings" - "testing" - "testing/fstest" - - "gopkg.in/yaml.v3" -) - -func mustParse(expr string) Closure { - var c Closure - if err := yaml.Unmarshal([]byte(expr), &c); err != nil { - panic(err) - } - return c -} - -func oneValue(t *testing.T, c Closure) *Value { - t.Helper() - var v *Value - var i int - for v = range c.All() { - i++ - } - if i != 1 { - t.Fatalf("expected 1 value, got %d", i) - } - return v -} - -func printYaml(val any) { - fmt.Println(prettyYaml(val)) -} - -func prettyYaml(val any) string { - b, err := yaml.Marshal(val) - if err != nil { - panic(err) - } - var node yaml.Node - if err := yaml.Unmarshal(b, &node); err != nil { - panic(err) - } - - // Map lines to start offsets. We'll use this to figure out when nodes are - // "small" and should use inline style. - lines := []int{-1, 0} - for pos := 0; pos < len(b); { - next := bytes.IndexByte(b[pos:], '\n') - if next == -1 { - break - } - pos += next + 1 - lines = append(lines, pos) - } - lines = append(lines, len(b)) - - // Strip comments and switch small nodes to inline style - cleanYaml(&node, lines, len(b)) - - b, err = yaml.Marshal(&node) - if err != nil { - panic(err) - } - return string(b) -} - -func cleanYaml(node *yaml.Node, lines []int, endPos int) { - node.HeadComment = "" - node.FootComment = "" - node.LineComment = "" - - for i, n2 := range node.Content { - end2 := endPos - if i < len(node.Content)-1 { - end2 = lines[node.Content[i+1].Line] - } - cleanYaml(n2, lines, end2) - } - - // Use inline style? 
- switch node.Kind { - case yaml.MappingNode, yaml.SequenceNode: - if endPos-lines[node.Line] < 40 { - node.Style = yaml.FlowStyle - } - } -} - -func allYamlNodes(n *yaml.Node) iter.Seq[*yaml.Node] { - return func(yield func(*yaml.Node) bool) { - if !yield(n) { - return - } - for _, n2 := range n.Content { - for n3 := range allYamlNodes(n2) { - if !yield(n3) { - return - } - } - } - } -} - -func TestRoundTripString(t *testing.T) { - // Check that we can round-trip a string with regexp meta-characters in it. - const y = `!string test*` - t.Logf("input:\n%s", y) - - v1 := oneValue(t, mustParse(y)) - var buf1 strings.Builder - enc := yaml.NewEncoder(&buf1) - if err := enc.Encode(v1); err != nil { - log.Fatal(err) - } - enc.Close() - t.Logf("after parse 1:\n%s", buf1.String()) - - v2 := oneValue(t, mustParse(buf1.String())) - var buf2 strings.Builder - enc = yaml.NewEncoder(&buf2) - if err := enc.Encode(v2); err != nil { - log.Fatal(err) - } - enc.Close() - t.Logf("after parse 2:\n%s", buf2.String()) - - if buf1.String() != buf2.String() { - t.Fatal("parse 1 and parse 2 differ") - } -} - -func TestEmptyString(t *testing.T) { - // Regression test. Make sure an empty string is parsed as an exact string, - // not a regexp. - const y = `""` - t.Logf("input:\n%s", y) - - v1 := oneValue(t, mustParse(y)) - if !v1.Exact() { - t.Fatal("expected exact string") - } -} - -func TestImport(t *testing.T) { - // Test a basic import - main := strings.NewReader("!import x/y.yaml") - fs := fstest.MapFS{ - // Test a glob import with a relative path - "x/y.yaml": {Data: []byte("!import y/*.yaml")}, - "x/y/z.yaml": {Data: []byte("42")}, - } - cl, err := Read(main, "x.yaml", ReadOpts{FS: fs}) - if err != nil { - t.Fatal(err) - } - x := 42 - checkDecode(t, oneValue(t, cl), &x) -} - -func TestImportEscape(t *testing.T) { - // Make sure an import can't escape its subdirectory. 
- main := strings.NewReader("!import x/y.yaml") - fs := fstest.MapFS{ - "x/y.yaml": {Data: []byte("!import ../y/*.yaml")}, - "y/z.yaml": {Data: []byte("42")}, - } - _, err := Read(main, "x.yaml", ReadOpts{FS: fs}) - if err == nil { - t.Fatal("relative !import should have failed") - } - if !strings.Contains(err.Error(), "must not contain") { - t.Fatalf("unexpected error %v", err) - } -} - -func TestImportScope(t *testing.T) { - // Test that imports have different variable scopes. - main := strings.NewReader("[!import y.yaml, !import y.yaml]") - fs := fstest.MapFS{ - "y.yaml": {Data: []byte("$v")}, - } - cl1, err := Read(main, "x.yaml", ReadOpts{FS: fs}) - if err != nil { - t.Fatal(err) - } - cl2 := mustParse("[1, 2]") - res, err := Unify(cl1, cl2) - if err != nil { - t.Fatal(err) - } - checkDecode(t, oneValue(t, res), []int{1, 2}) -} From 3476d8e3db440a7f3418917a5139e56eee685ca6 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Thu, 14 Aug 2025 12:04:00 -0400 Subject: [PATCH 198/200] arm64/arm64asm: stop relying on global rand.Seed The global rand.Seed becomes no-op as of Go 1.24. Use a local random source with the seed instead. Updates golang/go#67273. For golang/go#69095. 
Change-Id: Ie50f197ba3dc115d4b514a9ba2baa72563bebbd6 Reviewed-on: https://go-review.googlesource.com/c/arch/+/696135 Reviewed-by: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov --- arm64/arm64asm/ext_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arm64/arm64asm/ext_test.go b/arm64/arm64asm/ext_test.go index f0d18e93..839eb3fd 100644 --- a/arm64/arm64asm/ext_test.go +++ b/arm64/arm64asm/ext_test.go @@ -422,7 +422,7 @@ var condmark bool = false func doFuzzy(inst *InstJson, Ninst int) { var testdata uint32 var NonDigRE = regexp.MustCompile(`[\D]`) - rand.Seed(int64(Round + Ninst)) + rand := rand.New(rand.NewSource(int64(Round + Ninst))) off := 0 DigBit := "" if condmark == true && !strings.Contains(inst.Bits, "cond") { From 981dfb93ab29835405565cbd6975de348c266385 Mon Sep 17 00:00:00 2001 From: Gopher Robot Date: Wed, 13 Aug 2025 14:21:36 +0000 Subject: [PATCH 199/200] all: upgrade go directive to at least 1.24.0 [generated] By now Go 1.25.0 has been released, and Go 1.23 is no longer supported per the Go Release Policy (see https://go.dev/doc/devel/release#policy). For golang/go#69095. [git-generate] (cd . && go get go@1.24.0 && go mod tidy && go fix ./... 
&& go mod edit -toolchain=none) Change-Id: I27cc60c60dde64df29829e4f8577b4ae9cba33a3 Reviewed-on: https://go-review.googlesource.com/c/arch/+/695695 LUCI-TryBot-Result: Go LUCI Auto-Submit: Dmitri Shuralyov Reviewed-by: Cherry Mui Reviewed-by: David Chase Reviewed-by: Dmitri Shuralyov --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b72ba1a5..0db7aa41 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module golang.org/x/arch -go 1.23.0 +go 1.24.0 require rsc.io/pdf v0.1.1 From 090af6d6344176653a725a32332977a644bca8f9 Mon Sep 17 00:00:00 2001 From: Mark Ryan Date: Mon, 25 Aug 2025 14:16:31 +0200 Subject: [PATCH 200/200] riscv64: fix argument count check in spec.go The code was panicking instead of reporting an error when an incorrect number of arguments were passed. Change-Id: I1ed8c94cedc8501160dbc65cdfc28badf67bf4f0 Reviewed-on: https://go-review.googlesource.com/c/arch/+/698895 Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Auto-Submit: Joel Sing Reviewed-by: Joel Sing Reviewed-by: Carlos Amedee --- riscv64/riscv64spec/spec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv64/riscv64spec/spec.go b/riscv64/riscv64spec/spec.go index b65ea697..5b80c868 100644 --- a/riscv64/riscv64spec/spec.go +++ b/riscv64/riscv64spec/spec.go @@ -68,7 +68,7 @@ func main() { log.SetFlags(0) log.SetPrefix("riscv64spec: ") - if len(os.Args) < 1 { + if len(os.Args) < 2 { log.Fatal("usage: go run spec.go ") } extensionsPath := filepath.Join(os.Args[1], "extensions")